diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib')
223 files changed, 6394 insertions, 3825 deletions
diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp index 2a45acf63aa2..5beac5547d65 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp @@ -6079,7 +6079,7 @@ static Value *simplifyRelativeLoad(Constant *Ptr, Constant *Offset, Type *Int32Ty = Type::getInt32Ty(Ptr->getContext()); auto *OffsetConstInt = dyn_cast<ConstantInt>(Offset); - if (!OffsetConstInt || OffsetConstInt->getType()->getBitWidth() > 64) + if (!OffsetConstInt || OffsetConstInt->getBitWidth() > 64) return nullptr; APInt OffsetInt = OffsetConstInt->getValue().sextOrTrunc( diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp index 910f6b72afef..89cc7ea15ec1 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp @@ -1566,7 +1566,6 @@ void LazyValueInfoImpl::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, bool LazyValueInfoWrapperPass::runOnFunction(Function &F) { Info.AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - Info.TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); if (auto *Impl = Info.getImpl()) Impl->clear(); @@ -1627,9 +1626,8 @@ void LazyValueInfoWrapperPass::releaseMemory() { Info.releaseMemory(); } LazyValueInfo LazyValueAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { auto &AC = FAM.getResult<AssumptionAnalysis>(F); - auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); - return LazyValueInfo(&AC, &F.getParent()->getDataLayout(), &TLI); + return LazyValueInfo(&AC, &F.getParent()->getDataLayout()); } /// Returns true if we can statically tell that this value will never be a @@ -1714,11 +1712,11 @@ ConstantRange LazyValueInfo::getConstantRangeOnEdge(Value *V, static LazyValueInfo::Tristate getPredicateResult(unsigned 
Pred, Constant *C, const ValueLatticeElement &Val, - const DataLayout &DL, TargetLibraryInfo *TLI) { + const DataLayout &DL) { // If we know the value is a constant, evaluate the conditional. Constant *Res = nullptr; if (Val.isConstant()) { - Res = ConstantFoldCompareInstOperands(Pred, Val.getConstant(), C, DL, TLI); + Res = ConstantFoldCompareInstOperands(Pred, Val.getConstant(), C, DL); if (ConstantInt *ResCI = dyn_cast_or_null<ConstantInt>(Res)) return ResCI->isZero() ? LazyValueInfo::False : LazyValueInfo::True; return LazyValueInfo::Unknown; @@ -1759,15 +1757,13 @@ getPredicateResult(unsigned Pred, Constant *C, const ValueLatticeElement &Val, if (Pred == ICmpInst::ICMP_EQ) { // !C1 == C -> false iff C1 == C. Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, - Val.getNotConstant(), C, DL, - TLI); + Val.getNotConstant(), C, DL); if (Res && Res->isNullValue()) return LazyValueInfo::False; } else if (Pred == ICmpInst::ICMP_NE) { // !C1 != C -> true iff C1 == C. Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, - Val.getNotConstant(), C, DL, - TLI); + Val.getNotConstant(), C, DL); if (Res && Res->isNullValue()) return LazyValueInfo::True; } @@ -1787,7 +1783,7 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, ValueLatticeElement Result = getOrCreateImpl(M).getValueOnEdge(V, FromBB, ToBB, CxtI); - return getPredicateResult(Pred, C, Result, M->getDataLayout(), TLI); + return getPredicateResult(Pred, C, Result, M->getDataLayout()); } LazyValueInfo::Tristate @@ -1811,7 +1807,7 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C, ValueLatticeElement Result = UseBlockValue ? 
Impl.getValueInBlock(V, CxtI->getParent(), CxtI) : Impl.getValueAt(V, CxtI); - Tristate Ret = getPredicateResult(Pred, C, Result, DL, TLI); + Tristate Ret = getPredicateResult(Pred, C, Result, DL); if (Ret != Unknown) return Ret; diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 0894560fd078..89666018d925 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -143,7 +143,7 @@ static cl::opt<bool, true> HoistRuntimeChecks( "hoist-runtime-checks", cl::Hidden, cl::desc( "Hoist inner loop runtime memory checks to outer loop if possible"), - cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(false)); + cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(true)); bool VectorizerParams::HoistRuntimeChecks; bool VectorizerParams::isInterleaveForced() { diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 49eccde45f31..951e00e34142 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -1292,16 +1292,16 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB( if (InsertRes.first->second != Pointer.getAddr()) { // Make sure to clean up the Visited map before continuing on to // PredTranslationFailure. - for (unsigned i = 0; i < NewBlocks.size(); i++) - Visited.erase(NewBlocks[i]); + for (auto *NewBlock : NewBlocks) + Visited.erase(NewBlock); goto PredTranslationFailure; } } if (NewBlocks.size() > WorklistEntries) { // Make sure to clean up the Visited map before continuing on to // PredTranslationFailure. 
- for (unsigned i = 0; i < NewBlocks.size(); i++) - Visited.erase(NewBlocks[i]); + for (auto *NewBlock : NewBlocks) + Visited.erase(NewBlock); GotWorklistLimit = true; goto PredTranslationFailure; } @@ -1359,8 +1359,8 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB( // Make sure to clean up the Visited map before continuing on to // PredTranslationFailure. - for (unsigned i = 0, n = PredList.size(); i < n; ++i) - Visited.erase(PredList[i].first); + for (const auto &Pred : PredList) + Visited.erase(Pred.first); goto PredTranslationFailure; } @@ -1371,9 +1371,9 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB( // any results for. (getNonLocalPointerDepFromBB will modify our // datastructures in ways the code after the PredTranslationFailure label // doesn't expect.) - for (unsigned i = 0, n = PredList.size(); i < n; ++i) { - BasicBlock *Pred = PredList[i].first; - PHITransAddr &PredPointer = PredList[i].second; + for (auto &I : PredList) { + BasicBlock *Pred = I.first; + PHITransAddr &PredPointer = I.second; Value *PredPtrVal = PredPointer.getAddr(); bool CanTranslate = true; diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp index 580fe112fcd7..623814c038a7 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp @@ -7914,9 +7914,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // expression. We already checked that ShlAmt < BitWidth, so // the multiplier, 1 << (ShlAmt - AShrAmt), fits into TruncTy as // ShlAmt - AShrAmt < Amt. 
- uint64_t ShlAmt = ShlAmtCI->getZExtValue(); - if (ShlAmtCI->getValue().ult(BitWidth) && ShlAmt >= AShrAmt) { - APInt Mul = APInt::getOneBitSet(BitWidth - AShrAmt, ShlAmt - AShrAmt); + const APInt &ShlAmt = ShlAmtCI->getValue(); + if (ShlAmt.ult(BitWidth) && ShlAmt.uge(AShrAmt)) { + APInt Mul = APInt::getOneBitSet(BitWidth - AShrAmt, + ShlAmtCI->getZExtValue() - AShrAmt); const SCEV *CompositeExpr = getMulExpr(AddTruncateExpr, getConstant(Mul)); if (L->getOpcode() != Instruction::Shl) diff --git a/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp index 20959cf6948f..bbb7c86d2185 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -1149,6 +1149,16 @@ bool TargetLibraryInfoImpl::getLibFunc(const Function &FDecl, return isValidProtoForLibFunc(*FDecl.getFunctionType(), F, *M); } +bool TargetLibraryInfoImpl::getLibFunc(unsigned int Opcode, Type *Ty, + LibFunc &F) const { + // Must be a frem instruction with float or double arguments. + if (Opcode != Instruction::FRem || (!Ty->isDoubleTy() && !Ty->isFloatTy())) + return false; + + F = Ty->isDoubleTy() ? 
LibFunc_fmod : LibFunc_fmodf; + return true; +} + void TargetLibraryInfoImpl::disableAllFunctions() { memset(AvailableArray, 0, sizeof(AvailableArray)); } diff --git a/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp b/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp index ad918ef7245b..426f98c0c628 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp @@ -126,7 +126,7 @@ static ParseRet tryParseLinearTokenWithRuntimeStep(StringRef &ParseString, return ParseRet::None; } -/// The function looks for the following stringt at the beginning of +/// The function looks for the following string at the beginning of /// the input string `ParseString`: /// /// <token> <number> @@ -376,7 +376,7 @@ std::optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName, // _ZGV<isa><mask><vlen><parameters>_<scalarname>. StringRef VectorName = MangledName; - // Parse the fixed size part of the manled name + // Parse the fixed size part of the mangled name if (!MangledName.consume_front("_ZGV")) return std::nullopt; diff --git a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp index 9ae05a4b5ccc..cac2602d455f 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp @@ -1826,6 +1826,8 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts, Known.Zero &= ~Elt; Known.One &= Elt; } + if (Known.hasConflict()) + Known.resetAll(); return; } @@ -1849,6 +1851,8 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts, Known.Zero &= ~Elt; Known.One &= Elt; } + if (Known.hasConflict()) + Known.resetAll(); return; } @@ -2368,19 +2372,12 @@ static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth, static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth, const SimplifyQuery &Q, unsigned BitWidth, Value *X, 
Value *Y) { + // TODO: Move this case into isKnownNonEqual(). if (auto *C = dyn_cast<Constant>(X)) if (C->isNullValue() && isKnownNonZero(Y, DemandedElts, Depth, Q)) return true; - KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q); - if (XKnown.isUnknown()) - return false; - KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q); - // If X != Y then X - Y is non zero. - std::optional<bool> ne = KnownBits::ne(XKnown, YKnown); - // If we are unable to compute if X != Y, we won't be able to do anything - // computing the knownbits of the sub expression so just return here. - return ne && *ne; + return ::isKnownNonEqual(X, Y, Depth, Q); } static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts, @@ -3191,11 +3188,12 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth, // Are any known bits in V1 contradictory to known bits in V2? If V1 // has a known zero where V2 has a known one, they must not be equal. KnownBits Known1 = computeKnownBits(V1, Depth, Q); - KnownBits Known2 = computeKnownBits(V2, Depth, Q); - - if (Known1.Zero.intersects(Known2.One) || - Known2.Zero.intersects(Known1.One)) - return true; + if (!Known1.isUnknown()) { + KnownBits Known2 = computeKnownBits(V2, Depth, Q); + if (Known1.Zero.intersects(Known2.One) || + Known2.Zero.intersects(Known1.One)) + return true; + } } if (isNonEqualSelect(V1, V2, Depth, Q) || isNonEqualSelect(V2, V1, Depth, Q)) @@ -3205,6 +3203,13 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth, isNonEqualPointersWithRecursiveGEP(V2, V1, Q)) return true; + Value *A, *B; + // PtrToInts are NonEqual if their Ptrs are NonEqual. + // Check PtrToInt type matches the pointer size. 
+ if (match(V1, m_PtrToIntSameSize(Q.DL, m_Value(A))) && + match(V2, m_PtrToIntSameSize(Q.DL, m_Value(B)))) + return isKnownNonEqual(A, B, Depth + 1, Q); + return false; } @@ -6284,10 +6289,10 @@ static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) { } /// Combine constant ranges from computeConstantRange() and computeKnownBits(). -static ConstantRange -computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V, - bool ForSigned, - const SimplifyQuery &SQ) { +ConstantRange +llvm::computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V, + bool ForSigned, + const SimplifyQuery &SQ) { ConstantRange CR1 = ConstantRange::fromKnownBits(V.getKnownBits(SQ), ForSigned); ConstantRange CR2 = computeConstantRange(V, ForSigned, SQ.IIQ.UseInstrInfo); @@ -6555,10 +6560,25 @@ static bool shiftAmountKnownInRange(const Value *ShiftAmount) { return Safe; } -static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly, +enum class UndefPoisonKind { + PoisonOnly = (1 << 0), + UndefOnly = (1 << 1), + UndefOrPoison = PoisonOnly | UndefOnly, +}; + +static bool includesPoison(UndefPoisonKind Kind) { + return (unsigned(Kind) & unsigned(UndefPoisonKind::PoisonOnly)) != 0; +} + +static bool includesUndef(UndefPoisonKind Kind) { + return (unsigned(Kind) & unsigned(UndefPoisonKind::UndefOnly)) != 0; +} + +static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind, bool ConsiderFlagsAndMetadata) { - if (ConsiderFlagsAndMetadata && Op->hasPoisonGeneratingFlagsOrMetadata()) + if (ConsiderFlagsAndMetadata && includesPoison(Kind) && + Op->hasPoisonGeneratingFlagsOrMetadata()) return true; unsigned Opcode = Op->getOpcode(); @@ -6568,7 +6588,7 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly, case Instruction::Shl: case Instruction::AShr: case Instruction::LShr: - return !shiftAmountKnownInRange(Op->getOperand(1)); + return includesPoison(Kind) && !shiftAmountKnownInRange(Op->getOperand(1)); case 
Instruction::FPToSI: case Instruction::FPToUI: // fptosi/ui yields poison if the resulting value does not fit in the @@ -6609,7 +6629,8 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly, return false; case Intrinsic::sshl_sat: case Intrinsic::ushl_sat: - return !shiftAmountKnownInRange(II->getArgOperand(1)); + return includesPoison(Kind) && + !shiftAmountKnownInRange(II->getArgOperand(1)); case Intrinsic::fma: case Intrinsic::fmuladd: case Intrinsic::sqrt: @@ -6664,18 +6685,16 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly, auto *VTy = cast<VectorType>(Op->getOperand(0)->getType()); unsigned IdxOp = Op->getOpcode() == Instruction::InsertElement ? 2 : 1; auto *Idx = dyn_cast<ConstantInt>(Op->getOperand(IdxOp)); - if (!Idx || Idx->getValue().uge(VTy->getElementCount().getKnownMinValue())) - return true; + if (includesPoison(Kind)) + return !Idx || + Idx->getValue().uge(VTy->getElementCount().getKnownMinValue()); return false; } case Instruction::ShuffleVector: { - // shufflevector may return undef. - if (PoisonOnly) - return false; ArrayRef<int> Mask = isa<ConstantExpr>(Op) ? 
cast<ConstantExpr>(Op)->getShuffleMask() : cast<ShuffleVectorInst>(Op)->getShuffleMask(); - return is_contained(Mask, PoisonMaskElem); + return includesPoison(Kind) && is_contained(Mask, PoisonMaskElem); } case Instruction::FNeg: case Instruction::PHI: @@ -6711,17 +6730,17 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly, bool llvm::canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata) { - return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/false, + return ::canCreateUndefOrPoison(Op, UndefPoisonKind::UndefOrPoison, ConsiderFlagsAndMetadata); } bool llvm::canCreatePoison(const Operator *Op, bool ConsiderFlagsAndMetadata) { - return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/true, + return ::canCreateUndefOrPoison(Op, UndefPoisonKind::PoisonOnly, ConsiderFlagsAndMetadata); } -static bool directlyImpliesPoison(const Value *ValAssumedPoison, - const Value *V, unsigned Depth) { +static bool directlyImpliesPoison(const Value *ValAssumedPoison, const Value *V, + unsigned Depth) { if (ValAssumedPoison == V) return true; @@ -6773,14 +6792,11 @@ bool llvm::impliesPoison(const Value *ValAssumedPoison, const Value *V) { return ::impliesPoison(ValAssumedPoison, V, /* Depth */ 0); } -static bool programUndefinedIfUndefOrPoison(const Value *V, - bool PoisonOnly); +static bool programUndefinedIfUndefOrPoison(const Value *V, bool PoisonOnly); -static bool isGuaranteedNotToBeUndefOrPoison(const Value *V, - AssumptionCache *AC, - const Instruction *CtxI, - const DominatorTree *DT, - unsigned Depth, bool PoisonOnly) { +static bool isGuaranteedNotToBeUndefOrPoison( + const Value *V, AssumptionCache *AC, const Instruction *CtxI, + const DominatorTree *DT, unsigned Depth, UndefPoisonKind Kind) { if (Depth >= MaxAnalysisRecursionDepth) return false; @@ -6795,16 +6811,19 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V, } if (auto *C = dyn_cast<Constant>(V)) { + if (isa<PoisonValue>(C)) + return !includesPoison(Kind); + if 
(isa<UndefValue>(C)) - return PoisonOnly && !isa<PoisonValue>(C); + return !includesUndef(Kind); if (isa<ConstantInt>(C) || isa<GlobalVariable>(C) || isa<ConstantFP>(V) || isa<ConstantPointerNull>(C) || isa<Function>(C)) return true; if (C->getType()->isVectorTy() && !isa<ConstantExpr>(C)) - return (PoisonOnly ? !C->containsPoisonElement() - : !C->containsUndefOrPoisonElement()) && + return (!includesUndef(Kind) ? !C->containsPoisonElement() + : !C->containsUndefOrPoisonElement()) && !C->containsConstantExpression(); } @@ -6822,8 +6841,7 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V, return true; auto OpCheck = [&](const Value *V) { - return isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth + 1, - PoisonOnly); + return isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth + 1, Kind); }; if (auto *Opr = dyn_cast<Operator>(V)) { @@ -6845,14 +6863,16 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V, for (unsigned i = 0; i < Num; ++i) { auto *TI = PN->getIncomingBlock(i)->getTerminator(); if (!isGuaranteedNotToBeUndefOrPoison(PN->getIncomingValue(i), AC, TI, - DT, Depth + 1, PoisonOnly)) { + DT, Depth + 1, Kind)) { IsWellDefined = false; break; } } if (IsWellDefined) return true; - } else if (!canCreateUndefOrPoison(Opr) && all_of(Opr->operands(), OpCheck)) + } else if (!::canCreateUndefOrPoison(Opr, Kind, + /*ConsiderFlagsAndMetadata*/ true) && + all_of(Opr->operands(), OpCheck)) return true; } @@ -6862,7 +6882,7 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V, I->hasMetadata(LLVMContext::MD_dereferenceable_or_null)) return true; - if (programUndefinedIfUndefOrPoison(V, PoisonOnly)) + if (programUndefinedIfUndefOrPoison(V, !includesUndef(Kind))) return true; // CxtI may be null or a cloned instruction. 
@@ -6894,7 +6914,7 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V, if (Cond) { if (Cond == V) return true; - else if (PoisonOnly && isa<Operator>(Cond)) { + else if (!includesUndef(Kind) && isa<Operator>(Cond)) { // For poison, we can analyze further auto *Opr = cast<Operator>(Cond); if (any_of(Opr->operands(), @@ -6916,20 +6936,22 @@ bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC, const Instruction *CtxI, const DominatorTree *DT, unsigned Depth) { - return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, false); + return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, + UndefPoisonKind::UndefOrPoison); } bool llvm::isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC, const Instruction *CtxI, const DominatorTree *DT, unsigned Depth) { - return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, true); + return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, + UndefPoisonKind::PoisonOnly); } bool llvm::isGuaranteedNotToBeUndef(const Value *V, AssumptionCache *AC, const Instruction *CtxI, const DominatorTree *DT, unsigned Depth) { - // TODO: This is currently equivalent to isGuaranteedNotToBeUndefOrPoison(). 
- return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, false); + return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, + UndefPoisonKind::UndefOnly); } /// Return true if undefined behavior would provably be executed on the path to diff --git a/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp index 91d8c31fa062..f90fca9d937f 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp @@ -12,6 +12,7 @@ #include "llvm/Analysis/VectorUtils.h" #include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" @@ -20,6 +21,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Value.h" @@ -1477,6 +1479,32 @@ void VFABI::getVectorVariantNames( } } +FunctionType *VFABI::createFunctionType(const VFInfo &Info, + const FunctionType *ScalarFTy) { + // Create vector parameter types + SmallVector<Type *, 8> VecTypes; + ElementCount VF = Info.Shape.VF; + int ScalarParamIndex = 0; + for (auto VFParam : Info.Shape.Parameters) { + if (VFParam.ParamKind == VFParamKind::GlobalPredicate) { + VectorType *MaskTy = + VectorType::get(Type::getInt1Ty(ScalarFTy->getContext()), VF); + VecTypes.push_back(MaskTy); + continue; + } + + Type *OperandTy = ScalarFTy->getParamType(ScalarParamIndex++); + if (VFParam.ParamKind == VFParamKind::Vector) + OperandTy = VectorType::get(OperandTy, VF); + VecTypes.push_back(OperandTy); + } + + auto *RetTy = ScalarFTy->getReturnType(); + if (!RetTy->isVoidTy()) + RetTy = VectorType::get(RetTy, VF); + return FunctionType::get(RetTy, VecTypes, false); +} + bool VFShape::hasValidParameterList() const { 
for (unsigned Pos = 0, NumParams = Parameters.size(); Pos < NumParams; ++Pos) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp index d6f487c18b03..30ea7eef3a12 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -412,7 +412,7 @@ static uint32_t constructAbbreviationTag( const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> &EntryRet) { uint32_t AbbrvTag = 0; if (EntryRet) - AbbrvTag |= 1 << EntryRet->Endoding.Index; + AbbrvTag |= 1 << EntryRet->Encoding.Index; AbbrvTag |= 1 << dwarf::DW_IDX_die_offset; AbbrvTag |= Tag << LowerBitSize; return AbbrvTag; @@ -429,7 +429,7 @@ void Dwarf5AccelTableWriter<DataT>::populateAbbrevsMap() { if (Abbreviations.count(AbbrvTag) == 0) { SmallVector<DWARF5AccelTableData::AttributeEncoding, 2> UA; if (EntryRet) - UA.push_back(EntryRet->Endoding); + UA.push_back(EntryRet->Encoding); UA.push_back({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4}); Abbreviations.try_emplace(AbbrvTag, UA); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 61309c51336e..4dd27702786e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -583,6 +583,7 @@ bool AsmPrinter::doInitialization(Module &M) { [[fallthrough]]; case ExceptionHandling::SjLj: case ExceptionHandling::DwarfCFI: + case ExceptionHandling::ZOS: ES = new DwarfCFIException(this); break; case ExceptionHandling::ARM: diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index 87a0ba58b14c..9037f752dc4f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ 
b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -1375,16 +1375,9 @@ bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) { if (!MI.isDebugValue()) return false; - const DILocalVariable *Var = MI.getDebugVariable(); - const DIExpression *Expr = MI.getDebugExpression(); - const DILocation *DebugLoc = MI.getDebugLoc(); - const DILocation *InlinedAt = DebugLoc->getInlinedAt(); - assert(Var->isValidLocationForIntrinsic(DebugLoc) && + assert(MI.getDebugVariable()->isValidLocationForIntrinsic(MI.getDebugLoc()) && "Expected inlined-at fields to agree"); - DebugVariable V(Var, Expr, InlinedAt); - DbgValueProperties Properties(MI); - // If there are no instructions in this lexical scope, do no location tracking // at all, this variable shouldn't get a legitimate location range. auto *Scope = LS.findLexicalScope(MI.getDebugLoc().get()); @@ -1417,7 +1410,7 @@ bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) { } } } - VTracker->defVar(MI, Properties, DebugOps); + VTracker->defVar(MI, DbgValueProperties(MI), DebugOps); } // If performing final tracking of transfers, report this variable definition @@ -2420,7 +2413,7 @@ bool InstrRefBasedLDV::mlocJoin( // Pick out the first predecessors live-out value for this location. It's // guaranteed to not be a backedge, as we order by RPO. - ValueIDNum FirstVal = OutLocs[BlockOrders[0]->getNumber()][Idx.asU64()]; + ValueIDNum FirstVal = OutLocs[*BlockOrders[0]][Idx.asU64()]; // If we've already eliminated a PHI here, do no further checking, just // propagate the first live-in value into this block. 
@@ -2437,8 +2430,7 @@ bool InstrRefBasedLDV::mlocJoin( bool Disagree = false; for (unsigned int I = 1; I < BlockOrders.size(); ++I) { const MachineBasicBlock *PredMBB = BlockOrders[I]; - const ValueIDNum &PredLiveOut = - OutLocs[PredMBB->getNumber()][Idx.asU64()]; + const ValueIDNum &PredLiveOut = OutLocs[*PredMBB][Idx.asU64()]; // Incoming values agree, continue trying to eliminate this PHI. if (FirstVal == PredLiveOut) @@ -2563,7 +2555,7 @@ void InstrRefBasedLDV::placeMLocPHIs( auto InstallPHIsAtLoc = [&PHIBlocks, &MInLocs](LocIdx L) { for (const MachineBasicBlock *MBB : PHIBlocks) - MInLocs[MBB->getNumber()][L.asU64()] = ValueIDNum(MBB->getNumber(), 0, L); + MInLocs[*MBB][L.asU64()] = ValueIDNum(MBB->getNumber(), 0, L); }; // For locations with no reg units, just place PHIs. @@ -2642,7 +2634,8 @@ void InstrRefBasedLDV::buildMLocValueMap( // Initialize entry block to PHIs. These represent arguments. for (auto Location : MTracker->locations()) - MInLocs[0][Location.Idx.asU64()] = ValueIDNum(0, 0, Location.Idx); + MInLocs.tableForEntryMBB()[Location.Idx.asU64()] = + ValueIDNum(0, 0, Location.Idx); MTracker->reset(); @@ -2671,7 +2664,7 @@ void InstrRefBasedLDV::buildMLocValueMap( // Join the values in all predecessor blocks. bool InLocsChanged; - InLocsChanged = mlocJoin(*MBB, Visited, MOutLocs, MInLocs[CurBB]); + InLocsChanged = mlocJoin(*MBB, Visited, MOutLocs, MInLocs[*MBB]); InLocsChanged |= Visited.insert(MBB).second; // Don't examine transfer function if we've visited this loc at least @@ -2680,7 +2673,7 @@ void InstrRefBasedLDV::buildMLocValueMap( continue; // Load the current set of live-ins into MLocTracker. - MTracker->loadFromArray(MInLocs[CurBB], CurBB); + MTracker->loadFromArray(MInLocs[*MBB], CurBB); // Each element of the transfer function can be a new def, or a read of // a live-in value. Evaluate each element, and store to "ToRemap". @@ -2707,8 +2700,8 @@ void InstrRefBasedLDV::buildMLocValueMap( // the transfer function, and mlocJoin. 
bool OLChanged = false; for (auto Location : MTracker->locations()) { - OLChanged |= MOutLocs[CurBB][Location.Idx.asU64()] != Location.Value; - MOutLocs[CurBB][Location.Idx.asU64()] = Location.Value; + OLChanged |= MOutLocs[*MBB][Location.Idx.asU64()] != Location.Value; + MOutLocs[*MBB][Location.Idx.asU64()] = Location.Value; } MTracker->reset(); @@ -2851,7 +2844,6 @@ std::optional<ValueIDNum> InstrRefBasedLDV::pickOperandPHILoc( unsigned NumLocs = MTracker->getNumLocs(); for (const auto p : BlockOrders) { - unsigned ThisBBNum = p->getNumber(); auto OutValIt = LiveOuts.find(p); assert(OutValIt != LiveOuts.end()); const DbgValue &OutVal = *OutValIt->second; @@ -2870,7 +2862,7 @@ std::optional<ValueIDNum> InstrRefBasedLDV::pickOperandPHILoc( ValueIDNum ValToLookFor = OutValOp.ID; // Search the live-outs of the predecessor for the specified value. for (unsigned int I = 0; I < NumLocs; ++I) { - if (MOutLocs[ThisBBNum][I] == ValToLookFor) + if (MOutLocs[*p][I] == ValToLookFor) Locs.back().push_back(LocIdx(I)); } } else { @@ -2883,7 +2875,7 @@ std::optional<ValueIDNum> InstrRefBasedLDV::pickOperandPHILoc( // machine-value PHI locations. for (unsigned int I = 0; I < NumLocs; ++I) { ValueIDNum MPHI(MBB.getNumber(), 0, LocIdx(I)); - if (MOutLocs[ThisBBNum][I] == MPHI) + if (MOutLocs[*p][I] == MPHI) Locs.back().push_back(LocIdx(I)); } } @@ -3505,19 +3497,15 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit( // Helper lambda for ejecting a block -- if nothing is going to use the block, // we can translate the variable location information into DBG_VALUEs and then // free all of InstrRefBasedLDV's data structures. - SmallPtrSet<const MachineBasicBlock *, 8> EjectedBBs; auto EjectBlock = [&](MachineBasicBlock &MBB) -> void { - if (EjectedBBs.insert(&MBB).second == false) - return; unsigned BBNum = MBB.getNumber(); AllTheVLocs[BBNum].clear(); // Prime the transfer-tracker, and then step through all the block // instructions, installing transfers. 
MTracker->reset(); - MTracker->loadFromArray(MInLocs[BBNum], BBNum); - TTracker->loadInlocs(MBB, MInLocs[BBNum], DbgOpStore, Output[BBNum], - NumLocs); + MTracker->loadFromArray(MInLocs[MBB], BBNum); + TTracker->loadInlocs(MBB, MInLocs[MBB], DbgOpStore, Output[BBNum], NumLocs); CurBB = BBNum; CurInst = 1; @@ -3528,8 +3516,8 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit( } // Free machine-location tables for this block. - MInLocs[BBNum] = ValueTable(); - MOutLocs[BBNum] = ValueTable(); + MInLocs.ejectTableForBlock(MBB); + MOutLocs.ejectTableForBlock(MBB); // We don't need live-in variable values for this block either. Output[BBNum].clear(); AllTheVLocs[BBNum].clear(); @@ -3594,7 +3582,8 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit( // anything for such out-of-scope blocks, but for the sake of being similar // to VarLocBasedLDV, eject these too. for (auto *MBB : ArtificialBlocks) - EjectBlock(*MBB); + if (MInLocs.hasTableFor(*MBB)) + EjectBlock(*MBB); return emitTransfers(AllVarsNumbering); } @@ -3693,8 +3682,8 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, // machine values. The outer dimension is the block number; while the inner // dimension is a LocIdx from MLocTracker. unsigned NumLocs = MTracker->getNumLocs(); - FuncValueTable MOutLocs(MaxNumBlocks, ValueTable(NumLocs)); - FuncValueTable MInLocs(MaxNumBlocks, ValueTable(NumLocs)); + FuncValueTable MOutLocs(MaxNumBlocks, NumLocs); + FuncValueTable MInLocs(MaxNumBlocks, NumLocs); // Solve the machine value dataflow problem using the MLocTransfer function, // storing the computed live-ins / live-outs into the array-of-arrays. 
We use @@ -3732,7 +3721,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, CurBB = MBB.getNumber(); VTracker = &vlocs[CurBB]; VTracker->MBB = &MBB; - MTracker->loadFromArray(MInLocs[CurBB], CurBB); + MTracker->loadFromArray(MInLocs[MBB], CurBB); CurInst = 1; for (auto &MI : MBB) { process(MI, &MOutLocs, &MInLocs); @@ -3946,7 +3935,7 @@ public: /// Find the live-in value number for the given block. Looks up the value at /// the PHI location on entry. BlockValueNum getValue(LDVSSABlock *LDVBB) { - return MLiveIns[LDVBB->BB.getNumber()][Loc.asU64()].asU64(); + return MLiveIns[LDVBB->BB][Loc.asU64()].asU64(); } }; @@ -4186,8 +4175,7 @@ std::optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl( }); for (auto &PHI : SortedPHIs) { - ValueIDNum ThisBlockValueNum = - MLiveIns[PHI->ParentBlock->BB.getNumber()][Loc.asU64()]; + ValueIDNum ThisBlockValueNum = MLiveIns[PHI->ParentBlock->BB][Loc.asU64()]; // Are all these things actually defined? for (auto &PHIIt : PHI->IncomingValues) { @@ -4196,7 +4184,7 @@ std::optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl( return std::nullopt; ValueIDNum ValueToCheck; - const ValueTable &BlockLiveOuts = MLiveOuts[PHIIt.first->BB.getNumber()]; + const ValueTable &BlockLiveOuts = MLiveOuts[PHIIt.first->BB]; auto VVal = ValidatedValues.find(PHIIt.first); if (VVal == ValidatedValues.end()) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h index d6dbb1feda3e..ccc284b62331 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h @@ -207,9 +207,48 @@ using namespace llvm; /// Type for a table of values in a block. using ValueTable = SmallVector<ValueIDNum, 0>; -/// Type for a table-of-table-of-values, i.e., the collection of either -/// live-in or live-out values for each block in the function. 
-using FuncValueTable = SmallVector<ValueTable, 0>; +/// A collection of ValueTables, one per BB in a function, with convenient +/// accessor methods. +struct FuncValueTable { + FuncValueTable(int NumBBs, int NumLocs) { + Storage.reserve(NumBBs); + for (int i = 0; i != NumBBs; ++i) + Storage.push_back( + std::make_unique<ValueTable>(NumLocs, ValueIDNum::EmptyValue)); + } + + /// Returns the ValueTable associated with MBB. + ValueTable &operator[](const MachineBasicBlock &MBB) const { + return (*this)[MBB.getNumber()]; + } + + /// Returns the ValueTable associated with the MachineBasicBlock whose number + /// is MBBNum. + ValueTable &operator[](int MBBNum) const { + auto &TablePtr = Storage[MBBNum]; + assert(TablePtr && "Trying to access a deleted table"); + return *TablePtr; + } + + /// Returns the ValueTable associated with the entry MachineBasicBlock. + ValueTable &tableForEntryMBB() const { return (*this)[0]; } + + /// Returns true if the ValueTable associated with MBB has not been freed. + bool hasTableFor(MachineBasicBlock &MBB) const { + return Storage[MBB.getNumber()] != nullptr; + } + + /// Frees the memory of the ValueTable associated with MBB. + void ejectTableForBlock(const MachineBasicBlock &MBB) { + Storage[MBB.getNumber()].reset(); + } + +private: + /// ValueTables are stored as unique_ptrs to allow for deallocation during + /// LDV; this was measured to have a significant impact on compiler memory + /// usage. + SmallVector<std::unique_ptr<ValueTable>, 0> Storage; +}; /// Thin wrapper around an integer -- designed to give more type safety to /// spill location numbers. 
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp index 0203034b5a01..643370f0573d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -426,8 +426,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { // Erase any virtregs that are now empty and unused. There may be <undef> // uses around. Keep the empty live range in that case. - for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) { - Register Reg = RegsToErase[i]; + for (Register Reg : RegsToErase) { if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) { ToShrink.remove(&LIS.getInterval(Reg)); eraseVirtReg(Reg); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp index f3b5069d351b..af0b0a20c856 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp @@ -13,7 +13,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/LowerEmuTLS.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/ModuleSummaryAnalysis.h" +#include "llvm/Analysis/StackSafetyAnalysis.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Constants.h" @@ -24,7 +28,7 @@ using namespace llvm; -#define DEBUG_TYPE "loweremutls" +#define DEBUG_TYPE "lower-emutls" namespace { @@ -36,22 +40,41 @@ public: } bool runOnModule(Module &M) override; -private: - bool addEmuTlsVar(Module &M, const GlobalVariable *GV); - static void copyLinkageVisibility(Module &M, - const GlobalVariable *from, - GlobalVariable *to) { - to->setLinkage(from->getLinkage()); - to->setVisibility(from->getVisibility()); - to->setDSOLocal(from->isDSOLocal()); - if (from->hasComdat()) { - 
to->setComdat(M.getOrInsertComdat(to->getName())); - to->getComdat()->setSelectionKind(from->getComdat()->getSelectionKind()); - } - } }; } +static bool addEmuTlsVar(Module &M, const GlobalVariable *GV); + +static void copyLinkageVisibility(Module &M, const GlobalVariable *from, + GlobalVariable *to) { + to->setLinkage(from->getLinkage()); + to->setVisibility(from->getVisibility()); + to->setDSOLocal(from->isDSOLocal()); + if (from->hasComdat()) { + to->setComdat(M.getOrInsertComdat(to->getName())); + to->getComdat()->setSelectionKind(from->getComdat()->getSelectionKind()); + } +} + +PreservedAnalyses LowerEmuTLSPass::run(Module &M, ModuleAnalysisManager &MAM) { + bool Changed = false; + SmallVector<const GlobalVariable *, 8> TlsVars; + for (const auto &G : M.globals()) { + if (G.isThreadLocal()) + TlsVars.push_back(&G); + } + for (const auto *G : TlsVars) + Changed |= addEmuTlsVar(M, G); + + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA = PreservedAnalyses::all(); + PA.abandon<GlobalsAA>(); + PA.abandon<ModuleSummaryIndexAnalysis>(); + PA.abandon<StackSafetyGlobalAnalysis>(); + return PA; +} + char LowerEmuTLS::ID = 0; INITIALIZE_PASS(LowerEmuTLS, DEBUG_TYPE, @@ -83,7 +106,7 @@ bool LowerEmuTLS::runOnModule(Module &M) { return Changed; } -bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) { +bool addEmuTlsVar(Module &M, const GlobalVariable *GV) { LLVMContext &C = M.getContext(); PointerType *VoidPtrType = PointerType::getUnqual(C); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp index b9db34f7be95..6eeed8b5c3f7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -208,8 +208,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, } SmallSet<Register, 32> Added; - for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { - Register Reg = 
LocalDefs[i]; + for (Register Reg : LocalDefs) { if (Added.insert(Reg).second) { // If it's not live beyond end of the bundle, mark it dead. bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg); @@ -218,8 +217,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, } } - for (unsigned i = 0, e = ExternUses.size(); i != e; ++i) { - Register Reg = ExternUses[i]; + for (Register Reg : ExternUses) { bool isKill = KilledUseSet.count(Reg); bool isUndef = UndefUseSet.count(Reg); MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) | diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp index aff4d95781f4..5bd6ca0978a4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp @@ -212,15 +212,9 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU) } std::unique_ptr<ScheduleDAGMutation> -llvm::createMacroFusionDAGMutation(ArrayRef<MacroFusionPredTy> Predicates) { +llvm::createMacroFusionDAGMutation(ArrayRef<MacroFusionPredTy> Predicates, + bool BranchOnly) { if (EnableMacroFusion) - return std::make_unique<MacroFusion>(Predicates, true); - return nullptr; -} - -std::unique_ptr<ScheduleDAGMutation> llvm::createBranchMacroFusionDAGMutation( - ArrayRef<MacroFusionPredTy> Predicates) { - if (EnableMacroFusion) - return std::make_unique<MacroFusion>(Predicates, false); + return std::make_unique<MacroFusion>(Predicates, !BranchOnly); return nullptr; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp index 40c42cabf776..e81d47930136 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp @@ -62,6 +62,118 @@ static RegisterRegAlloc fastRegAlloc("fast", "fast register allocator", namespace { +/// Assign ascending index for instructions in machine 
basic block. The index +/// can be used to determine dominance between instructions in same MBB. +class InstrPosIndexes { +public: + void unsetInitialized() { IsInitialized = false; } + + void init(const MachineBasicBlock &MBB) { + CurMBB = &MBB; + Instr2PosIndex.clear(); + uint64_t LastIndex = 0; + for (const MachineInstr &MI : MBB) { + LastIndex += InstrDist; + Instr2PosIndex[&MI] = LastIndex; + } + } + + /// Set \p Index to index of \p MI. If \p MI is newly inserted, it tries to + /// assign an index without affecting existing instructions' indexes. Return + /// true if all instructions' indexes have been reassigned. + bool getIndex(const MachineInstr &MI, uint64_t &Index) { + if (!IsInitialized) { + init(*MI.getParent()); + IsInitialized = true; + Index = Instr2PosIndex.at(&MI); + return true; + } + + assert(MI.getParent() == CurMBB && "MI is not in CurMBB"); + auto It = Instr2PosIndex.find(&MI); + if (It != Instr2PosIndex.end()) { + Index = It->second; + return false; + } + + // Distance is the number of consecutive unassigned instructions including + // MI. Start is the first instruction of them. End is the next of last + // instruction of them. + // e.g. + // |Instruction| A | B | C | MI | D | E | + // | Index | 1024 | | | | | 2048 | + // + // In this case, B, C, MI, D are unassigned. Distance is 4, Start is B, End + // is E. + unsigned Distance = 1; + MachineBasicBlock::const_iterator Start = MI.getIterator(), + End = std::next(Start); + while (Start != CurMBB->begin() && + !Instr2PosIndex.count(&*std::prev(Start))) { + --Start; + ++Distance; + } + while (End != CurMBB->end() && !Instr2PosIndex.count(&*(End))) { + ++End; + ++Distance; + } + + // LastIndex is initialized to last used index prior to MI or zero. + // In previous example, LastIndex is 1024, EndIndex is 2048; + uint64_t LastIndex = + Start == CurMBB->begin() ? 
0 : Instr2PosIndex.at(&*std::prev(Start)); + uint64_t Step; + if (End == CurMBB->end()) + Step = static_cast<uint64_t>(InstrDist); + else { + // No instruction uses index zero. + uint64_t EndIndex = Instr2PosIndex.at(&*End); + assert(EndIndex > LastIndex && "Index must be ascending order"); + unsigned NumAvailableIndexes = EndIndex - LastIndex - 1; + // We want the index gap between two adjacent MIs to be as uniform as + // possible. Given total A available indexes, D is number of consecutive + // unassigned instructions, S is the step. + // |<- S-1 -> MI <- S-1 -> MI <- A-S*D ->| + // There are S-1 available indexes between an unassigned instruction and + // its predecessor. There are A-S*D available indexes between the last + // unassigned instruction and its successor. + // Ideally, we want + // S-1 = A-S*D + // then + // S = (A+1)/(D+1) + // A valid S must be an integer greater than zero, so + // S <= (A+1)/(D+1) + // => + // A-S*D >= 0 + // That means we can safely use (A+1)/(D+1) as step. + // In previous example, Step is 204, Index of B, C, MI, D is 1228, 1432, + // 1636, 1840. + Step = (NumAvailableIndexes + 1) / (Distance + 1); + } + + // Reassign indexes for all instructions if the number of newly inserted + // instructions exceeds the available slots or all instructions are new. + if (LLVM_UNLIKELY(!Step || (!LastIndex && Step == InstrDist))) { + init(*CurMBB); + Index = Instr2PosIndex.at(&MI); + return true; + } + + for (auto I = Start; I != End; ++I) { + LastIndex += Step; + Instr2PosIndex[&*I] = LastIndex; + } + Index = Instr2PosIndex.at(&MI); + return false; + } + +private: + bool IsInitialized = false; + enum { InstrDist = 1024 }; + const MachineBasicBlock *CurMBB = nullptr; + DenseMap<const MachineInstr *, uint64_t> Instr2PosIndex; +}; + class RegAllocFast : public MachineFunctionPass { public: static char ID; @@ -153,6 +265,9 @@ private: // Register masks attached to the current instruction. 
SmallVector<const uint32_t *> RegMasks; + // Assign index for each instruction to quickly determine dominance. + InstrPosIndexes PosIndexes; + void setPhysRegState(MCPhysReg PhysReg, unsigned NewState); bool isPhysRegFree(MCPhysReg PhysReg) const; @@ -339,18 +454,13 @@ int RegAllocFast::getStackSpaceFor(Register VirtReg) { return FrameIdx; } -static bool dominates(MachineBasicBlock &MBB, - MachineBasicBlock::const_iterator A, - MachineBasicBlock::const_iterator B) { - auto MBBEnd = MBB.end(); - if (B == MBBEnd) - return true; - - MachineBasicBlock::const_iterator I = MBB.begin(); - for (; &*I != A && &*I != B; ++I) - ; - - return &*I == A; +static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, + const MachineInstr &B) { + uint64_t IndexA, IndexB; + PosIndexes.getIndex(A, IndexA); + if (LLVM_UNLIKELY(PosIndexes.getIndex(B, IndexB))) + PosIndexes.getIndex(A, IndexA); + return IndexA < IndexB; } /// Returns false if \p VirtReg is known to not live out of the current block. @@ -371,7 +481,7 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) { MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); return true; } else { - if (!SelfLoopDef || dominates(*MBB, DefInst.getIterator(), SelfLoopDef)) + if (!SelfLoopDef || dominates(PosIndexes, DefInst, *SelfLoopDef)) SelfLoopDef = &DefInst; } } @@ -396,7 +506,7 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) { // Try to handle some simple cases to avoid spilling and reloading every // value inside a self looping block. 
if (SelfLoopDef == &UseInst || - !dominates(*MBB, SelfLoopDef->getIterator(), UseInst.getIterator())) { + !dominates(PosIndexes, *SelfLoopDef, UseInst)) { MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); return true; } @@ -1565,6 +1675,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { this->MBB = &MBB; LLVM_DEBUG(dbgs() << "\nAllocating " << MBB); + PosIndexes.unsetInitialized(); RegUnitStates.assign(TRI->getNumRegUnits(), regFree); assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp index fba8c35ecec2..17a9f55cccc0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp @@ -165,8 +165,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { assert(RCI.NumRegs <= NumRegs && "Allocation order larger than regclass"); // CSR aliases go after the volatile registers, preserve the target's order. - for (unsigned i = 0, e = CSRAlias.size(); i != e; ++i) { - unsigned PhysReg = CSRAlias[i]; + for (unsigned PhysReg : CSRAlias) { uint8_t Cost = RegCosts[PhysReg]; if (Cost != LastCost) LastCostChange = N; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp index c1af37c8510f..3fbb93795075 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -305,7 +305,11 @@ namespace { /// number if it is not zero. If DstReg is a physical register and the /// existing subregister number of the def / use being updated is not zero, /// make sure to set it to the correct physical subregister. 
- void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx); + /// + /// If \p IsSubregToReg, we are coalescing a DstReg = SUBREG_TO_REG + /// SrcReg. This introduces an implicit-def of DstReg on coalesced users. + void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx, + bool IsSubregToReg); /// If the given machine operand reads only undefined lanes add an undef /// flag. @@ -1343,8 +1347,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, if (DstReg.isPhysical()) { Register NewDstReg = DstReg; - unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), - DefMI->getOperand(0).getSubReg()); + unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), DefSubIdx); if (NewDstIdx) NewDstReg = TRI->getSubReg(DstReg, NewDstIdx); @@ -1493,7 +1496,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, MRI->setRegClass(DstReg, NewRC); // Update machine operands and add flags. - updateRegDefsUses(DstReg, DstReg, DstIdx); + updateRegDefsUses(DstReg, DstReg, DstIdx, false); NewMI.getOperand(0).setSubReg(NewIdx); // updateRegDefUses can add an "undef" flag to the definition, since // it will replace DstReg with DstReg.DstIdx. 
If NewIdx is 0, make @@ -1618,8 +1621,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, NewMI.addOperand(MO); SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); - for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) { - MCRegister Reg = NewMIImplDefs[i]; + for (MCRegister Reg : NewMIImplDefs) { for (MCRegUnit Unit : TRI->regunits(Reg)) if (LiveRange *LR = LIS->getCachedRegUnit(Unit)) LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); @@ -1814,7 +1816,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, } void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, - unsigned SubIdx) { + unsigned SubIdx, bool IsSubregToReg) { bool DstIsPhys = DstReg.isPhysical(); LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg); @@ -1854,6 +1856,8 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr()) Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI)); + bool FullDef = true; + // Replace SrcReg with DstReg in all UseMI operands. for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = UseMI->getOperand(Ops[i]); @@ -1861,9 +1865,13 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, // Adjust <undef> flags in case of sub-register joins. We don't want to // turn a full def into a read-modify-write sub-register def and vice // versa. - if (SubIdx && MO.isDef()) + if (SubIdx && MO.isDef()) { MO.setIsUndef(!Reads); + if (!Reads) + FullDef = false; + } + // A subreg use of a partially undef (super) register may be a complete // undef use now and then has to be marked that way. 
if (MO.isUse() && !DstIsPhys) { @@ -1895,6 +1903,25 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, MO.substVirtReg(DstReg, SubIdx, *TRI); } + if (IsSubregToReg && !FullDef) { + // If the coalesced instruction doesn't fully define the register, we need + // to preserve the original super register liveness for SUBREG_TO_REG. + // + // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes, + // but it introduces liveness for other subregisters. Downstream users may + // have been relying on those bits, so we need to ensure their liveness is + // captured with a def of other lanes. + + // FIXME: Need to add new subrange if tracking subranges. We could also + // skip adding this if we knew the other lanes are dead, and only for + // other lanes. + + assert(!MRI->shouldTrackSubRegLiveness(DstReg) && + "this should update subranges"); + MachineInstrBuilder MIB(*MF, UseMI); + MIB.addReg(DstReg, RegState::ImplicitDefine); + } + LLVM_DEBUG({ dbgs() << "\t\tupdated: "; if (!UseMI->isDebugInstr()) @@ -2094,6 +2121,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { }); } + const bool IsSubregToReg = CopyMI->isSubregToReg(); + ShrinkMask = LaneBitmask::getNone(); ShrinkMainRange = false; @@ -2161,9 +2190,12 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { // Rewrite all SrcReg operands to DstReg. // Also update DstReg operands to include DstIdx if it is set. - if (CP.getDstIdx()) - updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx()); - updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); + if (CP.getDstIdx()) { + assert(!IsSubregToReg && "can this happen?"); + updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx(), false); + } + updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(), + IsSubregToReg); // Shrink subregister ranges if necessary. 
if (ShrinkMask.any()) { @@ -4236,8 +4268,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { InflateRegs.end()); LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n"); - for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) { - Register Reg = InflateRegs[i]; + for (Register Reg : InflateRegs) { if (MRI->reg_nodbg_empty(Reg)) continue; if (MRI->recomputeRegClass(Reg)) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp index 36c91b7fa97e..893aa4a91828 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp @@ -15,14 +15,17 @@ #include "llvm/CodeGen/ReplaceWithVeclib.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" +#include "llvm/Support/TypeSize.h" #include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; @@ -38,138 +41,137 @@ STATISTIC(NumTLIFuncDeclAdded, STATISTIC(NumFuncUsedAdded, "Number of functions added to `llvm.compiler.used`"); -static bool replaceWithTLIFunction(CallInst &CI, const StringRef TLIName) { - Module *M = CI.getModule(); - - Function *OldFunc = CI.getCalledFunction(); - - // Check if the vector library function is already declared in this module, - // otherwise insert it. +/// Returns a vector Function that it adds to the Module \p M. When an \p +/// ScalarFunc is not null, it copies its attributes to the newly created +/// Function. 
+Function *getTLIFunction(Module *M, FunctionType *VectorFTy, + const StringRef TLIName, + Function *ScalarFunc = nullptr) { Function *TLIFunc = M->getFunction(TLIName); if (!TLIFunc) { - TLIFunc = Function::Create(OldFunc->getFunctionType(), - Function::ExternalLinkage, TLIName, *M); - TLIFunc->copyAttributesFrom(OldFunc); + TLIFunc = + Function::Create(VectorFTy, Function::ExternalLinkage, TLIName, *M); + if (ScalarFunc) + TLIFunc->copyAttributesFrom(ScalarFunc); LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `" << TLIName << "` of type `" << *(TLIFunc->getType()) << "` to module.\n"); ++NumTLIFuncDeclAdded; - - // Add the freshly created function to llvm.compiler.used, - // similar to as it is done in InjectTLIMappings + // Add the freshly created function to llvm.compiler.used, similar to as it + // is done in InjectTLIMappings. appendToCompilerUsed(*M, {TLIFunc}); - LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << TLIName << "` to `@llvm.compiler.used`.\n"); ++NumFuncUsedAdded; } + return TLIFunc; +} - // Replace the call to the vector intrinsic with a call - // to the corresponding function from the vector library. - IRBuilder<> IRBuilder(&CI); - SmallVector<Value *> Args(CI.args()); - // Preserve the operand bundles. - SmallVector<OperandBundleDef, 1> OpBundles; - CI.getOperandBundlesAsDefs(OpBundles); - CallInst *Replacement = IRBuilder.CreateCall(TLIFunc, Args, OpBundles); - assert(OldFunc->getFunctionType() == TLIFunc->getFunctionType() && - "Expecting function types to be identical"); - CI.replaceAllUsesWith(Replacement); - if (isa<FPMathOperator>(Replacement)) { - // Preserve fast math flags for FP math. - Replacement->copyFastMathFlags(&CI); +/// Replace the call to the vector intrinsic ( \p CalltoReplace ) with a call to +/// the corresponding function from the vector library ( \p TLIVecFunc ). 
+static void replaceWithTLIFunction(CallInst &CalltoReplace, VFInfo &Info, + Function *TLIVecFunc) { + IRBuilder<> IRBuilder(&CalltoReplace); + SmallVector<Value *> Args(CalltoReplace.args()); + if (auto OptMaskpos = Info.getParamIndexForOptionalMask()) { + auto *MaskTy = VectorType::get(Type::getInt1Ty(CalltoReplace.getContext()), + Info.Shape.VF); + Args.insert(Args.begin() + OptMaskpos.value(), + Constant::getAllOnesValue(MaskTy)); } - LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `" - << OldFunc->getName() << "` with call to `" << TLIName - << "`.\n"); - ++NumCallsReplaced; - return true; + // Preserve the operand bundles. + SmallVector<OperandBundleDef, 1> OpBundles; + CalltoReplace.getOperandBundlesAsDefs(OpBundles); + CallInst *Replacement = IRBuilder.CreateCall(TLIVecFunc, Args, OpBundles); + CalltoReplace.replaceAllUsesWith(Replacement); + // Preserve fast math flags for FP math. + if (isa<FPMathOperator>(Replacement)) + Replacement->copyFastMathFlags(&CalltoReplace); } +/// Returns true when successfully replaced \p CallToReplace with a suitable +/// function taking vector arguments, based on available mappings in the \p TLI. +/// Currently only works when \p CallToReplace is a call to vectorized +/// intrinsic. static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI, - CallInst &CI) { - if (!CI.getCalledFunction()) { + CallInst &CallToReplace) { + if (!CallToReplace.getCalledFunction()) return false; - } - auto IntrinsicID = CI.getCalledFunction()->getIntrinsicID(); - if (IntrinsicID == Intrinsic::not_intrinsic) { - // Replacement is only performed for intrinsic functions + auto IntrinsicID = CallToReplace.getCalledFunction()->getIntrinsicID(); + // Replacement is only performed for intrinsic functions. + if (IntrinsicID == Intrinsic::not_intrinsic) return false; - } - // Convert vector arguments to scalar type and check that - // all vector operands have identical vector width. 
+ // Compute argument types of the corresponding scalar call. Additionally + // checks if in the vector call, all vector operands have the same EC. ElementCount VF = ElementCount::getFixed(0); - SmallVector<Type *> ScalarTypes; - for (auto Arg : enumerate(CI.args())) { - auto *ArgType = Arg.value()->getType(); - // Vector calls to intrinsics can still have - // scalar operands for specific arguments. + SmallVector<Type *> ScalarArgTypes; + for (auto Arg : enumerate(CallToReplace.args())) { + auto *ArgTy = Arg.value()->getType(); if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, Arg.index())) { - ScalarTypes.push_back(ArgType); - } else { - // The argument in this place should be a vector if - // this is a call to a vector intrinsic. - auto *VectorArgTy = dyn_cast<VectorType>(ArgType); - if (!VectorArgTy) { - // The argument is not a vector, do not perform - // the replacement. - return false; - } - ElementCount NumElements = VectorArgTy->getElementCount(); - if (NumElements.isScalable()) { - // The current implementation does not support - // scalable vectors. + ScalarArgTypes.push_back(ArgTy); + } else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) { + ScalarArgTypes.push_back(ArgTy->getScalarType()); + // Disallow vector arguments with different VFs. When processing the first + // vector argument, store its VF, and for the rest ensure that they match + // it. + if (VF.isZero()) + VF = VectorArgTy->getElementCount(); + else if (VF != VectorArgTy->getElementCount()) return false; - } - if (VF.isNonZero() && VF != NumElements) { - // The different arguments differ in vector size. - return false; - } else { - VF = NumElements; - } - ScalarTypes.push_back(VectorArgTy->getElementType()); - } + } else + // Exit when it is supposed to be a vector argument but it isn't. + return false; } - // Try to reconstruct the name for the scalar version of this - // intrinsic using the intrinsic ID and the argument types - // converted to scalar above. 
- std::string ScalarName; - if (Intrinsic::isOverloaded(IntrinsicID)) { - ScalarName = Intrinsic::getName(IntrinsicID, ScalarTypes, CI.getModule()); - } else { - ScalarName = Intrinsic::getName(IntrinsicID).str(); - } + // Try to reconstruct the name for the scalar version of this intrinsic using + // the intrinsic ID and the argument types converted to scalar above. + std::string ScalarName = + (Intrinsic::isOverloaded(IntrinsicID) + ? Intrinsic::getName(IntrinsicID, ScalarArgTypes, + CallToReplace.getModule()) + : Intrinsic::getName(IntrinsicID).str()); + + // Try to find the mapping for the scalar version of this intrinsic and the + // exact vector width of the call operands in the TargetLibraryInfo. First, + // check with a non-masked variant, and if that fails try with a masked one. + const VecDesc *VD = + TLI.getVectorMappingInfo(ScalarName, VF, /*Masked*/ false); + if (!VD && !(VD = TLI.getVectorMappingInfo(ScalarName, VF, /*Masked*/ true))) + return false; - if (!TLI.isFunctionVectorizable(ScalarName)) { - // The TargetLibraryInfo does not contain a vectorized version of - // the scalar function. + LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI mapping from: `" << ScalarName + << "` and vector width " << VF << " to: `" + << VD->getVectorFnName() << "`.\n"); + + // Replace the call to the intrinsic with a call to the vector library + // function. + Type *ScalarRetTy = CallToReplace.getType()->getScalarType(); + FunctionType *ScalarFTy = + FunctionType::get(ScalarRetTy, ScalarArgTypes, /*isVarArg*/ false); + const std::string MangledName = VD->getVectorFunctionABIVariantString(); + auto OptInfo = VFABI::tryDemangleForVFABI(MangledName, ScalarFTy); + if (!OptInfo) return false; - } - // Try to find the mapping for the scalar version of this intrinsic - // and the exact vector width of the call operands in the - // TargetLibraryInfo. 
- StringRef TLIName = TLI.getVectorizedFunction(ScalarName, VF); - - LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `" - << ScalarName << "` and vector width " << VF << ".\n"); - - if (!TLIName.empty()) { - // Found the correct mapping in the TargetLibraryInfo, - // replace the call to the intrinsic with a call to - // the vector library function. - LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI function `" << TLIName - << "`.\n"); - return replaceWithTLIFunction(CI, TLIName); - } + FunctionType *VectorFTy = VFABI::createFunctionType(*OptInfo, ScalarFTy); + if (!VectorFTy) + return false; + + Function *FuncToReplace = CallToReplace.getCalledFunction(); + Function *TLIFunc = getTLIFunction(CallToReplace.getModule(), VectorFTy, + VD->getVectorFnName(), FuncToReplace); + replaceWithTLIFunction(CallToReplace, *OptInfo, TLIFunc); - return false; + LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `" + << FuncToReplace->getName() << "` with call to `" + << TLIFunc->getName() << "`.\n"); + ++NumCallsReplaced; + return true; } static bool runImpl(const TargetLibraryInfo &TLI, Function &F) { @@ -185,9 +187,8 @@ static bool runImpl(const TargetLibraryInfo &TLI, Function &F) { } // Erase the calls to the intrinsics that have been replaced // with calls to the vector library. - for (auto *CI : ReplacedCalls) { + for (auto *CI : ReplacedCalls) CI->eraseFromParent(); - } return Changed; } @@ -207,10 +208,10 @@ PreservedAnalyses ReplaceWithVeclib::run(Function &F, PA.preserve<DemandedBitsAnalysis>(); PA.preserve<OptimizationRemarkEmitterAnalysis>(); return PA; - } else { - // The pass did not replace any calls, hence it preserves all analyses. - return PreservedAnalyses::all(); } + + // The pass did not replace any calls, hence it preserves all analyses. 
+ return PreservedAnalyses::all(); } //////////////////////////////////////////////////////////////////////////////// diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c782ad117ce6..0d46c7868d87 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -13703,8 +13703,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::TRUNCATE && N0.getOperand(1).getOpcode() == ISD::Constant && - (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), - N0.getValueType()) || + (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) || !TLI.isZExtFree(N0.getValueType(), VT))) { SDValue X = N0.getOperand(0).getOperand(0); X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT); @@ -13935,8 +13934,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::TRUNCATE && N0.getOperand(1).getOpcode() == ISD::Constant && - !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), - N0.getValueType())) { + !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) { SDLoc DL(N); SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT); SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1)); @@ -14759,6 +14757,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // Attempt to pre-truncate BUILD_VECTOR sources. if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations && + N0.hasOneUse() && TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) && // Avoid creating illegal types if running after type legalizer. 
(!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) { @@ -14818,11 +14817,11 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (SDValue Reduced = reduceLoadWidth(N)) return Reduced; - // Handle the case where the load remains an extending load even - // after truncation. + // Handle the case where the truncated result is at least as wide as the + // loaded type. if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { auto *LN0 = cast<LoadSDNode>(N0); - if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) { + if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) { SDValue NewLoad = DAG.getExtLoad( LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand()); @@ -15165,8 +15164,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { *LN0->getMemOperand())) { SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), LN0->getAlign(), - LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); + LN0->getMemOperand()); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); return Load; } @@ -18855,8 +18853,7 @@ struct LoadedSlice { void addSliceGain(const LoadedSlice &LS) { // Each slice saves a truncate. const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo(); - if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(), - LS.Inst->getValueType(0))) + if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0))) ++Truncates; // If there is a shift amount, this slice gets rid of it. 
if (LS.Shift) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index a83129586339..f3d8edb8926b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1000,8 +1000,7 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { if (!CanLowerReturn) return false; - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - EVT VT = RetTys[I]; + for (EVT VT : RetTys) { MVT RegisterVT = TLI.getRegisterType(CLI.RetTy->getContext(), VT); unsigned NumRegs = TLI.getNumRegisters(CLI.RetTy->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 03cba892a167..5926a6058111 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -377,8 +377,7 @@ Register FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) { ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs); Register FirstReg; - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; + for (EVT ValueVT : ValueVTs) { MVT RegisterVT = TLI->getRegisterType(Ty->getContext(), ValueVT); unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 5e1f9fbcdde0..4e317062cec4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -913,14 +913,17 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // normal undefined upper bits behavior to 
allow using an in-reg extend // with the illegal FP type, so load as an integer and do the // from-integer conversion. - if (SrcVT.getScalarType() == MVT::f16) { + EVT SVT = SrcVT.getScalarType(); + if (SVT == MVT::f16 || SVT == MVT::bf16) { EVT ISrcVT = SrcVT.changeTypeToInteger(); EVT IDestVT = DestVT.changeTypeToInteger(); EVT ILoadVT = TLI.getRegisterType(IDestVT.getSimpleVT()); SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, ILoadVT, Chain, Ptr, ISrcVT, LD->getMemOperand()); - Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result); + Value = + DAG.getNode(SVT == MVT::f16 ? ISD::FP16_TO_FP : ISD::BF16_TO_FP, + dl, DestVT, Result); Chain = Result.getValue(1); break; } @@ -4905,7 +4908,9 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { static MVT getPromotedVectorElementType(const TargetLowering &TLI, MVT EltVT, MVT NewEltVT) { unsigned OldEltsPerNewElt = EltVT.getSizeInBits() / NewEltVT.getSizeInBits(); - MVT MidVT = MVT::getVectorVT(NewEltVT, OldEltsPerNewElt); + MVT MidVT = OldEltsPerNewElt == 1 + ? 
NewEltVT + : MVT::getVectorVT(NewEltVT, OldEltsPerNewElt); assert(TLI.isTypeLegal(MidVT) && "unexpected"); return MidVT; } @@ -5349,6 +5354,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FEXP: case ISD::FEXP2: case ISD::FEXP10: + case ISD::FCANONICALIZE: Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); Results.push_back( @@ -5391,7 +5397,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() && "Invalid promote type for build_vector"); - assert(NewEltVT.bitsLT(EltVT) && "not handled"); + assert(NewEltVT.bitsLE(EltVT) && "not handled"); MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); @@ -5402,7 +5408,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { } SDLoc SL(Node); - SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewOps); + SDValue Concat = + DAG.getNode(MidVT == NewEltVT ? ISD::BUILD_VECTOR : ISD::CONCAT_VECTORS, + SL, NVT, NewOps); SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat); Results.push_back(CvtVec); break; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index c4605a6b9598..65919a64b806 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -2214,6 +2214,9 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { case ISD::FP_TO_UINT_SAT: R = PromoteFloatOp_FP_TO_XINT_SAT(N, OpNo); break; case ISD::FP_EXTEND: R = PromoteFloatOp_FP_EXTEND(N, OpNo); break; + case ISD::STRICT_FP_EXTEND: + R = PromoteFloatOp_STRICT_FP_EXTEND(N, OpNo); + break; case ISD::SELECT_CC: R = PromoteFloatOp_SELECT_CC(N, OpNo); break; case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break; case ISD::STORE: R = 
PromoteFloatOp_STORE(N, OpNo); break; @@ -2276,6 +2279,26 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo) { return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Op); } +SDValue DAGTypeLegalizer::PromoteFloatOp_STRICT_FP_EXTEND(SDNode *N, + unsigned OpNo) { + assert(OpNo == 1); + + SDValue Op = GetPromotedFloat(N->getOperand(1)); + EVT VT = N->getValueType(0); + + // Desired VT is same as promoted type. Use promoted float directly. + if (VT == Op->getValueType(0)) { + ReplaceValueWith(SDValue(N, 1), N->getOperand(0)); + return Op; + } + + // Else, extend the promoted float value to the desired VT. + SDValue Res = DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N), N->getVTList(), + N->getOperand(0), Op); + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + // Promote the float operands used for comparison. The true- and false- // operands have the same type as the result and are promoted, if needed, by // PromoteFloatRes_SELECT_CC diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 362fa92dd44b..3d21bd22e6ef 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1871,6 +1871,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::EXPERIMENTAL_VP_STRIDED_STORE: Res = PromoteIntOp_VP_STRIDED(N, OpNo); break; + case ISD::EXPERIMENTAL_VP_SPLICE: + Res = PromoteIntOp_VP_SPLICE(N, OpNo); + break; } // If the result is null, the sub-method took care of registering results etc. 
@@ -2549,6 +2552,20 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) { return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_VP_SPLICE(SDNode *N, unsigned OpNo) { + SmallVector<SDValue, 6> NewOps(N->op_begin(), N->op_end()); + + if (OpNo == 2) { // Offset operand + NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); + } + + assert((OpNo == 4 || OpNo == 5) && "Unexpected operand for promotion"); + + NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo)); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + //===----------------------------------------------------------------------===// // Integer Result Expansion //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 9d5931b44ac6..84b1b2c71fd0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -410,6 +410,7 @@ private: SDValue PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_VP_SPLICE(SDNode *N, unsigned OpNo); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -712,6 +713,7 @@ private: SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_STRICT_FP_EXTEND(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_UnaryOp(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo); diff --git 
a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index ab4c33c9e976..f73ddfee2b90 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -296,28 +296,24 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { if (isNewLoad) AddPred(LoadSU, ChainPred); } - for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { - const SDep &Pred = LoadPreds[i]; + for (const SDep &Pred : LoadPreds) { RemovePred(SU, Pred); if (isNewLoad) { AddPred(LoadSU, Pred); } } - for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { - const SDep &Pred = NodePreds[i]; + for (const SDep &Pred : NodePreds) { RemovePred(SU, Pred); AddPred(NewSU, Pred); } - for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { - SDep D = NodeSuccs[i]; + for (SDep D : NodeSuccs) { SUnit *SuccDep = D.getSUnit(); D.setSUnit(SU); RemovePred(SuccDep, D); D.setSUnit(NewSU); AddPred(SuccDep, D); } - for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { - SDep D = ChainSuccs[i]; + for (SDep D : ChainSuccs) { SUnit *SuccDep = D.getSUnit(); D.setSUnit(SU); RemovePred(SuccDep, D); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 5be1892a44f6..81facf92e55a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6858,8 +6858,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // expanding copies of large vectors from registers. This only works for // fixed length vectors, since we need to know the exact number of // elements. 
- if (N2C && N1.getOperand(0).getValueType().isFixedLengthVector() && - N1.getOpcode() == ISD::CONCAT_VECTORS && N1.getNumOperands() > 0) { + if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS && + N1.getOperand(0).getValueType().isFixedLengthVector()) { unsigned Factor = N1.getOperand(0).getValueType().getVectorNumElements(); return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, @@ -6976,7 +6976,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of // the concat have the same type as the extract. - if (N1.getOpcode() == ISD::CONCAT_VECTORS && N1.getNumOperands() > 0 && + if (N1.getOpcode() == ISD::CONCAT_VECTORS && VT == N1.getOperand(0).getValueType()) { unsigned Factor = VT.getVectorMinNumElements(); return N1.getOperand(N2C->getZExtValue() / Factor); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 12ed4a82ee91..3c4b285cb067 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -10627,8 +10627,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { else if (CLI.RetZExt) AssertOp = ISD::AssertZext; unsigned CurReg = 0; - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - EVT VT = RetTys[I]; + for (EVT VT : RetTys) { MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), CLI.CallConv, VT); unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(), diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index a1cf4cbbee1b..3dc6e4bbcf46 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ 
b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2786,7 +2786,7 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, Val = decodeSignRotatedValue(Val); ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); - return C && C->getSExtValue() == Val; + return C && C->getAPIntValue().trySExtValue() == Val; } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool @@ -3612,12 +3612,24 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, CurDAG->getTargetConstant(Val, SDLoc(NodeToMatch), VT), nullptr)); continue; } - case OPC_EmitRegister: { - MVT::SimpleValueType VT = - static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]); + case OPC_EmitRegister: + case OPC_EmitRegisterI32: + case OPC_EmitRegisterI64: { + MVT::SimpleValueType VT; + switch (Opcode) { + case OPC_EmitRegisterI32: + VT = MVT::i32; + break; + case OPC_EmitRegisterI64: + VT = MVT::i64; + break; + default: + VT = static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]); + break; + } unsigned RegNo = MatcherTable[MatcherIndex++]; - RecordedNodes.push_back(std::pair<SDValue, SDNode*>( - CurDAG->getRegister(RegNo, VT), nullptr)); + RecordedNodes.push_back(std::pair<SDValue, SDNode *>( + CurDAG->getRegister(RegNo, VT), nullptr)); continue; } case OPC_EmitRegister2: { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp index 38f658084294..d4840d117110 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp @@ -115,7 +115,7 @@ Constant *ShadowStackGCLowering::GetFrameMap(Function &F) { Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1)); if (!C->isNullValue()) NumMeta = I + 1; - Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr)); + Metadata.push_back(C); } Metadata.resize(NumMeta); @@ -173,7 +173,7 @@ Type 
*ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) { bool ShadowStackGCLowering::doInitialization(Module &M) { bool Active = false; for (Function &F : M) { - if (F.hasGC() && F.getGC() == std::string("shadow-stack")) { + if (F.hasGC() && F.getGC() == "shadow-stack") { Active = true; break; } @@ -292,8 +292,7 @@ void ShadowStackGCLowering::getAnalysisUsage(AnalysisUsage &AU) const { /// runOnFunction - Insert code to maintain the shadow stack. bool ShadowStackGCLowering::runOnFunction(Function &F) { // Quick exit for functions that do not use the shadow stack GC. - if (!F.hasGC() || - F.getGC() != std::string("shadow-stack")) + if (!F.hasGC() || F.getGC() != "shadow-stack") return false; LLVMContext &Context = F.getContext(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 9a0dd92bb58e..6e69dc66429d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -2681,6 +2681,13 @@ MCSection *TargetLoweringObjectFileGOFF::getExplicitSectionGlobal( return SelectSectionForGlobal(GO, Kind, TM); } +MCSection *TargetLoweringObjectFileGOFF::getSectionForLSDA( + const Function &F, const MCSymbol &FnSym, const TargetMachine &TM) const { + std::string Name = ".gcc_exception_table." 
+ F.getName().str(); + return getContext().getGOFFSection(Name, SectionKind::getData(), nullptr, + nullptr); +} + MCSection *TargetLoweringObjectFileGOFF::SelectSectionForGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { auto *Symbol = TM.getSymbol(GO); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp index faa5466b69e8..4003a08a5422 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -947,6 +947,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: case ExceptionHandling::AIX: + case ExceptionHandling::ZOS: addPass(createDwarfEHPass(getOptLevel())); break; case ExceptionHandling::WinEH: diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index bf689dbd308f..526cb847e8a0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1124,8 +1124,7 @@ bool TwoAddressInstructionPass::rescheduleKillAboveMI( } } - for (unsigned i = 0, e = OtherDefs.size(); i != e; ++i) { - Register MOReg = OtherDefs[i]; + for (Register MOReg : OtherDefs) { if (regOverlapsSet(Uses, MOReg)) return false; if (MOReg.isPhysical() && regOverlapsSet(LiveDefs, MOReg)) diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp index 409bec7a874b..809b2d51f059 100644 --- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp +++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp @@ -188,21 +188,41 @@ private: Edge::Kind DeltaKind; Symbol *TargetSymbol; uint64_t Addend; + + bool FixingFromSymbol = true; 
if (&BlockToFix == &FromSymbol->getAddressable()) { + if (LLVM_UNLIKELY(&BlockToFix == &ToSymbol->getAddressable())) { + // From and To are symbols in the same block. Decide direction by offset + // instead. + if (ToSymbol->getAddress() > FixupAddress) + FixingFromSymbol = true; + else if (FromSymbol->getAddress() > FixupAddress) + FixingFromSymbol = false; + else + FixingFromSymbol = FromSymbol->getAddress() >= ToSymbol->getAddress(); + } else + FixingFromSymbol = true; + } else { + if (&BlockToFix == &ToSymbol->getAddressable()) + FixingFromSymbol = false; + else { + // BlockToFix was neither FromSymbol nor ToSymbol. + return make_error<JITLinkError>("SUBTRACTOR relocation must fix up " + "either 'A' or 'B' (or a symbol in one " + "of their alt-entry groups)"); + } + } + + if (FixingFromSymbol) { TargetSymbol = ToSymbol; DeltaKind = (SubRI.r_length == 3) ? aarch64::Delta64 : aarch64::Delta32; Addend = FixupValue + (FixupAddress - FromSymbol->getAddress()); // FIXME: handle extern 'from'. - } else if (&BlockToFix == &ToSymbol->getAddressable()) { + } else { TargetSymbol = &*FromSymbol; DeltaKind = (SubRI.r_length == 3) ? aarch64::NegDelta64 : aarch64::NegDelta32; Addend = FixupValue - (FixupAddress - ToSymbol->getAddress()); - } else { - // BlockToFix was neither FromSymbol nor ToSymbol. 
- return make_error<JITLinkError>("SUBTRACTOR relocation must fix up " - "either 'A' or 'B' (or a symbol in one " - "of their alt-entry groups)"); } return PairRelocInfo(DeltaKind, TargetSymbol, Addend); diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp index 49f619357f08..eeca27771ad6 100644 --- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp +++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp @@ -179,21 +179,41 @@ private: Edge::Kind DeltaKind; Symbol *TargetSymbol; uint64_t Addend; + + bool FixingFromSymbol = true; if (&BlockToFix == &FromSymbol->getAddressable()) { + if (LLVM_UNLIKELY(&BlockToFix == &ToSymbol->getAddressable())) { + // From and To are symbols in the same block. Decide direction by offset + // instead. + if (ToSymbol->getAddress() > FixupAddress) + FixingFromSymbol = true; + else if (FromSymbol->getAddress() > FixupAddress) + FixingFromSymbol = false; + else + FixingFromSymbol = FromSymbol->getAddress() >= ToSymbol->getAddress(); + } else + FixingFromSymbol = true; + } else { + if (&BlockToFix == &ToSymbol->getAddressable()) + FixingFromSymbol = false; + else { + // BlockToFix was neither FromSymbol nor ToSymbol. + return make_error<JITLinkError>("SUBTRACTOR relocation must fix up " + "either 'A' or 'B' (or a symbol in one " + "of their alt-entry groups)"); + } + } + + if (FixingFromSymbol) { TargetSymbol = ToSymbol; DeltaKind = (SubRI.r_length == 3) ? x86_64::Delta64 : x86_64::Delta32; Addend = FixupValue + (FixupAddress - FromSymbol->getAddress()); // FIXME: handle extern 'from'. - } else if (&BlockToFix == &ToSymbol->getAddressable()) { + } else { TargetSymbol = FromSymbol; DeltaKind = (SubRI.r_length == 3) ? x86_64::NegDelta64 : x86_64::NegDelta32; Addend = FixupValue - (FixupAddress - ToSymbol->getAddress()); - } else { - // BlockToFix was neither FromSymbol nor ToSymbol. 
- return make_error<JITLinkError>("SUBTRACTOR relocation must fix up " - "either 'A' or 'B' (or a symbol in one " - "of their alt-entry chains)"); } return PairRelocInfo(DeltaKind, TargetSymbol, Addend); diff --git a/contrib/llvm-project/llvm/lib/Frontend/HLSL/HLSLResource.cpp b/contrib/llvm-project/llvm/lib/Frontend/HLSL/HLSLResource.cpp index 709fe3212623..bcdbe5eadc69 100644 --- a/contrib/llvm-project/llvm/lib/Frontend/HLSL/HLSLResource.cpp +++ b/contrib/llvm-project/llvm/lib/Frontend/HLSL/HLSLResource.cpp @@ -23,13 +23,15 @@ GlobalVariable *FrontendResource::getGlobalVariable() { cast<ConstantAsMetadata>(Entry->getOperand(0))->getValue()); } -StringRef FrontendResource::getSourceType() { - return cast<MDString>(Entry->getOperand(1))->getString(); -} - ResourceKind FrontendResource::getResourceKind() { return static_cast<ResourceKind>( cast<ConstantInt>( + cast<ConstantAsMetadata>(Entry->getOperand(1))->getValue()) + ->getLimitedValue()); +} +ElementType FrontendResource::getElementType() { + return static_cast<ElementType>( + cast<ConstantInt>( cast<ConstantAsMetadata>(Entry->getOperand(2))->getValue()) ->getLimitedValue()); } @@ -49,14 +51,15 @@ uint32_t FrontendResource::getSpace() { ->getLimitedValue(); } -FrontendResource::FrontendResource(GlobalVariable *GV, StringRef TypeStr, - ResourceKind RK, bool IsROV, +FrontendResource::FrontendResource(GlobalVariable *GV, ResourceKind RK, + ElementType ElTy, bool IsROV, uint32_t ResIndex, uint32_t Space) { auto &Ctx = GV->getContext(); IRBuilder<> B(Ctx); Entry = MDNode::get( - Ctx, {ValueAsMetadata::get(GV), MDString::get(Ctx, TypeStr), + Ctx, {ValueAsMetadata::get(GV), ConstantAsMetadata::get(B.getInt32(static_cast<int>(RK))), + ConstantAsMetadata::get(B.getInt32(static_cast<int>(ElTy))), ConstantAsMetadata::get(B.getInt1(IsROV)), ConstantAsMetadata::get(B.getInt32(ResIndex)), ConstantAsMetadata::get(B.getInt32(Space))}); diff --git a/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp 
b/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp index d499d74f7ba0..7fdc35e7fca0 100644 --- a/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp +++ b/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp @@ -868,7 +868,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, } if (GVAlign > 1) { - unsigned DstWidth = CI2->getType()->getBitWidth(); + unsigned DstWidth = CI2->getBitWidth(); unsigned SrcWidth = std::min(DstWidth, Log2(GVAlign)); APInt BitsNotSet(APInt::getLowBitsSet(DstWidth, SrcWidth)); diff --git a/contrib/llvm-project/llvm/lib/IR/Globals.cpp b/contrib/llvm-project/llvm/lib/IR/Globals.cpp index 51bdbeb0abf2..239acd2181e8 100644 --- a/contrib/llvm-project/llvm/lib/IR/Globals.cpp +++ b/contrib/llvm-project/llvm/lib/IR/Globals.cpp @@ -144,25 +144,27 @@ void GlobalObject::copyAttributesFrom(const GlobalObject *Src) { std::string GlobalValue::getGlobalIdentifier(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName) { - // Value names may be prefixed with a binary '1' to indicate // that the backend should not modify the symbols due to any platform // naming convention. Do not include that '1' in the PGO profile name. if (Name[0] == '\1') Name = Name.substr(1); - std::string NewName = std::string(Name); + std::string GlobalName; if (llvm::GlobalValue::isLocalLinkage(Linkage)) { // For local symbols, prepend the main file name to distinguish them. // Do not include the full path in the file name since there's no guarantee // that it will stay the same, e.g., if the files are checked out from // version control in different locations. 
if (FileName.empty()) - NewName = NewName.insert(0, "<unknown>:"); + GlobalName += "<unknown>"; else - NewName = NewName.insert(0, FileName.str() + ":"); + GlobalName += FileName; + + GlobalName += kGlobalIdentifierDelimiter; } - return NewName; + GlobalName += Name; + return GlobalName; } std::string GlobalValue::getGlobalIdentifier() const { diff --git a/contrib/llvm-project/llvm/lib/IR/LLVMContext.cpp b/contrib/llvm-project/llvm/lib/IR/LLVMContext.cpp index 8ddf51537ec1..57077e786efc 100644 --- a/contrib/llvm-project/llvm/lib/IR/LLVMContext.cpp +++ b/contrib/llvm-project/llvm/lib/IR/LLVMContext.cpp @@ -256,10 +256,13 @@ void LLVMContext::diagnose(const DiagnosticInfo &DI) { RS->emit(*OptDiagBase); // If there is a report handler, use it. - if (pImpl->DiagHandler && - (!pImpl->RespectDiagnosticFilters || isDiagnosticEnabled(DI)) && - pImpl->DiagHandler->handleDiagnostics(DI)) - return; + if (pImpl->DiagHandler) { + if (DI.getSeverity() == DS_Error) + pImpl->DiagHandler->HasErrors = true; + if ((!pImpl->RespectDiagnosticFilters || isDiagnosticEnabled(DI)) && + pImpl->DiagHandler->handleDiagnostics(DI)) + return; + } if (!isDiagnosticEnabled(DI)) return; diff --git a/contrib/llvm-project/llvm/lib/IR/Metadata.cpp b/contrib/llvm-project/llvm/lib/IR/Metadata.cpp index 7bc25e30b893..515893d079b8 100644 --- a/contrib/llvm-project/llvm/lib/IR/Metadata.cpp +++ b/contrib/llvm-project/llvm/lib/IR/Metadata.cpp @@ -1566,7 +1566,7 @@ void Instruction::updateDIAssignIDMapping(DIAssignID *ID) { "Expect existing attachment to be mapped"); auto &InstVec = InstrsIt->second; - auto *InstIt = std::find(InstVec.begin(), InstVec.end(), this); + auto *InstIt = llvm::find(InstVec, this); assert(InstIt != InstVec.end() && "Expect instruction to be mapped to attachment"); // The vector contains a ptr to this. 
If this is the only element in the diff --git a/contrib/llvm-project/llvm/lib/IR/Type.cpp b/contrib/llvm-project/llvm/lib/IR/Type.cpp index 85d779c98a9b..c59bc3622fde 100644 --- a/contrib/llvm-project/llvm/lib/IR/Type.cpp +++ b/contrib/llvm-project/llvm/lib/IR/Type.cpp @@ -834,6 +834,8 @@ struct TargetTypeInfo { static TargetTypeInfo getTargetTypeInfo(const TargetExtType *Ty) { LLVMContext &C = Ty->getContext(); StringRef Name = Ty->getName(); + if (Name.equals("spirv.Image")) + return TargetTypeInfo(PointerType::get(C, 0), TargetExtType::CanBeGlobal); if (Name.starts_with("spirv.")) return TargetTypeInfo(PointerType::get(C, 0), TargetExtType::HasZeroInit, TargetExtType::CanBeGlobal); diff --git a/contrib/llvm-project/llvm/lib/IR/Verifier.cpp b/contrib/llvm-project/llvm/lib/IR/Verifier.cpp index 8aba28026306..aeaca21a99cc 100644 --- a/contrib/llvm-project/llvm/lib/IR/Verifier.cpp +++ b/contrib/llvm-project/llvm/lib/IR/Verifier.cpp @@ -2296,10 +2296,9 @@ void Verifier::verifyFunctionMetadata( Check(isa<ConstantAsMetadata>(MD->getOperand(0)), "expected a constant operand for !kcfi_type", MD); Constant *C = cast<ConstantAsMetadata>(MD->getOperand(0))->getValue(); - Check(isa<ConstantInt>(C), + Check(isa<ConstantInt>(C) && isa<IntegerType>(C->getType()), "expected a constant integer operand for !kcfi_type", MD); - IntegerType *Type = cast<ConstantInt>(C)->getType(); - Check(Type->getBitWidth() == 32, + Check(cast<ConstantInt>(C)->getBitWidth() == 32, "expected a 32-bit integer constant operand for !kcfi_type", MD); } } @@ -5690,8 +5689,10 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { "vector of ints"); auto *Op3 = cast<ConstantInt>(Call.getArgOperand(2)); - Check(Op3->getType()->getBitWidth() <= 32, - "third argument of [us][mul|div]_fix[_sat] must fit within 32 bits"); + Check(Op3->getType()->isIntegerTy(), + "third operand of [us][mul|div]_fix[_sat] must be an int type"); + Check(Op3->getBitWidth() <= 32, + "third operand of 
[us][mul|div]_fix[_sat] must fit within 32 bits"); if (ID == Intrinsic::smul_fix || ID == Intrinsic::smul_fix_sat || ID == Intrinsic::sdiv_fix || ID == Intrinsic::sdiv_fix_sat) { diff --git a/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp index 9e1d108ac14d..49668de27d67 100644 --- a/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp +++ b/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp @@ -154,7 +154,10 @@ public: void emitGNUAttribute(unsigned Tag, unsigned Value) override; StringRef getMnemonic(MCInst &MI) override { - return InstPrinter->getMnemonic(&MI).first; + auto [Ptr, Bits] = InstPrinter->getMnemonic(&MI); + assert((Bits != 0 || Ptr == nullptr) && + "Invalid char pointer for instruction with no mnemonic"); + return Ptr; } void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; diff --git a/contrib/llvm-project/llvm/lib/MC/MCContext.cpp b/contrib/llvm-project/llvm/lib/MC/MCContext.cpp index 6e72b5062a1d..c1db7e3943c4 100644 --- a/contrib/llvm-project/llvm/lib/MC/MCContext.cpp +++ b/contrib/llvm-project/llvm/lib/MC/MCContext.cpp @@ -650,10 +650,16 @@ MCSectionGOFF *MCContext::getGOFFSection(StringRef Section, SectionKind Kind, MCSection *Parent, const MCExpr *SubsectionId) { // Do the lookup. If we don't have a hit, return a new section. 
- auto &GOFFSection = GOFFUniquingMap[Section.str()]; - if (!GOFFSection) - GOFFSection = new (GOFFAllocator.Allocate()) - MCSectionGOFF(Section, Kind, Parent, SubsectionId); + auto IterBool = + GOFFUniquingMap.insert(std::make_pair(Section.str(), nullptr)); + auto Iter = IterBool.first; + if (!IterBool.second) + return Iter->second; + + StringRef CachedName = Iter->first; + MCSectionGOFF *GOFFSection = new (GOFFAllocator.Allocate()) + MCSectionGOFF(CachedName, Kind, Parent, SubsectionId); + Iter->second = GOFFSection; return GOFFSection; } diff --git a/contrib/llvm-project/llvm/lib/MC/MCExpr.cpp b/contrib/llvm-project/llvm/lib/MC/MCExpr.cpp index 73e6569f96e4..a85182aa06ad 100644 --- a/contrib/llvm-project/llvm/lib/MC/MCExpr.cpp +++ b/contrib/llvm-project/llvm/lib/MC/MCExpr.cpp @@ -632,7 +632,8 @@ static void AttemptToFoldSymbolOffsetDifference( // instructions and InSet is false (not expressions in directive like // .size/.fill), disable the fast path. if (Layout && (InSet || !SecA.hasInstructions() || - !Asm->getContext().getTargetTriple().isRISCV())) { + !(Asm->getContext().getTargetTriple().isRISCV() || + Asm->getContext().getTargetTriple().isLoongArch()))) { // If both symbols are in the same fragment, return the difference of their // offsets. canGetFragmentOffset(FA) may be false. if (FA == FB && !SA.isVariable() && !SB.isVariable()) { @@ -942,16 +943,17 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm, Addrs, InSet)) { // Check if both are Target Expressions, see if we can compare them. if (const MCTargetExpr *L = dyn_cast<MCTargetExpr>(ABE->getLHS())) { - const MCTargetExpr *R = cast<MCTargetExpr>(ABE->getRHS()); - switch (ABE->getOpcode()) { - case MCBinaryExpr::EQ: - Res = MCValue::get(L->isEqualTo(R) ? -1 : 0); - return true; - case MCBinaryExpr::NE: - Res = MCValue::get(L->isEqualTo(R) ? 
0 : -1); - return true; - default: - break; + if (const MCTargetExpr *R = dyn_cast<MCTargetExpr>(ABE->getRHS())) { + switch (ABE->getOpcode()) { + case MCBinaryExpr::EQ: + Res = MCValue::get(L->isEqualTo(R) ? -1 : 0); + return true; + case MCBinaryExpr::NE: + Res = MCValue::get(L->isEqualTo(R) ? 0 : -1); + return true; + default: + break; + } } } return false; diff --git a/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.cpp index 5352736bdcb9..c8b66d6fcb5e 100644 --- a/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.cpp +++ b/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.cpp @@ -2638,7 +2638,7 @@ template <class ELFT> Error ELFWriter<ELFT>::finalize() { Error BinaryWriter::write() { SmallVector<const SectionBase *, 30> SectionsToWrite; for (const SectionBase &Sec : Obj.allocSections()) { - if (Sec.Type != SHT_NOBITS) + if (Sec.Type != SHT_NOBITS && Sec.Size > 0) SectionsToWrite.push_back(&Sec); } diff --git a/contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp index 3c86b0f25dda..95c4f9f8545d 100644 --- a/contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp +++ b/contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp @@ -358,6 +358,8 @@ std::optional<StringRef> ELFObjectFileBase::tryGetCPUName() const { switch (getEMachine()) { case ELF::EM_AMDGPU: return getAMDGPUCPUName(); + case ELF::EM_CUDA: + return getNVPTXCPUName(); case ELF::EM_PPC: case ELF::EM_PPC64: return StringRef("future"); @@ -517,6 +519,73 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const { } } +StringRef ELFObjectFileBase::getNVPTXCPUName() const { + assert(getEMachine() == ELF::EM_CUDA); + unsigned SM = getPlatformFlags() & ELF::EF_CUDA_SM; + + switch (SM) { + // Fermi architecture. + case ELF::EF_CUDA_SM20: + return "sm_20"; + case ELF::EF_CUDA_SM21: + return "sm_21"; + + // Kepler architecture. 
+ case ELF::EF_CUDA_SM30: + return "sm_30"; + case ELF::EF_CUDA_SM32: + return "sm_32"; + case ELF::EF_CUDA_SM35: + return "sm_35"; + case ELF::EF_CUDA_SM37: + return "sm_37"; + + // Maxwell architecture. + case ELF::EF_CUDA_SM50: + return "sm_50"; + case ELF::EF_CUDA_SM52: + return "sm_52"; + case ELF::EF_CUDA_SM53: + return "sm_53"; + + // Pascal architecture. + case ELF::EF_CUDA_SM60: + return "sm_60"; + case ELF::EF_CUDA_SM61: + return "sm_61"; + case ELF::EF_CUDA_SM62: + return "sm_62"; + + // Volta architecture. + case ELF::EF_CUDA_SM70: + return "sm_70"; + case ELF::EF_CUDA_SM72: + return "sm_72"; + + // Turing architecture. + case ELF::EF_CUDA_SM75: + return "sm_75"; + + // Ampere architecture. + case ELF::EF_CUDA_SM80: + return "sm_80"; + case ELF::EF_CUDA_SM86: + return "sm_86"; + case ELF::EF_CUDA_SM87: + return "sm_87"; + + // Ada architecture. + case ELF::EF_CUDA_SM89: + return "sm_89"; + + // Hopper architecture. + case ELF::EF_CUDA_SM90: + return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_90a" : "sm_90"; + default: + llvm_unreachable("Unknown EF_CUDA_SM value"); + } +} + // FIXME Encode from a tablegen description or target parser. 
void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const { if (TheTriple.getSubArch() != Triple::NoSubArch) diff --git a/contrib/llvm-project/llvm/lib/Object/ModuleSymbolTable.cpp b/contrib/llvm-project/llvm/lib/Object/ModuleSymbolTable.cpp index ab073e18cb46..07f76688fa43 100644 --- a/contrib/llvm-project/llvm/lib/Object/ModuleSymbolTable.cpp +++ b/contrib/llvm-project/llvm/lib/Object/ModuleSymbolTable.cpp @@ -16,6 +16,7 @@ #include "RecordStreamer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" @@ -68,6 +69,11 @@ void ModuleSymbolTable::addModule(Module *M) { static void initializeRecordStreamer(const Module &M, function_ref<void(RecordStreamer &)> Init) { + // This function may be called twice, once for ModuleSummaryIndexAnalysis and + // the other when writing the IR symbol table. If parsing inline assembly has + // caused errors in the first run, suppress the second run. + if (M.getContext().getDiagHandlerPtr()->HasErrors) + return; StringRef InlineAsm = M.getModuleInlineAsm(); if (InlineAsm.empty()) return; @@ -95,7 +101,8 @@ initializeRecordStreamer(const Module &M, if (!MCII) return; - std::unique_ptr<MemoryBuffer> Buffer(MemoryBuffer::getMemBuffer(InlineAsm)); + std::unique_ptr<MemoryBuffer> Buffer( + MemoryBuffer::getMemBuffer(InlineAsm, "<inline asm>")); SourceMgr SrcMgr; SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc()); @@ -115,6 +122,13 @@ initializeRecordStreamer(const Module &M, if (!TAP) return; + MCCtx.setDiagnosticHandler([&](const SMDiagnostic &SMD, bool IsInlineAsm, + const SourceMgr &SrcMgr, + std::vector<const MDNode *> &LocInfos) { + M.getContext().diagnose( + DiagnosticInfoSrcMgr(SMD, M.getName(), IsInlineAsm, /*LocCookie=*/0)); + }); + // Module-level inline asm is assumed to use At&t syntax (see // AsmPrinter::doInitialization()). 
Parser->setAssemblerDialect(InlineAsm::AD_ATT); diff --git a/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp index 168fb57935d6..dfe86a45df32 100644 --- a/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp +++ b/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp @@ -265,7 +265,6 @@ static wasm::WasmTableType readTableType(WasmObjectFile::ReadContext &Ctx) { static Error readSection(WasmSection &Section, WasmObjectFile::ReadContext &Ctx, WasmSectionOrderChecker &Checker) { - Section.Offset = Ctx.Ptr - Ctx.Start; Section.Type = readUint8(Ctx); LLVM_DEBUG(dbgs() << "readSection type=" << Section.Type << "\n"); // When reading the section's size, store the size of the LEB used to encode @@ -273,6 +272,7 @@ static Error readSection(WasmSection &Section, WasmObjectFile::ReadContext &Ctx, const uint8_t *PreSizePtr = Ctx.Ptr; uint32_t Size = readVaruint32(Ctx); Section.HeaderSecSizeEncodingLen = Ctx.Ptr - PreSizePtr; + Section.Offset = Ctx.Ptr - Ctx.Start; if (Size == 0) return make_error<StringError>("zero length section", object_error::parse_failed); @@ -599,6 +599,10 @@ Error WasmObjectFile::parseLinkingSection(ReadContext &Ctx) { Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) { uint32_t Count = readVaruint32(Ctx); + // Clear out any symbol information that was derived from the exports + // section. 
+ LinkingData.SymbolTable.clear(); + Symbols.clear(); LinkingData.SymbolTable.reserve(Count); Symbols.reserve(Count); StringSet<> SymbolNames; @@ -1290,37 +1294,75 @@ Error WasmObjectFile::parseGlobalSection(ReadContext &Ctx) { Error WasmObjectFile::parseExportSection(ReadContext &Ctx) { uint32_t Count = readVaruint32(Ctx); Exports.reserve(Count); + LinkingData.SymbolTable.reserve(Count); + Symbols.reserve(Count); for (uint32_t I = 0; I < Count; I++) { wasm::WasmExport Ex; Ex.Name = readString(Ctx); Ex.Kind = readUint8(Ctx); Ex.Index = readVaruint32(Ctx); + const wasm::WasmSignature *Signature = nullptr; + const wasm::WasmGlobalType *GlobalType = nullptr; + const wasm::WasmTableType *TableType = nullptr; + wasm::WasmSymbolInfo Info; + Info.Name = Ex.Name; + Info.Flags = 0; switch (Ex.Kind) { - case wasm::WASM_EXTERNAL_FUNCTION: - + case wasm::WASM_EXTERNAL_FUNCTION: { if (!isDefinedFunctionIndex(Ex.Index)) return make_error<GenericBinaryError>("invalid function export", object_error::parse_failed); getDefinedFunction(Ex.Index).ExportName = Ex.Name; + Info.Kind = wasm::WASM_SYMBOL_TYPE_FUNCTION; + Info.ElementIndex = Ex.Index; + unsigned FuncIndex = Info.ElementIndex - NumImportedFunctions; + wasm::WasmFunction &Function = Functions[FuncIndex]; + Signature = &Signatures[Function.SigIndex]; break; - case wasm::WASM_EXTERNAL_GLOBAL: + } + case wasm::WASM_EXTERNAL_GLOBAL: { if (!isValidGlobalIndex(Ex.Index)) return make_error<GenericBinaryError>("invalid global export", object_error::parse_failed); + Info.Kind = wasm::WASM_SYMBOL_TYPE_DATA; + uint64_t Offset = 0; + if (isDefinedGlobalIndex(Ex.Index)) { + auto Global = getDefinedGlobal(Ex.Index); + if (!Global.InitExpr.Extended) { + auto Inst = Global.InitExpr.Inst; + if (Inst.Opcode == wasm::WASM_OPCODE_I32_CONST) { + Offset = Inst.Value.Int32; + } else if (Inst.Opcode == wasm::WASM_OPCODE_I64_CONST) { + Offset = Inst.Value.Int64; + } + } + } + Info.DataRef = wasm::WasmDataReference{0, Offset, 0}; break; + } case 
wasm::WASM_EXTERNAL_TAG: if (!isValidTagIndex(Ex.Index)) return make_error<GenericBinaryError>("invalid tag export", object_error::parse_failed); + Info.Kind = wasm::WASM_SYMBOL_TYPE_TAG; + Info.ElementIndex = Ex.Index; break; case wasm::WASM_EXTERNAL_MEMORY: + break; case wasm::WASM_EXTERNAL_TABLE: + Info.Kind = wasm::WASM_SYMBOL_TYPE_TABLE; break; default: return make_error<GenericBinaryError>("unexpected export kind", object_error::parse_failed); } Exports.push_back(Ex); + if (Ex.Kind != wasm::WASM_EXTERNAL_MEMORY) { + LinkingData.SymbolTable.emplace_back(Info); + Symbols.emplace_back(LinkingData.SymbolTable.back(), GlobalType, + TableType, Signature); + LLVM_DEBUG(dbgs() << "Adding symbol: " << Symbols.back() << "\n"); + } } if (Ctx.Ptr != Ctx.End) return make_error<GenericBinaryError>("export section ended prematurely", @@ -1644,6 +1686,8 @@ uint64_t WasmObjectFile::getWasmSymbolValue(const WasmSymbol &Sym) const { return Segment.Offset.Inst.Value.Int32 + Sym.Info.DataRef.Offset; } else if (Segment.Offset.Inst.Opcode == wasm::WASM_OPCODE_I64_CONST) { return Segment.Offset.Inst.Value.Int64 + Sym.Info.DataRef.Offset; + } else if (Segment.Offset.Inst.Opcode == wasm::WASM_OPCODE_GLOBAL_GET) { + return Sym.Info.DataRef.Offset; } else { llvm_unreachable("unknown init expr opcode"); } diff --git a/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp b/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp index 95b9fb7ad735..f94bd422c6b5 100644 --- a/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp @@ -83,6 +83,7 @@ #include "llvm/CodeGen/InterleavedAccess.h" #include "llvm/CodeGen/InterleavedLoadCombine.h" #include "llvm/CodeGen/JMCInstrumenter.h" +#include "llvm/CodeGen/LowerEmuTLS.h" #include "llvm/CodeGen/SafeStack.h" #include "llvm/CodeGen/SelectOptimize.h" #include "llvm/CodeGen/SjLjEHPrepare.h" diff --git a/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def 
b/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def index d8fc7cd8a231..82ce040c6496 100644 --- a/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def +++ b/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def @@ -83,6 +83,7 @@ MODULE_PASS("internalize", InternalizePass()) MODULE_PASS("invalidate<all>", InvalidateAllAnalysesPass()) MODULE_PASS("iroutliner", IROutlinerPass()) MODULE_PASS("jmc-instrumenter", JMCInstrumenterPass()) +MODULE_PASS("lower-emutls", LowerEmuTLSPass()) MODULE_PASS("lower-global-dtors", LowerGlobalDtorsPass()) MODULE_PASS("lower-ifunc", LowerIFuncPass()) MODULE_PASS("lowertypetests", LowerTypeTestsPass()) diff --git a/contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp b/contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp index 649d814cfd9d..134a400e639c 100644 --- a/contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp +++ b/contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp @@ -246,11 +246,27 @@ std::string InstrProfError::message() const { char InstrProfError::ID = 0; -std::string getPGOFuncName(StringRef RawFuncName, - GlobalValue::LinkageTypes Linkage, +std::string getPGOFuncName(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName, uint64_t Version LLVM_ATTRIBUTE_UNUSED) { - return GlobalValue::getGlobalIdentifier(RawFuncName, Linkage, FileName); + // Value names may be prefixed with a binary '1' to indicate + // that the backend should not modify the symbols due to any platform + // naming convention. Do not include that '1' in the PGO profile name. + if (Name[0] == '\1') + Name = Name.substr(1); + + std::string NewName = std::string(Name); + if (llvm::GlobalValue::isLocalLinkage(Linkage)) { + // For local symbols, prepend the main file name to distinguish them. + // Do not include the full path in the file name since there's no guarantee + // that it will stay the same, e.g., if the files are checked out from + // version control in different locations. 
+ if (FileName.empty()) + NewName = NewName.insert(0, "<unknown>:"); + else + NewName = NewName.insert(0, FileName.str() + ":"); + } + return NewName; } // Strip NumPrefix level of directory name from PathNameStr. If the number of @@ -300,12 +316,10 @@ getIRPGONameForGlobalObject(const GlobalObject &GO, GlobalValue::LinkageTypes Linkage, StringRef FileName) { SmallString<64> Name; - if (llvm::GlobalValue::isLocalLinkage(Linkage)) { - Name.append(FileName.empty() ? "<unknown>" : FileName); - Name.append(";"); - } + // FIXME: Mangler's handling is kept outside of `getGlobalIdentifier` for now. + // For more details please check issue #74565. Mangler().getNameWithPrefix(Name, &GO, /*CannotUsePrivateLabel=*/true); - return Name.str().str(); + return GlobalValue::getGlobalIdentifier(Name, Linkage, FileName); } static std::optional<std::string> lookupPGONameFromMetadata(MDNode *MD) { @@ -352,6 +366,9 @@ std::string getIRPGOFuncName(const Function &F, bool InLTO) { return getIRPGOObjectName(F, InLTO, getPGOFuncNameMetadata(F)); } +// Please use getIRPGOFuncName for LLVM IR instrumentation. This function is +// for front-end (Clang, etc) instrumentation. +// The implementation is kept for profile matching from older profiles. // This is similar to `getIRPGOFuncName` except that this function calls // 'getPGOFuncName' to get a name and `getIRPGOFuncName` calls // 'getIRPGONameForGlobalObject'. See the difference between two callees in the @@ -384,7 +401,8 @@ getParsedIRPGOFuncName(StringRef IRPGOFuncName) { StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName) { if (FileName.empty()) return PGOFuncName; - // Drop the file name including ':'. See also getPGOFuncName. + // Drop the file name including ':' or ';'. See getIRPGONameForGlobalObject as + // well. 
if (PGOFuncName.starts_with(FileName)) PGOFuncName = PGOFuncName.drop_front(FileName.size() + 1); return PGOFuncName; diff --git a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp index 068922d421f8..8f62df79d5b7 100644 --- a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp @@ -1008,12 +1008,13 @@ public: /// Extract the original function name from a PGO function name. static StringRef extractName(StringRef Name) { - // We can have multiple :-separated pieces; there can be pieces both - // before and after the mangled name. Find the first part that starts - // with '_Z'; we'll assume that's the mangled name we want. + // We can have multiple pieces separated by kGlobalIdentifierDelimiter ( + // semicolon now and colon in older profiles); there can be pieces both + // before and after the mangled name. Find the first part that starts with + // '_Z'; we'll assume that's the mangled name we want. std::pair<StringRef, StringRef> Parts = {StringRef(), Name}; while (true) { - Parts = Parts.second.split(':'); + Parts = Parts.second.split(kGlobalIdentifierDelimiter); if (Parts.first.starts_with("_Z")) return Parts.first; if (Parts.second.empty()) diff --git a/contrib/llvm-project/llvm/lib/ProfileData/RawMemProfReader.cpp b/contrib/llvm-project/llvm/lib/ProfileData/RawMemProfReader.cpp index af2db8d61179..0e2b8668bab7 100644 --- a/contrib/llvm-project/llvm/lib/ProfileData/RawMemProfReader.cpp +++ b/contrib/llvm-project/llvm/lib/ProfileData/RawMemProfReader.cpp @@ -336,6 +336,13 @@ Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) { inconvertibleErrorCode()), FileName); + // Process the raw profile. 
+ if (Error E = readRawProfile(std::move(DataBuffer))) + return E; + + if (Error E = setupForSymbolization()) + return E; + auto *Object = cast<object::ObjectFile>(Binary.getBinary()); std::unique_ptr<DIContext> Context = DWARFContext::create( *Object, DWARFContext::ProcessDebugRelocations::Process); @@ -344,16 +351,13 @@ Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) { Object, std::move(Context), /*UntagAddresses=*/false); if (!SOFOr) return report(SOFOr.takeError(), FileName); - Symbolizer = std::move(SOFOr.get()); - - // Process the raw profile. - if (Error E = readRawProfile(std::move(DataBuffer))) - return E; - - if (Error E = setupForSymbolization()) - return E; + auto Symbolizer = std::move(SOFOr.get()); - if (Error E = symbolizeAndFilterStackFrames()) + // The symbolizer ownership is moved into symbolizeAndFilterStackFrames so + // that it is freed automatically at the end, when it is no longer used. This + // reduces peak memory since it won't be live while also mapping the raw + // profile into records afterwards. + if (Error E = symbolizeAndFilterStackFrames(std::move(Symbolizer))) return E; return mapRawProfileToRecords(); @@ -469,7 +473,8 @@ Error RawMemProfReader::mapRawProfileToRecords() { return Error::success(); } -Error RawMemProfReader::symbolizeAndFilterStackFrames() { +Error RawMemProfReader::symbolizeAndFilterStackFrames( + std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer) { // The specifier to use when symbolization is requested. 
const DILineInfoSpecifier Specifier( DILineInfoSpecifier::FileLineInfoKind::RawValue, diff --git a/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp b/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp index 088b4e4d755c..368dead44914 100644 --- a/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp +++ b/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp @@ -175,6 +175,23 @@ public: StringRef Overview, raw_ostream *Errs = nullptr, bool LongOptionsUseDoubleDash = false); + void forEachSubCommand(Option &Opt, function_ref<void(SubCommand &)> Action) { + if (Opt.Subs.empty()) { + Action(SubCommand::getTopLevel()); + return; + } + if (Opt.Subs.size() == 1 && *Opt.Subs.begin() == &SubCommand::getAll()) { + for (auto *SC : RegisteredSubCommands) + Action(*SC); + return; + } + for (auto *SC : Opt.Subs) { + assert(SC != &SubCommand::getAll() && + "SubCommand::getAll() should not be used with other subcommands"); + Action(*SC); + } + } + void addLiteralOption(Option &Opt, SubCommand *SC, StringRef Name) { if (Opt.hasArgStr()) return; @@ -183,25 +200,11 @@ public: << "' registered more than once!\n"; report_fatal_error("inconsistency in registered CommandLine options"); } - - // If we're adding this to all sub-commands, add it to the ones that have - // already been registered. - if (SC == &SubCommand::getAll()) { - for (auto *Sub : RegisteredSubCommands) { - if (SC == Sub) - continue; - addLiteralOption(Opt, Sub, Name); - } - } } void addLiteralOption(Option &Opt, StringRef Name) { - if (Opt.Subs.empty()) - addLiteralOption(Opt, &SubCommand::getTopLevel(), Name); - else { - for (auto *SC : Opt.Subs) - addLiteralOption(Opt, SC, Name); - } + forEachSubCommand( + Opt, [&](SubCommand &SC) { addLiteralOption(Opt, &SC, Name); }); } void addOption(Option *O, SubCommand *SC) { @@ -238,16 +241,6 @@ public: // linked LLVM distribution. 
if (HadErrors) report_fatal_error("inconsistency in registered CommandLine options"); - - // If we're adding this to all sub-commands, add it to the ones that have - // already been registered. - if (SC == &SubCommand::getAll()) { - for (auto *Sub : RegisteredSubCommands) { - if (SC == Sub) - continue; - addOption(O, Sub); - } - } } void addOption(Option *O, bool ProcessDefaultOption = false) { @@ -255,13 +248,7 @@ public: DefaultOptions.push_back(O); return; } - - if (O->Subs.empty()) { - addOption(O, &SubCommand::getTopLevel()); - } else { - for (auto *SC : O->Subs) - addOption(O, SC); - } + forEachSubCommand(*O, [&](SubCommand &SC) { addOption(O, &SC); }); } void removeOption(Option *O, SubCommand *SC) { @@ -298,17 +285,7 @@ public: } void removeOption(Option *O) { - if (O->Subs.empty()) - removeOption(O, &SubCommand::getTopLevel()); - else { - if (O->isInAllSubCommands()) { - for (auto *SC : RegisteredSubCommands) - removeOption(O, SC); - } else { - for (auto *SC : O->Subs) - removeOption(O, SC); - } - } + forEachSubCommand(*O, [&](SubCommand &SC) { removeOption(O, &SC); }); } bool hasOptions(const SubCommand &Sub) const { @@ -344,17 +321,8 @@ public: } void updateArgStr(Option *O, StringRef NewName) { - if (O->Subs.empty()) - updateArgStr(O, NewName, &SubCommand::getTopLevel()); - else { - if (O->isInAllSubCommands()) { - for (auto *SC : RegisteredSubCommands) - updateArgStr(O, NewName, SC); - } else { - for (auto *SC : O->Subs) - updateArgStr(O, NewName, SC); - } - } + forEachSubCommand(*O, + [&](SubCommand &SC) { updateArgStr(O, NewName, &SC); }); } void printOptionValues(); diff --git a/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp b/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp index bbbaf26a7bd4..7256e9a29329 100644 --- a/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp @@ -145,6 +145,9 @@ static const RISCVSupportedExtension SupportedExtensions[] = { {"zmmul", 
RISCVExtensionVersion{1, 0}}, + {"zvbb", RISCVExtensionVersion{1, 0}}, + {"zvbc", RISCVExtensionVersion{1, 0}}, + {"zve32f", RISCVExtensionVersion{1, 0}}, {"zve32x", RISCVExtensionVersion{1, 0}}, {"zve64d", RISCVExtensionVersion{1, 0}}, @@ -154,6 +157,22 @@ static const RISCVSupportedExtension SupportedExtensions[] = { {"zvfh", RISCVExtensionVersion{1, 0}}, {"zvfhmin", RISCVExtensionVersion{1, 0}}, + // vector crypto + {"zvkb", RISCVExtensionVersion{1, 0}}, + {"zvkg", RISCVExtensionVersion{1, 0}}, + {"zvkn", RISCVExtensionVersion{1, 0}}, + {"zvknc", RISCVExtensionVersion{1, 0}}, + {"zvkned", RISCVExtensionVersion{1, 0}}, + {"zvkng", RISCVExtensionVersion{1, 0}}, + {"zvknha", RISCVExtensionVersion{1, 0}}, + {"zvknhb", RISCVExtensionVersion{1, 0}}, + {"zvks", RISCVExtensionVersion{1, 0}}, + {"zvksc", RISCVExtensionVersion{1, 0}}, + {"zvksed", RISCVExtensionVersion{1, 0}}, + {"zvksg", RISCVExtensionVersion{1, 0}}, + {"zvksh", RISCVExtensionVersion{1, 0}}, + {"zvkt", RISCVExtensionVersion{1, 0}}, + {"zvl1024b", RISCVExtensionVersion{1, 0}}, {"zvl128b", RISCVExtensionVersion{1, 0}}, {"zvl16384b", RISCVExtensionVersion{1, 0}}, @@ -179,27 +198,8 @@ static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { {"ztso", RISCVExtensionVersion{0, 1}}, - {"zvbb", RISCVExtensionVersion{1, 0}}, - {"zvbc", RISCVExtensionVersion{1, 0}}, - {"zvfbfmin", RISCVExtensionVersion{0, 8}}, {"zvfbfwma", RISCVExtensionVersion{0, 8}}, - - // vector crypto - {"zvkb", RISCVExtensionVersion{1, 0}}, - {"zvkg", RISCVExtensionVersion{1, 0}}, - {"zvkn", RISCVExtensionVersion{1, 0}}, - {"zvknc", RISCVExtensionVersion{1, 0}}, - {"zvkned", RISCVExtensionVersion{1, 0}}, - {"zvkng", RISCVExtensionVersion{1, 0}}, - {"zvknha", RISCVExtensionVersion{1, 0}}, - {"zvknhb", RISCVExtensionVersion{1, 0}}, - {"zvks", RISCVExtensionVersion{1, 0}}, - {"zvksc", RISCVExtensionVersion{1, 0}}, - {"zvksed", RISCVExtensionVersion{1, 0}}, - {"zvksg", RISCVExtensionVersion{1, 0}}, - {"zvksh", 
RISCVExtensionVersion{1, 0}}, - {"zvkt", RISCVExtensionVersion{1, 0}}, }; static void verifyTables() { @@ -215,11 +215,12 @@ static void verifyTables() { #endif } -static void PrintExtension(const std::string Name, const std::string Version, - const std::string Description) { - outs() << " " - << format(Description.empty() ? "%-20s%s\n" : "%-20s%-10s%s\n", - Name.c_str(), Version.c_str(), Description.c_str()); +static void PrintExtension(StringRef Name, StringRef Version, + StringRef Description) { + outs().indent(4); + unsigned VersionWidth = Description.empty() ? 0 : 10; + outs() << left_justify(Name, 20) << left_justify(Version, VersionWidth) + << Description << "\n"; } void llvm::riscvExtensionsHelp(StringMap<StringRef> DescMap) { @@ -233,7 +234,7 @@ void llvm::riscvExtensionsHelp(StringMap<StringRef> DescMap) { for (const auto &E : ExtMap) { std::string Version = std::to_string(E.second.MajorVersion) + "." + std::to_string(E.second.MinorVersion); - PrintExtension(E.first, Version, DescMap[E.first].str()); + PrintExtension(E.first, Version, DescMap[E.first]); } outs() << "\nExperimental extensions\n"; @@ -243,7 +244,7 @@ void llvm::riscvExtensionsHelp(StringMap<StringRef> DescMap) { for (const auto &E : ExtMap) { std::string Version = std::to_string(E.second.MajorVersion) + "." 
+ std::to_string(E.second.MinorVersion); - PrintExtension(E.first, Version, DescMap["experimental-" + E.first].str()); + PrintExtension(E.first, Version, DescMap["experimental-" + E.first]); } outs() << "\nUse -march to specify the target's extension.\n" @@ -995,25 +996,25 @@ static const char *ImpliedExtsD[] = {"f"}; static const char *ImpliedExtsF[] = {"zicsr"}; static const char *ImpliedExtsV[] = {"zvl128b", "zve64d"}; static const char *ImpliedExtsXTHeadVdot[] = {"v"}; -static const char *ImpliedExtsXsfvcp[] = {"zve32x"}; -static const char *ImpliedExtsXsfvfnrclipxfqf[] = {"zve32f"}; -static const char *ImpliedExtsXsfvfwmaccqqq[] = {"zve32f", "zvfbfmin"}; -static const char *ImpliedExtsXsfvqmaccdod[] = {"zve32x"}; -static const char *ImpliedExtsXsfvqmaccqoq[] = {"zve32x"}; +static const char *ImpliedExtsXSfvcp[] = {"zve32x"}; +static const char *ImpliedExtsXSfvfnrclipxfqf[] = {"zve32f"}; +static const char *ImpliedExtsXSfvfwmaccqqq[] = {"zvfbfmin"}; +static const char *ImpliedExtsXSfvqmaccdod[] = {"zve32x"}; +static const char *ImpliedExtsXSfvqmaccqoq[] = {"zve32x"}; static const char *ImpliedExtsZacas[] = {"a"}; static const char *ImpliedExtsZcb[] = {"zca"}; static const char *ImpliedExtsZcd[] = {"d", "zca"}; static const char *ImpliedExtsZce[] = {"zcb", "zcmp", "zcmt"}; static const char *ImpliedExtsZcf[] = {"f", "zca"}; static const char *ImpliedExtsZcmp[] = {"zca"}; -static const char *ImpliedExtsZcmt[] = {"zca"}; +static const char *ImpliedExtsZcmt[] = {"zca", "zicsr"}; static const char *ImpliedExtsZdinx[] = {"zfinx"}; static const char *ImpliedExtsZfa[] = {"f"}; static const char *ImpliedExtsZfbfmin[] = {"f"}; static const char *ImpliedExtsZfh[] = {"zfhmin"}; static const char *ImpliedExtsZfhmin[] = {"f"}; static const char *ImpliedExtsZfinx[] = {"zicsr"}; -static const char *ImpliedExtsZhinx[] = {"zfinx"}; +static const char *ImpliedExtsZhinx[] = {"zhinxmin"}; static const char *ImpliedExtsZhinxmin[] = {"zfinx"}; static const char *ImpliedExtsZicntr[] = 
{"zicsr"}; static const char *ImpliedExtsZihpm[] = {"zicsr"}; @@ -1027,9 +1028,9 @@ static const char *ImpliedExtsZve32x[] = {"zvl32b", "zicsr"}; static const char *ImpliedExtsZve64d[] = {"zve64f", "d"}; static const char *ImpliedExtsZve64f[] = {"zve64x", "zve32f"}; static const char *ImpliedExtsZve64x[] = {"zve32x", "zvl64b"}; -static const char *ImpliedExtsZvfbfmin[] = {"zve32f", "zfbfmin"}; -static const char *ImpliedExtsZvfbfwma[] = {"zvfbfmin"}; -static const char *ImpliedExtsZvfh[] = {"zve32f", "zfhmin"}; +static const char *ImpliedExtsZvfbfmin[] = {"zve32f"}; +static const char *ImpliedExtsZvfbfwma[] = {"zvfbfmin", "zfbfmin"}; +static const char *ImpliedExtsZvfh[] = {"zvfhmin", "zfhmin"}; static const char *ImpliedExtsZvfhmin[] = {"zve32f"}; static const char *ImpliedExtsZvkn[] = {"zvkb", "zvkned", "zvknhb", "zvkt"}; static const char *ImpliedExtsZvknc[] = {"zvbc", "zvkn"}; @@ -1066,11 +1067,11 @@ static constexpr ImpliedExtsEntry ImpliedExts[] = { {{"d"}, {ImpliedExtsD}}, {{"f"}, {ImpliedExtsF}}, {{"v"}, {ImpliedExtsV}}, - {{"xsfvcp"}, {ImpliedExtsXsfvcp}}, - {{"xsfvfnrclipxfqf"}, {ImpliedExtsXsfvfnrclipxfqf}}, - {{"xsfvfwmaccqqq"}, {ImpliedExtsXsfvfwmaccqqq}}, - {{"xsfvqmaccdod"}, {ImpliedExtsXsfvqmaccdod}}, - {{"xsfvqmaccqoq"}, {ImpliedExtsXsfvqmaccqoq}}, + {{"xsfvcp"}, {ImpliedExtsXSfvcp}}, + {{"xsfvfnrclipxfqf"}, {ImpliedExtsXSfvfnrclipxfqf}}, + {{"xsfvfwmaccqqq"}, {ImpliedExtsXSfvfwmaccqqq}}, + {{"xsfvqmaccdod"}, {ImpliedExtsXSfvqmaccdod}}, + {{"xsfvqmaccqoq"}, {ImpliedExtsXSfvqmaccqoq}}, {{"xtheadvdot"}, {ImpliedExtsXTHeadVdot}}, {{"zacas"}, {ImpliedExtsZacas}}, {{"zcb"}, {ImpliedExtsZcb}}, diff --git a/contrib/llvm-project/llvm/lib/Support/Signals.cpp b/contrib/llvm-project/llvm/lib/Support/Signals.cpp index 669a9e2a8396..9f9030e79d10 100644 --- a/contrib/llvm-project/llvm/lib/Support/Signals.cpp +++ b/contrib/llvm-project/llvm/lib/Support/Signals.cpp @@ -145,7 +145,7 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace, return 
false; // Don't recursively invoke the llvm-symbolizer binary. - if (Argv0.find("llvm-symbolizer") != std::string::npos) + if (Argv0.contains("llvm-symbolizer")) return false; // FIXME: Subtract necessary number from StackTrace entries to turn return addresses diff --git a/contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp b/contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp index 28ab85d4344c..3d3a564af51d 100644 --- a/contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp +++ b/contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp @@ -16,7 +16,6 @@ #include "llvm/Support/AutoConvert.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Duration.h" -#include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Format.h" @@ -25,17 +24,11 @@ #include "llvm/Support/NativeFormatting.h" #include "llvm/Support/Process.h" #include "llvm/Support/Program.h" -#include "llvm/Support/Threading.h" #include <algorithm> #include <cerrno> #include <cstdio> #include <sys/stat.h> -#ifndef _WIN32 -#include <sys/socket.h> -#include <sys/un.h> -#endif // _WIN32 - // <fcntl.h> may provide O_BINARY. #if defined(HAVE_FCNTL_H) # include <fcntl.h> @@ -66,13 +59,6 @@ #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/Signals.h" #include "llvm/Support/Windows/WindowsSupport.h" -// winsock2.h must be included before afunix.h. Briefly turn off clang-format to -// avoid error. -// clang-format off -#include <winsock2.h> -#include <afunix.h> -// clang-format on -#include <io.h> #endif using namespace llvm; @@ -659,7 +645,7 @@ raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered, // Check if this is a console device. This is not equivalent to isatty. IsWindowsConsole = ::GetFileType((HANDLE)::_get_osfhandle(fd)) == FILE_TYPE_CHAR; -#endif // _WIN32 +#endif // Get the starting position. 
off_t loc = ::lseek(FD, 0, SEEK_CUR); @@ -969,153 +955,6 @@ bool raw_fd_stream::classof(const raw_ostream *OS) { } //===----------------------------------------------------------------------===// -// raw_socket_stream -//===----------------------------------------------------------------------===// - -#ifdef _WIN32 -WSABalancer::WSABalancer() { - WSADATA WsaData; - ::memset(&WsaData, 0, sizeof(WsaData)); - if (WSAStartup(MAKEWORD(2, 2), &WsaData) != 0) { - llvm::report_fatal_error("WSAStartup failed"); - } -} - -WSABalancer::~WSABalancer() { WSACleanup(); } - -#endif // _WIN32 - -static std::error_code getLastSocketErrorCode() { -#ifdef _WIN32 - return std::error_code(::WSAGetLastError(), std::system_category()); -#else - return std::error_code(errno, std::system_category()); -#endif -} - -ListeningSocket::ListeningSocket(int SocketFD, StringRef SocketPath) - : FD(SocketFD), SocketPath(SocketPath) {} - -ListeningSocket::ListeningSocket(ListeningSocket &&LS) - : FD(LS.FD), SocketPath(LS.SocketPath) { - LS.FD = -1; -} - -Expected<ListeningSocket> ListeningSocket::createUnix(StringRef SocketPath, - int MaxBacklog) { - -#ifdef _WIN32 - WSABalancer _; - SOCKET MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0); - if (MaybeWinsocket == INVALID_SOCKET) { -#else - int MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0); - if (MaybeWinsocket == -1) { -#endif - return llvm::make_error<StringError>(getLastSocketErrorCode(), - "socket create failed"); - } - - struct sockaddr_un Addr; - memset(&Addr, 0, sizeof(Addr)); - Addr.sun_family = AF_UNIX; - strncpy(Addr.sun_path, SocketPath.str().c_str(), sizeof(Addr.sun_path) - 1); - - if (bind(MaybeWinsocket, (struct sockaddr *)&Addr, sizeof(Addr)) == -1) { - std::error_code Err = getLastSocketErrorCode(); - if (Err == std::errc::address_in_use) - ::close(MaybeWinsocket); - return llvm::make_error<StringError>(Err, "Bind error"); - } - if (listen(MaybeWinsocket, MaxBacklog) == -1) { - return 
llvm::make_error<StringError>(getLastSocketErrorCode(), - "Listen error"); - } - int UnixSocket; -#ifdef _WIN32 - UnixSocket = _open_osfhandle(MaybeWinsocket, 0); -#else - UnixSocket = MaybeWinsocket; -#endif // _WIN32 - return ListeningSocket{UnixSocket, SocketPath}; -} - -Expected<std::unique_ptr<raw_socket_stream>> ListeningSocket::accept() { - int AcceptFD; -#ifdef _WIN32 - SOCKET WinServerSock = _get_osfhandle(FD); - SOCKET WinAcceptSock = ::accept(WinServerSock, NULL, NULL); - AcceptFD = _open_osfhandle(WinAcceptSock, 0); -#else - AcceptFD = ::accept(FD, NULL, NULL); -#endif //_WIN32 - if (AcceptFD == -1) - return llvm::make_error<StringError>(getLastSocketErrorCode(), - "Accept failed"); - return std::make_unique<raw_socket_stream>(AcceptFD); -} - -ListeningSocket::~ListeningSocket() { - if (FD == -1) - return; - ::close(FD); - unlink(SocketPath.c_str()); -} - -static Expected<int> GetSocketFD(StringRef SocketPath) { -#ifdef _WIN32 - SOCKET MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0); - if (MaybeWinsocket == INVALID_SOCKET) { -#else - int MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0); - if (MaybeWinsocket == -1) { -#endif // _WIN32 - return llvm::make_error<StringError>(getLastSocketErrorCode(), - "Create socket failed"); - } - - struct sockaddr_un Addr; - memset(&Addr, 0, sizeof(Addr)); - Addr.sun_family = AF_UNIX; - strncpy(Addr.sun_path, SocketPath.str().c_str(), sizeof(Addr.sun_path) - 1); - - int status = connect(MaybeWinsocket, (struct sockaddr *)&Addr, sizeof(Addr)); - if (status == -1) { - return llvm::make_error<StringError>(getLastSocketErrorCode(), - "Connect socket failed"); - } -#ifdef _WIN32 - return _open_osfhandle(MaybeWinsocket, 0); -#else - return MaybeWinsocket; -#endif // _WIN32 -} - -raw_socket_stream::raw_socket_stream(int SocketFD) - : raw_fd_stream(SocketFD, true) {} - -Expected<std::unique_ptr<raw_socket_stream>> -raw_socket_stream::createConnectedUnix(StringRef SocketPath) { -#ifdef _WIN32 - WSABalancer _; -#endif // _WIN32 
- Expected<int> FD = GetSocketFD(SocketPath); - if (!FD) - return FD.takeError(); - return std::make_unique<raw_socket_stream>(*FD); -} - -raw_socket_stream::~raw_socket_stream() {} - -//===----------------------------------------------------------------------===// -// raw_string_ostream -//===----------------------------------------------------------------------===// - -void raw_string_ostream::write_impl(const char *Ptr, size_t Size) { - OS.append(Ptr, Size); -} - -//===----------------------------------------------------------------------===// // raw_svector_ostream //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Support/raw_socket_stream.cpp b/contrib/llvm-project/llvm/lib/Support/raw_socket_stream.cpp new file mode 100644 index 000000000000..4659880cfe19 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Support/raw_socket_stream.cpp @@ -0,0 +1,179 @@ +//===-- llvm/Support/raw_socket_stream.cpp - Socket streams --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains raw_ostream implementations for streams to communicate +// via UNIX sockets +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/raw_socket_stream.h" +#include "llvm/Config/config.h" +#include "llvm/Support/Error.h" + +#ifndef _WIN32 +#include <sys/socket.h> +#include <sys/un.h> +#else +#include "llvm/Support/Windows/WindowsSupport.h" +// winsock2.h must be included before afunix.h. Briefly turn off clang-format to +// avoid error. 
+// clang-format off +#include <winsock2.h> +#include <afunix.h> +// clang-format on +#include <io.h> +#endif // _WIN32 + +#if defined(HAVE_UNISTD_H) +#include <unistd.h> +#endif + +using namespace llvm; + +#ifdef _WIN32 +WSABalancer::WSABalancer() { + WSADATA WsaData; + ::memset(&WsaData, 0, sizeof(WsaData)); + if (WSAStartup(MAKEWORD(2, 2), &WsaData) != 0) { + llvm::report_fatal_error("WSAStartup failed"); + } +} + +WSABalancer::~WSABalancer() { WSACleanup(); } + +#endif // _WIN32 + +static std::error_code getLastSocketErrorCode() { +#ifdef _WIN32 + return std::error_code(::WSAGetLastError(), std::system_category()); +#else + return std::error_code(errno, std::system_category()); +#endif +} + +ListeningSocket::ListeningSocket(int SocketFD, StringRef SocketPath) + : FD(SocketFD), SocketPath(SocketPath) {} + +ListeningSocket::ListeningSocket(ListeningSocket &&LS) + : FD(LS.FD), SocketPath(LS.SocketPath) { + LS.FD = -1; +} + +Expected<ListeningSocket> ListeningSocket::createUnix(StringRef SocketPath, + int MaxBacklog) { + +#ifdef _WIN32 + WSABalancer _; + SOCKET MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0); + if (MaybeWinsocket == INVALID_SOCKET) { +#else + int MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0); + if (MaybeWinsocket == -1) { +#endif + return llvm::make_error<StringError>(getLastSocketErrorCode(), + "socket create failed"); + } + + struct sockaddr_un Addr; + memset(&Addr, 0, sizeof(Addr)); + Addr.sun_family = AF_UNIX; + strncpy(Addr.sun_path, SocketPath.str().c_str(), sizeof(Addr.sun_path) - 1); + + if (bind(MaybeWinsocket, (struct sockaddr *)&Addr, sizeof(Addr)) == -1) { + std::error_code Err = getLastSocketErrorCode(); + if (Err == std::errc::address_in_use) + ::close(MaybeWinsocket); + return llvm::make_error<StringError>(Err, "Bind error"); + } + if (listen(MaybeWinsocket, MaxBacklog) == -1) { + return llvm::make_error<StringError>(getLastSocketErrorCode(), + "Listen error"); + } + int UnixSocket; +#ifdef _WIN32 + UnixSocket = 
_open_osfhandle(MaybeWinsocket, 0); +#else + UnixSocket = MaybeWinsocket; +#endif // _WIN32 + return ListeningSocket{UnixSocket, SocketPath}; +} + +Expected<std::unique_ptr<raw_socket_stream>> ListeningSocket::accept() { + int AcceptFD; +#ifdef _WIN32 + SOCKET WinServerSock = _get_osfhandle(FD); + SOCKET WinAcceptSock = ::accept(WinServerSock, NULL, NULL); + AcceptFD = _open_osfhandle(WinAcceptSock, 0); +#else + AcceptFD = ::accept(FD, NULL, NULL); +#endif //_WIN32 + if (AcceptFD == -1) + return llvm::make_error<StringError>(getLastSocketErrorCode(), + "Accept failed"); + return std::make_unique<raw_socket_stream>(AcceptFD); +} + +ListeningSocket::~ListeningSocket() { + if (FD == -1) + return; + ::close(FD); + unlink(SocketPath.c_str()); +} + +static Expected<int> GetSocketFD(StringRef SocketPath) { +#ifdef _WIN32 + SOCKET MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0); + if (MaybeWinsocket == INVALID_SOCKET) { +#else + int MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0); + if (MaybeWinsocket == -1) { +#endif // _WIN32 + return llvm::make_error<StringError>(getLastSocketErrorCode(), + "Create socket failed"); + } + + struct sockaddr_un Addr; + memset(&Addr, 0, sizeof(Addr)); + Addr.sun_family = AF_UNIX; + strncpy(Addr.sun_path, SocketPath.str().c_str(), sizeof(Addr.sun_path) - 1); + + int status = connect(MaybeWinsocket, (struct sockaddr *)&Addr, sizeof(Addr)); + if (status == -1) { + return llvm::make_error<StringError>(getLastSocketErrorCode(), + "Connect socket failed"); + } +#ifdef _WIN32 + return _open_osfhandle(MaybeWinsocket, 0); +#else + return MaybeWinsocket; +#endif // _WIN32 +} + +raw_socket_stream::raw_socket_stream(int SocketFD) + : raw_fd_stream(SocketFD, true) {} + +Expected<std::unique_ptr<raw_socket_stream>> +raw_socket_stream::createConnectedUnix(StringRef SocketPath) { +#ifdef _WIN32 + WSABalancer _; +#endif // _WIN32 + Expected<int> FD = GetSocketFD(SocketPath); + if (!FD) + return FD.takeError(); + return 
std::make_unique<raw_socket_stream>(*FD); +} + +raw_socket_stream::~raw_socket_stream() {} + +//===----------------------------------------------------------------------===// +// raw_string_ostream +//===----------------------------------------------------------------------===// + +void raw_string_ostream::write_impl(const char *Ptr, size_t Size) { + OS.append(Ptr, Size); +} diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td index c600bcaab2b3..68f452039c9b 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td @@ -622,6 +622,17 @@ def FeatureLdpAlignedOnly : SubtargetFeature<"ldp-aligned-only", "HasLdpAlignedO def FeatureStpAlignedOnly : SubtargetFeature<"stp-aligned-only", "HasStpAlignedOnly", "true", "In order to emit stp, first check if the store will be aligned to 2 * element_size">; +// AArch64 2023 Architecture Extensions (v9.5-A) + +def FeatureCPA : SubtargetFeature<"cpa", "HasCPA", "true", + "Enable Armv9.5-A Checked Pointer Arithmetic (FEAT_CPA)">; + +def FeaturePAuthLR : SubtargetFeature<"pauth-lr", "HasPAuthLR", + "true", "Enable Armv9.5-A PAC enhancements (FEAT_PAuth_LR)">; + +def FeatureTLBIW : SubtargetFeature<"tlbiw", "HasTLBIW", "true", + "Enable ARMv9.5-A TLBI VMALL for Dirty State (FEAT_TLBIW)">; + //===----------------------------------------------------------------------===// // Architectures. 
// @@ -692,7 +703,7 @@ def HasV9_4aOps : SubtargetFeature< def HasV9_5aOps : SubtargetFeature< "v9.5a", "HasV9_5aOps", "true", "Support ARM v9.5a instructions", - [HasV9_4aOps]>; + [HasV9_4aOps, FeatureCPA]>; def HasV8_0rOps : SubtargetFeature< "v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions", @@ -783,7 +794,7 @@ let F = [HasSVE2p1, HasSVE2p1_or_HasSME2, HasSVE2p1_or_HasSME2p1] in def SVE2p1Unsupported : AArch64Unsupported; def SVE2Unsupported : AArch64Unsupported { - let F = !listconcat([HasSVE2, HasSVE2orSME, HasSSVE_FP8FMA, HasSMEF8F16, + let F = !listconcat([HasSVE2, HasSVE2orSME, HasSVE2orSME2, HasSSVE_FP8FMA, HasSMEF8F16, HasSMEF8F32, HasSVE2AES, HasSVE2SHA3, HasSVE2SM4, HasSVE2BitPerm], SVE2p1Unsupported.F); } @@ -797,7 +808,7 @@ let F = [HasSME2p1, HasSVE2p1_or_HasSME2p1] in def SME2p1Unsupported : AArch64Unsupported; def SME2Unsupported : AArch64Unsupported { - let F = !listconcat([HasSME2, HasSVE2p1_or_HasSME2, HasSSVE_FP8FMA, + let F = !listconcat([HasSME2, HasSVE2orSME2, HasSVE2p1_or_HasSME2, HasSSVE_FP8FMA, HasSMEF8F16, HasSMEF8F32], SME2p1Unsupported.F); } @@ -807,7 +818,7 @@ def SMEUnsupported : AArch64Unsupported { SME2Unsupported.F); } -let F = [HasPAuth] in +let F = [HasPAuth, HasPAuthLR] in def PAUnsupported : AArch64Unsupported; include "AArch64SchedA53.td" diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td index d6c00be80bd9..99f256b88782 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td @@ -134,6 +134,14 @@ def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn, form_duplane, shuf_to_ins]>; +// Turn G_UNMERGE_VALUES -> G_EXTRACT_VECTOR_ELT's +def vector_unmerge_lowering : GICombineRule < + (defs root:$root), + (match (wip_match_opcode G_UNMERGE_VALUES):$root, + [{ return matchScalarizeVectorUnmerge(*${root}, MRI); }]), + (apply [{ 
applyScalarizeVectorUnmerge(*${root}, MRI, B); }]) +>; + def adjust_icmp_imm_matchdata : GIDefMatchData<"std::pair<uint64_t, CmpInst::Predicate>">; def adjust_icmp_imm : GICombineRule < @@ -251,7 +259,8 @@ def AArch64PostLegalizerLowering icmp_lowering, build_vector_lowering, lower_vector_fcmp, form_truncstore, vector_sext_inreg_to_shift, - unmerge_ext_to_unmerge, lower_mull]> { + unmerge_ext_to_unmerge, lower_mull, + vector_unmerge_lowering]> { } // Post-legalization combines which are primarily optimizations. diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 757471d6a905..bb7f4d907ffd 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -747,6 +747,15 @@ bool AArch64ExpandPseudo::expandSetTagLoop( bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned Opc, unsigned N) { + assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI || + Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) && + "Unexpected opcode"); + unsigned RState = (Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI) + ? RegState::Define + : 0; + unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) + ? AArch64::zsub0 + : AArch64::psub0; const TargetRegisterInfo *TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); MachineInstr &MI = *MBBI; @@ -756,9 +765,8 @@ bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB, assert(ImmOffset >= -256 && ImmOffset < 256 && "Immediate spill offset out of range"); BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)) - .addReg( - TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset), - Opc == AArch64::LDR_ZXI ? 
RegState::Define : 0) + .addReg(TRI->getSubReg(MI.getOperand(0).getReg(), sub0 + Offset), + RState) .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill)) .addImm(ImmOffset); } @@ -1492,12 +1500,16 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3); case AArch64::STR_ZZXI: return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2); + case AArch64::STR_PPXI: + return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2); case AArch64::LDR_ZZZZXI: return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4); case AArch64::LDR_ZZZXI: return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3); case AArch64::LDR_ZZXI: return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2); + case AArch64::LDR_PPXI: + return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2); case AArch64::BLR_RVMARKER: return expandCALL_RVMARKER(MBB, MBBI); case AArch64::BLR_BTI: diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 3882e843fb69..dffe69bdb900 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -13708,15 +13708,18 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS, APInt SplatValue; APInt SplatUndef; - unsigned SplatBitSize; + unsigned SplatBitSize = 0; bool HasAnyUndefs; BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode()); bool IsCnst = BVN && BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs); - bool IsZero = IsCnst && SplatValue == 0; - bool IsOne = IsCnst && SplatValue == 1; - bool IsMinusOne = IsCnst && SplatValue.isAllOnes(); + + bool IsSplatUniform = + SrcVT.getVectorElementType().getSizeInBits() >= SplatBitSize; + bool IsZero = IsCnst && SplatValue == 0 && IsSplatUniform; + bool IsOne = IsCnst && SplatValue == 1 && IsSplatUniform; + bool IsMinusOne = 
IsCnst && SplatValue.isAllOnes() && IsSplatUniform; if (SrcVT.getVectorElementType().isFloatingPoint()) { switch (CC) { diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 68e87f491a09..cb63d8726744 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -2368,6 +2368,80 @@ class ClearAuth<bits<1> data, string asm> let Inst{4-0} = Rd; } +// v9.5-A FEAT_PAuth_LR + +class SignAuthFixedRegs<bits<5> opcode2, bits<6> opcode, string asm> + : I<(outs), (ins), asm, "", "", []>, + Sched<[WriteI, ReadI]> { + let Inst{31} = 0b1; // sf + let Inst{30} = 0b1; + let Inst{29} = 0b0; // S + let Inst{28-21} = 0b11010110; + let Inst{20-16} = opcode2; + let Inst{15-10} = opcode; + let Inst{9-5} = 0b11111; // Rn + let Inst{4-0} = 0b11110; // Rd +} + +def PAuthPCRelLabel16Operand : PCRelLabel<16> { + let Name = "PAuthPCRelLabel16"; + let PredicateMethod = "isPAuthPCRelLabel16Operand"; +} +def am_pauth_pcrel : Operand<OtherVT> { + let EncoderMethod = "getPAuthPCRelOpValue"; + let DecoderMethod = "DecodePCRelLabel16"; + let PrintMethod = "printAlignedLabel"; + let ParserMatchClass = PAuthPCRelLabel16Operand; + let OperandType = "OPERAND_PCREL"; +} + +class SignAuthPCRel<bits<2> opc, string asm> + : I<(outs), (ins am_pauth_pcrel:$label), asm, "\t$label", "", []>, + Sched<[]> { + bits<16> label; + let Inst{31} = 0b1; // sf + let Inst{30-23} = 0b11100111; + let Inst{22-21} = opc; + let Inst{20-5} = label; // imm + let Inst{4-0} = 0b11111; // Rd +} + +class SignAuthOneReg<bits<5> opcode2, bits<6> opcode, string asm> + : I<(outs), (ins GPR64:$Rn), asm, "\t$Rn", "", []>, + Sched<[]> { + bits<5> Rn; + let Inst{31} = 0b1; // sf + let Inst{30} = 0b1; + let Inst{29} = 0b0; // S + let Inst{28-21} = 0b11010110; + let Inst{20-16} = opcode2; + let Inst{15-10} = opcode; + let Inst{9-5} = Rn; + let 
Inst{4-0} = 0b11110; // Rd +} + +class SignAuthReturnPCRel<bits<3> opc, bits<5> op2, string asm> + : I<(outs), (ins am_pauth_pcrel:$label), asm, "\t$label", "", []>, + Sched<[WriteAtomic]> { + bits<16> label; + let Inst{31-24} = 0b01010101; + let Inst{23-21} = opc; + let Inst{20-5} = label; // imm16 + let Inst{4-0} = op2; +} + +class SignAuthReturnReg<bits<6> op3, string asm> + : I<(outs), (ins GPR64common:$Rm), asm, "\t$Rm", "", []>, + Sched<[WriteAtomic]> { + bits<5> Rm; + let Inst{31-25} = 0b1101011; + let Inst{24-21} = 0b0010; // opc + let Inst{20-16} = 0b11111; // op2 + let Inst{15-10} = op3; + let Inst{9-5} = 0b11111; // Rn + let Inst{4-0} = Rm; // op4 (Rm) +} + // Base class for the Armv8.4-A 8 and 16-bit flag manipulation instructions class BaseFlagManipulation<bit sf, bit sz, dag iops, string asm, string ops> : I<(outs), iops, asm, ops, "", []>, @@ -12446,6 +12520,58 @@ class SystemPXtI<bit L, string asm> : BaseSYSPEncoding<L, asm, "\t$op1, $Cn, $Cm, $op2, $Rt", (outs), (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, XSeqPairClassOperand:$Rt)>; +//---------------------------------------------------------------------------- +// 2023 Armv9.5 Extensions +//---------------------------------------------------------------------------- + +//--- +// Checked Pointer Arithmetic (FEAT_CPA) +//--- + +def LSLImm3ShiftOperand : AsmOperandClass { + let SuperClasses = [ExtendOperandLSL64]; + let Name = "LSLImm3Shift"; + let RenderMethod = "addLSLImm3ShifterOperands"; + let DiagnosticType = "AddSubLSLImm3ShiftLarge"; +} + +def lsl_imm3_shift_operand : Operand<i32> { + let PrintMethod = "printShifter"; + let ParserMatchClass = LSLImm3ShiftOperand; +} + +// Base CPA scalar add/subtract with lsl #imm3 shift +class BaseAddSubCPA<bit isSub, string asm> : I<(outs GPR64sp:$Rd), + (ins GPR64sp:$Rn, GPR64:$Rm, lsl_imm3_shift_operand:$shift_imm), + asm, "\t$Rd, $Rn, $Rm$shift_imm", "", []>, Sched<[]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<3> shift_imm; + let 
Inst{31} = 0b1; + let Inst{30} = isSub; + let Inst{29-21} = 0b011010000; + let Inst{20-16} = Rm; + let Inst{15-13} = 0b001; + let Inst{12-10} = shift_imm; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +// Alias for CPA scalar add/subtract with no shift +class AddSubCPAAlias<string asm, Instruction inst> + : InstAlias<asm#"\t$Rd, $Rn, $Rm", + (inst GPR64sp:$Rd, GPR64sp:$Rn, GPR64:$Rm, 0)>; + +multiclass AddSubCPA<bit isSub, string asm> { + def _shift : BaseAddSubCPA<isSub, asm>; + def _noshift : AddSubCPAAlias<asm, !cast<Instruction>(NAME#"_shift")>; +} + +class MulAccumCPA<bit isSub, string asm> + : BaseMulAccum<isSub, 0b011, GPR64, GPR64, asm, []>, Sched<[]> { + let Inst{31} = 0b1; +} //---------------------------------------------------------------------------- // Allow the size specifier tokens to be upper case, not just lower. diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 7d71c316bcb0..1cfbf4737a6f 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3771,6 +3771,13 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, MinOffset = -256; MaxOffset = 255; break; + case AArch64::LDR_PPXI: + case AArch64::STR_PPXI: + Scale = TypeSize::getScalable(2); + Width = TypeSize::getScalable(2 * 2); + MinOffset = -256; + MaxOffset = 254; + break; case AArch64::LDR_ZXI: case AArch64::STR_ZXI: Scale = TypeSize::getScalable(16); @@ -4089,6 +4096,16 @@ AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) { return MI.getOperand(Idx); } +const MachineOperand & +AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) { + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case AArch64::LDRBBroX: + return MI.getOperand(4); + } +} + static const TargetRegisterClass *getRegClass(const MachineInstr &MI, Register Reg) { if 
(MI.getParent() == nullptr) @@ -4804,6 +4821,10 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, assert(SrcReg != AArch64::WSP); } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) Opc = AArch64::STRSui; + else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) { + Opc = AArch64::STR_PPXI; + StackID = TargetStackID::ScalableVector; + } break; case 8: if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { @@ -4980,6 +5001,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, assert(DestReg != AArch64::WSP); } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) Opc = AArch64::LDRSui; + else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) { + Opc = AArch64::LDR_PPXI; + StackID = TargetStackID::ScalableVector; + } break; case 8: if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { @@ -8779,12 +8804,23 @@ AArch64InstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT, // Don't outline anything used for return address signing. The outlined // function will get signed later if needed switch (MI.getOpcode()) { + case AArch64::PACM: case AArch64::PACIASP: case AArch64::PACIBSP: + case AArch64::PACIASPPC: + case AArch64::PACIBSPPC: case AArch64::AUTIASP: case AArch64::AUTIBSP: + case AArch64::AUTIASPPCi: + case AArch64::AUTIASPPCr: + case AArch64::AUTIBSPPCi: + case AArch64::AUTIBSPPCr: case AArch64::RETAA: case AArch64::RETAB: + case AArch64::RETAASPPCi: + case AArch64::RETAASPPCr: + case AArch64::RETABSPPCi: + case AArch64::RETABSPPCr: case AArch64::EMITBKEY: case AArch64::PAUTH_PROLOGUE: case AArch64::PAUTH_EPILOGUE: diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 6526f6740747..db24a19fe5f8 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -111,6 +111,9 @@ public: /// Returns the immediate offset operator of a load/store. 
static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI); + /// Returns the shift amount operator of a load/store. + static const MachineOperand &getLdStAmountOp(const MachineInstr &MI); + /// Returns whether the instruction is FP or NEON. static bool isFpOrNEON(const MachineInstr &MI); diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 44b0337fe787..62b2bf490f37 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -61,6 +61,9 @@ def HasLOR : Predicate<"Subtarget->hasLOR()">, def HasPAuth : Predicate<"Subtarget->hasPAuth()">, AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">; +def HasPAuthLR : Predicate<"Subtarget->hasPAuthLR()">, + AssemblerPredicateWithAll<(all_of FeaturePAuthLR), "pauth-lr">; + def HasJS : Predicate<"Subtarget->hasJS()">, AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">; @@ -289,6 +292,8 @@ def HasCHK : Predicate<"Subtarget->hasCHK()">, AssemblerPredicateWithAll<(all_of FeatureCHK), "chk">; def HasGCS : Predicate<"Subtarget->hasGCS()">, AssemblerPredicateWithAll<(all_of FeatureGCS), "gcs">; +def HasCPA : Predicate<"Subtarget->hasCPA()">, + AssemblerPredicateWithAll<(all_of FeatureCPA), "cpa">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsWindows : Predicate<"Subtarget->isTargetWindows()">; @@ -1644,6 +1649,47 @@ let Predicates = [HasPAuth] in { } +// v9.5-A pointer authentication extensions + +// Always accept "pacm" as an alias for "hint #39", but don't emit it when +// disassembling if we don't have the pauth-lr feature. 
+let CRm = 0b0100 in { + def PACM : SystemNoOperands<0b111, "hint\t#39">; +} +def : InstAlias<"pacm", (PACM), 0>; + +let Predicates = [HasPAuthLR] in { + let Defs = [LR], Uses = [LR, SP] in { + // opcode2, opcode, asm + def PACIASPPC : SignAuthFixedRegs<0b00001, 0b101000, "paciasppc">; + def PACIBSPPC : SignAuthFixedRegs<0b00001, 0b101001, "pacibsppc">; + def PACNBIASPPC : SignAuthFixedRegs<0b00001, 0b100000, "pacnbiasppc">; + def PACNBIBSPPC : SignAuthFixedRegs<0b00001, 0b100001, "pacnbibsppc">; + // opc, asm + def AUTIASPPCi : SignAuthPCRel<0b00, "autiasppc">; + def AUTIBSPPCi : SignAuthPCRel<0b01, "autibsppc">; + // opcode2, opcode, asm + def AUTIASPPCr : SignAuthOneReg<0b00001, 0b100100, "autiasppc">; + def AUTIBSPPCr : SignAuthOneReg<0b00001, 0b100101, "autibsppc">; + // opcode2, opcode, asm + def PACIA171615 : SignAuthFixedRegs<0b00001, 0b100010, "pacia171615">; + def PACIB171615 : SignAuthFixedRegs<0b00001, 0b100011, "pacib171615">; + def AUTIA171615 : SignAuthFixedRegs<0b00001, 0b101110, "autia171615">; + def AUTIB171615 : SignAuthFixedRegs<0b00001, 0b101111, "autib171615">; + } + + let Uses = [LR, SP], isReturn = 1, isTerminator = 1, isBarrier = 1 in { + // opc, op2, asm + def RETAASPPCi : SignAuthReturnPCRel<0b000, 0b11111, "retaasppc">; + def RETABSPPCi : SignAuthReturnPCRel<0b001, 0b11111, "retabsppc">; + // op3, asm + def RETAASPPCr : SignAuthReturnReg<0b000010, "retaasppc">; + def RETABSPPCr : SignAuthReturnReg<0b000011, "retabsppc">; + } + def : InstAlias<"pacm", (PACM), 1>; +} + + // v8.3a floating point conversion for javascript let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32, @@ -6480,23 +6526,23 @@ def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), // f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane)) def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))), (EXTRACT_SUBREG 
(INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>; -def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, 0)))), +def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, (i64 0))))), (EXTRACT_SUBREG V128:$src, ssub)>; def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))), (EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>; -def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, 0)))), +def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, (i64 0))))), (EXTRACT_SUBREG V128:$src, dsub)>; // Floating point vector extractions are codegen'd as either a sequence of // subregister extractions, or a MOV (aka DUP here) if // the lane number is anything other than zero. -def : Pat<(vector_extract (v2f64 V128:$Rn), 0), +def : Pat<(f64 (vector_extract (v2f64 V128:$Rn), (i64 0))), (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>; -def : Pat<(vector_extract (v4f32 V128:$Rn), 0), +def : Pat<(f32 (vector_extract (v4f32 V128:$Rn), (i64 0))), (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>; -def : Pat<(vector_extract (v8f16 V128:$Rn), 0), +def : Pat<(f16 (vector_extract (v8f16 V128:$Rn), (i64 0))), (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>; -def : Pat<(vector_extract (v8bf16 V128:$Rn), 0), +def : Pat<(bf16 (vector_extract (v8bf16 V128:$Rn), (i64 0))), (bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>; @@ -9402,6 +9448,10 @@ let Predicates = [HasD128] in { } } +//===----------------------------===// +// 2023 Architecture Extensions: +//===----------------------------===// + let Predicates = [HasFP8] in { defm F1CVTL : SIMDMixedTwoVectorFP8<0b00, "f1cvtl">; defm F2CVTL : SIMDMixedTwoVectorFP8<0b01, "f2cvtl">; @@ -9443,6 +9493,19 @@ let Predicates = [HasFP8DOT4] in { defm FDOT : SIMDThreeSameVectorDOT4<"fdot">; } // End let Predicates = [HasFP8DOT4] +//===----------------------------------------------------------------------===// +// Checked Pointer Arithmetic (FEAT_CPA) 
+//===----------------------------------------------------------------------===// +let Predicates = [HasCPA] in { + // Scalar add/subtract + defm ADDPT : AddSubCPA<0, "addpt">; + defm SUBPT : AddSubCPA<1, "subpt">; + + // Scalar multiply-add/subtract + def MADDPT : MulAccumCPA<0, "maddpt">; + def MSUBPT : MulAccumCPA<1, "msubpt">; +} + include "AArch64InstrAtomics.td" include "AArch64SVEInstrInfo.td" include "AArch64SMEInstrInfo.td" diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index dc6d5b8950c3..b435b3ce03e7 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -62,6 +62,8 @@ STATISTIC(NumUnscaledPairCreated, "Number of load/store from unscaled generated"); STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted"); STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted"); +STATISTIC(NumConstOffsetFolded, + "Number of const offset of index address folded"); DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming", "Controls which pairs are considered for renaming"); @@ -75,6 +77,11 @@ static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit", static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden); +// The LdStConstLimit limits how far we search for const offset instructions +// when we form index address load/store instructions. +static cl::opt<unsigned> LdStConstLimit("aarch64-load-store-const-scan-limit", + cl::init(10), cl::Hidden); + // Enable register renaming to find additional store pairing opportunities. 
static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming", cl::init(true), cl::Hidden); @@ -171,6 +178,13 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit); + // Scan the instruction list to find a register assigned with a const + // value that can be combined with the current instruction (a load or store) + // using base addressing with writeback. Scan backwards. + MachineBasicBlock::iterator + findMatchingConstOffsetBackward(MachineBasicBlock::iterator I, unsigned Limit, + unsigned &Offset); + // Scan the instruction list to find a base register update that can // be combined with the current instruction (a load or store) using // pre or post indexed addressing with writeback. Scan backwards. @@ -182,11 +196,19 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI, unsigned BaseReg, int Offset); + bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI, + unsigned IndexReg, unsigned &Offset); + // Merge a pre- or post-index base register update into a ld/st instruction. MachineBasicBlock::iterator mergeUpdateInsn(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update, bool IsPreIdx); + MachineBasicBlock::iterator + mergeConstOffsetInsn(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Update, unsigned Offset, + int Scale); + // Find and merge zero store instructions. bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI); @@ -199,6 +221,9 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { // Find and merge a base register updates before or after a ld/st instruction. bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI); + // Find and merge an index ldr/st instruction into a base ld/st instruction.
+ bool tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, int Scale); + bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt); bool runOnMachineFunction(MachineFunction &Fn) override; @@ -481,6 +506,16 @@ static unsigned getPreIndexedOpcode(unsigned Opc) { } } +static unsigned getBaseAddressOpcode(unsigned Opc) { + // TODO: Add more index address loads/stores. + switch (Opc) { + default: + llvm_unreachable("Opcode has no base address equivalent!"); + case AArch64::LDRBBroX: + return AArch64::LDRBBui; + } +} + static unsigned getPostIndexedOpcode(unsigned Opc) { switch (Opc) { default: @@ -722,6 +757,20 @@ static bool isMergeableLdStUpdate(MachineInstr &MI) { } } +// Make sure this is a reg+reg Ld/St +static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) { + unsigned Opc = MI.getOpcode(); + switch (Opc) { + default: + return false; + // Scaled instructions. + // TODO: Add more index address loads/stores. + case AArch64::LDRBBroX: + Scale = 1; + return true; + } +} + static bool isRewritableImplicitDef(unsigned Opc) { switch (Opc) { default: @@ -2018,6 +2067,63 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, return NextI; } +MachineBasicBlock::iterator +AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Update, + unsigned Offset, int Scale) { + assert((Update->getOpcode() == AArch64::MOVKWi) && + "Unexpected const mov instruction to merge!"); + MachineBasicBlock::iterator E = I->getParent()->end(); + MachineBasicBlock::iterator NextI = next_nodbg(I, E); + MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E); + MachineInstr &MemMI = *I; + unsigned Mask = (1 << 12) * Scale - 1; + unsigned Low = Offset & Mask; + unsigned High = Offset - Low; + Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg(); + Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg(); + MachineInstrBuilder AddMIB, MemMIB; + + // Add IndexReg, BaseReg, High (the 
BaseReg may be SP) + AddMIB = + BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri)) + .addDef(IndexReg) + .addUse(BaseReg) + .addImm(High >> 12) // shifted value + .addImm(12); // shift 12 + (void)AddMIB; + // Ld/St DestReg, IndexReg, Imm12 + unsigned NewOpc = getBaseAddressOpcode(I->getOpcode()); + MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) + .add(getLdStRegOp(MemMI)) + .add(AArch64InstrInfo::getLdStOffsetOp(MemMI)) + .addImm(Low / Scale) + .setMemRefs(I->memoperands()) + .setMIFlags(I->mergeFlagsWith(*Update)); + (void)MemMIB; + + ++NumConstOffsetFolded; + LLVM_DEBUG(dbgs() << "Creating base address load/store.\n"); + LLVM_DEBUG(dbgs() << " Replacing instructions:\n "); + LLVM_DEBUG(PrevI->print(dbgs())); + LLVM_DEBUG(dbgs() << " "); + LLVM_DEBUG(Update->print(dbgs())); + LLVM_DEBUG(dbgs() << " "); + LLVM_DEBUG(I->print(dbgs())); + LLVM_DEBUG(dbgs() << " with instruction:\n "); + LLVM_DEBUG(((MachineInstr *)AddMIB)->print(dbgs())); + LLVM_DEBUG(dbgs() << " "); + LLVM_DEBUG(((MachineInstr *)MemMIB)->print(dbgs())); + LLVM_DEBUG(dbgs() << "\n"); + + // Erase the old instructions for the block. + I->eraseFromParent(); + PrevI->eraseFromParent(); + Update->eraseFromParent(); + + return NextI; +} + bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI, unsigned BaseReg, int Offset) { @@ -2065,6 +2171,31 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI, return false; } +bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI, + MachineInstr &MI, + unsigned IndexReg, + unsigned &Offset) { + // The update instruction source and destination register must be the + // same as the load/store index register. + if (MI.getOpcode() == AArch64::MOVKWi && + TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) { + + // movz + movk hold a large offset of a Ld/St instruction. 
+ MachineBasicBlock::iterator B = MI.getParent()->begin(); + MachineBasicBlock::iterator MBBI = &MI; + MBBI = prev_nodbg(MBBI, B); + MachineInstr &MovzMI = *MBBI; + if (MovzMI.getOpcode() == AArch64::MOVZWi) { + unsigned Low = MovzMI.getOperand(1).getImm(); + unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm(); + Offset = High + Low; + // 12-bit optionally shifted immediates are legal for adds. + return Offset >> 24 == 0; + } + } + return false; +} + MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) { MachineBasicBlock::iterator E = I->getParent()->end(); @@ -2220,6 +2351,60 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( return E; } +MachineBasicBlock::iterator +AArch64LoadStoreOpt::findMatchingConstOffsetBackward( + MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) { + MachineBasicBlock::iterator B = I->getParent()->begin(); + MachineBasicBlock::iterator E = I->getParent()->end(); + MachineInstr &MemMI = *I; + MachineBasicBlock::iterator MBBI = I; + + // If the load is the first instruction in the block, there's obviously + // not any matching const mov. + if (MBBI == B) + return E; + + // Make sure the IndexReg is killed and the shift amount is zero. + // TODO: Relax this restriction to extend, simplify processing now. + if (!AArch64InstrInfo::getLdStOffsetOp(MemMI).isKill() || + !AArch64InstrInfo::getLdStAmountOp(MemMI).isImm() || + (AArch64InstrInfo::getLdStAmountOp(MemMI).getImm() != 0)) + return E; + + Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg(); + + // Track which register units have been modified and used between the first + // insn (inclusive) and the second insn.
+ ModifiedRegUnits.clear(); + UsedRegUnits.clear(); + unsigned Count = 0; + do { + MBBI = prev_nodbg(MBBI, B); + MachineInstr &MI = *MBBI; + + // Don't count transient instructions towards the search limit since there + // may be different numbers of them if e.g. debug information is present. + if (!MI.isTransient()) + ++Count; + + // If we found a match, return it. + if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) { + return MBBI; + } + + // Update the status of what the instruction clobbered and used. + LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); + + // Otherwise, if the index register is used or modified, we have no match, + // so return early. + if (!ModifiedRegUnits.available(IndexReg) || + !UsedRegUnits.available(IndexReg)) + return E; + + } while (MBBI != B && Count < Limit); + return E; +} + bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore( MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; @@ -2404,6 +2589,34 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate return false; } +bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, + int Scale) { + MachineInstr &MI = *MBBI; + MachineBasicBlock::iterator E = MI.getParent()->end(); + MachineBasicBlock::iterator Update; + + // Don't know how to handle unscaled pre/post-index versions below, so bail. + if (TII->hasUnscaledLdStOffset(MI.getOpcode())) + return false; + + // Look back to try to find a const offset for index LdSt instruction. For + // example, + // mov x8, #LargeImm ; = a * (1<<12) + imm12 + // ldr x1, [x0, x8] + // merged into: + // add x8, x0, a * (1<<12) + // ldr x1, [x8, imm12] + unsigned Offset; + Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset); + if (Update != E && (Offset & (Scale - 1)) == 0) { + // Merge the imm12 into the ld/st. 
+ MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale); + return true; + } + + return false; +} + bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt) { @@ -2482,6 +2695,22 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, ++MBBI; } + // 5) Find a register assigned with a const value that can be combined + // into the load or store. e.g., + // mov x8, #LargeImm ; = a * (1<<12) + imm12 + // ldr x1, [x0, x8] + // ; becomes + // add x8, x0, a * (1<<12) + // ldr x1, [x8, imm12] + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E;) { + int Scale; + if (isMergeableIndexLdSt(*MBBI, Scale) && tryToMergeIndexLdSt(MBBI, Scale)) + Modified = true; + else + ++MBBI; + } + return Modified; } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp index 9da59ef2a806..1a8c71888a85 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp @@ -93,16 +93,24 @@ AArch64FunctionInfo::AArch64FunctionInfo(const Function &F, // TODO: skip functions that have no instrumented allocas for optimization IsMTETagged = F.hasFnAttribute(Attribute::SanitizeMemTag); - if (!F.hasFnAttribute("branch-target-enforcement")) { - if (const auto *BTE = mdconst::extract_or_null<ConstantInt>( - F.getParent()->getModuleFlag("branch-target-enforcement"))) - BranchTargetEnforcement = BTE->getZExtValue(); - } else { - const StringRef BTIEnable = - F.getFnAttribute("branch-target-enforcement").getValueAsString(); - assert(BTIEnable == "true" || BTIEnable == "false"); - BranchTargetEnforcement = BTIEnable == "true"; - } + // BTI/PAuthLR may be set either on the function or the module. Set Bool from + // either the function attribute or module attribute, depending on what is + // set.
+ // Note: the module attribute is numeric (0 or 1) but the function attribute + // is stringy ("true" or "false"). + auto TryFnThenModule = [&](StringRef AttrName, bool &Bool) { + if (F.hasFnAttribute(AttrName)) { + const StringRef V = F.getFnAttribute(AttrName).getValueAsString(); + assert(V.equals_insensitive("true") || V.equals_insensitive("false")); + Bool = V.equals_insensitive("true"); + } else if (const auto *ModVal = mdconst::extract_or_null<ConstantInt>( + F.getParent()->getModuleFlag(AttrName))) { + Bool = ModVal->getZExtValue(); + } + }; + + TryFnThenModule("branch-target-enforcement", BranchTargetEnforcement); + TryFnThenModule("branch-protection-pauth-lr", BranchProtectionPAuthLR); // The default stack probe size is 4096 if the function has no // stack-probe-size attribute. This is a safe default because it is the diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h index 219f83cfd32e..cd4a18bfbc23 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCLinkerOptimizationHint.h" +#include "llvm/MC/MCSymbol.h" #include <cassert> #include <optional> @@ -164,10 +165,21 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { /// SignWithBKey modifies the default PAC-RET mode to signing with the B key. bool SignWithBKey = false; + /// SignInstrLabel marks the location of the PAC-RET signing instruction + /// within the prologue, so it can be re-used for authentication in the + /// epilogue when using PC as a second salt (FEAT_PAuth_LR) + MCSymbol *SignInstrLabel = nullptr; + /// BranchTargetEnforcement enables placing BTI instructions at potential /// indirect branch destinations.
bool BranchTargetEnforcement = false; + /// Indicates that SP signing should be diversified with PC as-per PAuthLR. + /// This is set by -mbranch-protection and will emit NOP instructions unless + /// the subtarget feature +pauthlr is also used (in which case non-NOP + /// instructions are emitted). + bool BranchProtectionPAuthLR = false; + /// Whether this function has an extended frame record [Ctx, FP, LR]. If so, /// bit 60 of the in-memory FP will be 1 to enable other tools to detect the /// extended record. @@ -436,10 +448,16 @@ public: bool needsShadowCallStackPrologueEpilogue(MachineFunction &MF) const; bool shouldSignWithBKey() const { return SignWithBKey; } + + MCSymbol *getSigningInstrLabel() const { return SignInstrLabel; } + void setSigningInstrLabel(MCSymbol *Label) { SignInstrLabel = Label; } + bool isMTETagged() const { return IsMTETagged; } bool branchTargetEnforcement() const { return BranchTargetEnforcement; } + bool branchProtectionPAuthLR() const { return BranchProtectionPAuthLR; } + void setHasSwiftAsyncContext(bool HasContext) { HasSwiftAsyncContext = HasContext; } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp index 7576d2a899d1..7509afaeb5fe 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp @@ -60,11 +60,35 @@ FunctionPass *llvm::createAArch64PointerAuthPass() { char AArch64PointerAuth::ID = 0; +// Where PAuthLR support is not known at compile time, it is supported using +// PACM. PACM is in the hint space so has no effect when PAuthLR is not +// supported by the hardware, but will alter the behaviour of PACI*SP, AUTI*SP +// and RETAA/RETAB if the hardware supports PAuthLR. 
+static void BuildPACM(const AArch64Subtarget &Subtarget, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, + MachineInstr::MIFlag Flags, MCSymbol *PACSym = nullptr) { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + auto &MFnI = *MBB.getParent()->getInfo<AArch64FunctionInfo>(); + + // ADR X16,<address_of_PACIASP> + if (PACSym) { + assert(Flags == MachineInstr::FrameDestroy); + BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADR)) + .addReg(AArch64::X16, RegState::Define) + .addSym(PACSym); + } + + // Only emit PACM if -mbranch-protection has +pc and the target does not + // have feature +pauth-lr. + if (MFnI.branchProtectionPAuthLR() && !Subtarget.hasPAuthLR()) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACM)).setMIFlag(Flags); +} + void AArch64PointerAuth::signLR(MachineFunction &MF, MachineBasicBlock::iterator MBBI) const { - const AArch64FunctionInfo *MFnI = MF.getInfo<AArch64FunctionInfo>(); - bool UseBKey = MFnI->shouldSignWithBKey(); - bool EmitCFI = MFnI->needsDwarfUnwindInfo(MF); + auto &MFnI = *MF.getInfo<AArch64FunctionInfo>(); + bool UseBKey = MFnI.shouldSignWithBKey(); + bool EmitCFI = MFnI.needsDwarfUnwindInfo(MF); bool NeedsWinCFI = MF.hasWinCFI(); MachineBasicBlock &MBB = *MBBI->getParent(); @@ -77,11 +101,29 @@ void AArch64PointerAuth::signLR(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); } + // PAuthLR authentication instructions need to know the value of PC at the + // point of signing (PACI*). + if (MFnI.branchProtectionPAuthLR()) { + MCSymbol *PACSym = MF.getMMI().getContext().createTempSymbol(); + MFnI.setSigningInstrLabel(PACSym); + } + // No SEH opcode for this one; it doesn't materialize into an // instruction on Windows. - BuildMI(MBB, MBBI, DL, - TII->get(UseBKey ? AArch64::PACIBSP : AArch64::PACIASP)) - .setMIFlag(MachineInstr::FrameSetup); + if (MFnI.branchProtectionPAuthLR() && Subtarget->hasPAuthLR()) { + BuildMI(MBB, MBBI, DL, + TII->get(MFnI.shouldSignWithBKey() ? 
AArch64::PACIBSPPC + : AArch64::PACIASPPC)) + .setMIFlag(MachineInstr::FrameSetup) + ->setPreInstrSymbol(MF, MFnI.getSigningInstrLabel()); + } else { + BuildPACM(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup); + BuildMI(MBB, MBBI, DL, + TII->get(MFnI.shouldSignWithBKey() ? AArch64::PACIBSP + : AArch64::PACIASP)) + .setMIFlag(MachineInstr::FrameSetup) + ->setPreInstrSymbol(MF, MFnI.getSigningInstrLabel()); + } if (EmitCFI) { unsigned CFIIndex = @@ -118,15 +160,37 @@ void AArch64PointerAuth::authenticateLR( // DW_CFA_AARCH64_negate_ra_state can't be emitted. bool TerminatorIsCombinable = TI != MBB.end() && TI->getOpcode() == AArch64::RET; + MCSymbol *PACSym = MFnI->getSigningInstrLabel(); + if (Subtarget->hasPAuth() && TerminatorIsCombinable && !NeedsWinCFI && !MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) { - unsigned CombinedRetOpcode = UseBKey ? AArch64::RETAB : AArch64::RETAA; - BuildMI(MBB, TI, DL, TII->get(CombinedRetOpcode)).copyImplicitOps(*TI); + if (MFnI->branchProtectionPAuthLR() && Subtarget->hasPAuthLR()) { + assert(PACSym && "No PAC instruction to refer to"); + BuildMI(MBB, TI, DL, + TII->get(UseBKey ? AArch64::RETABSPPCi : AArch64::RETAASPPCi)) + .addSym(PACSym) + .copyImplicitOps(*MBBI) + .setMIFlag(MachineInstr::FrameDestroy); + } else { + BuildPACM(*Subtarget, MBB, TI, DL, MachineInstr::FrameDestroy, PACSym); + BuildMI(MBB, TI, DL, TII->get(UseBKey ? AArch64::RETAB : AArch64::RETAA)) + .copyImplicitOps(*MBBI) + .setMIFlag(MachineInstr::FrameDestroy); + } MBB.erase(TI); } else { - unsigned AutOpcode = UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP; - BuildMI(MBB, MBBI, DL, TII->get(AutOpcode)) - .setMIFlag(MachineInstr::FrameDestroy); + if (MFnI->branchProtectionPAuthLR() && Subtarget->hasPAuthLR()) { + assert(PACSym && "No PAC instruction to refer to"); + BuildMI(MBB, MBBI, DL, + TII->get(UseBKey ? 
AArch64::AUTIBSPPCi : AArch64::AUTIASPPCi)) + .addSym(PACSym) + .setMIFlag(MachineInstr::FrameDestroy); + } else { + BuildPACM(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy, PACSym); + BuildMI(MBB, MBBI, DL, + TII->get(UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP)) + .setMIFlag(MachineInstr::FrameDestroy); + } if (EmitAsyncCFI) { unsigned CFIIndex = diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 50527e08a061..344a15389063 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -316,6 +316,26 @@ def AArch64ssra : PatFrags<(ops node:$op1, node:$op2, node:$op3), [(int_aarch64_sve_ssra node:$op1, node:$op2, node:$op3), (add node:$op1, (AArch64asr_p (SVEAnyPredicate), node:$op2, (SVEShiftSplatImmR (i32 node:$op3))))]>; +// Replace pattern min(max(v1,v2),v3) by clamp +def AArch64sclamp : PatFrags<(ops node:$Zd, node:$Zn, node:$Zm), + [(int_aarch64_sve_sclamp node:$Zd, node:$Zn, node:$Zm), + (AArch64smin_p (SVEAllActive), + (AArch64smax_p (SVEAllActive), node:$Zd, node:$Zn), + node:$Zm) + ]>; +def AArch64uclamp : PatFrags<(ops node:$Zd, node:$Zn, node:$Zm), + [(int_aarch64_sve_uclamp node:$Zd, node:$Zn, node:$Zm), + (AArch64umin_p (SVEAllActive), + (AArch64umax_p (SVEAllActive), node:$Zd, node:$Zn), + node:$Zm) + ]>; +def AArch64fclamp : PatFrags<(ops node:$Zd, node:$Zn, node:$Zm), + [(int_aarch64_sve_fclamp node:$Zd, node:$Zn, node:$Zm), + (AArch64fminnm_p (SVEAllActive), + (AArch64fmaxnm_p (SVEAllActive), node:$Zd, node:$Zn), + node:$Zm) + ]>; + def SDT_AArch64FCVT : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCVecEltisVT<1,i1> @@ -2378,11 +2398,13 @@ let Predicates = [HasSVEorSME] in { def LDR_ZZXI : Pseudo<(outs ZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; def LDR_ZZZXI : 
Pseudo<(outs ZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def LDR_PPXI : Pseudo<(outs PPR2:$pp), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; } let mayStore = 1, hasSideEffects = 0 in { def STR_ZZXI : Pseudo<(outs), (ins ZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; def STR_ZZZXI : Pseudo<(outs), (ins ZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def STR_PPXI : Pseudo<(outs), (ins PPR2:$pp, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; } let AddedComplexity = 1 in { @@ -3802,8 +3824,8 @@ let Predicates = [HasSVE2BitPerm] in { let Predicates = [HasSVE2p1_or_HasSME] in { defm REVD_ZPmZ : sve2_int_perm_revd<"revd", AArch64revd_mt>; -defm SCLAMP_ZZZ : sve2_clamp<"sclamp", 0b0, int_aarch64_sve_sclamp>; -defm UCLAMP_ZZZ : sve2_clamp<"uclamp", 0b1, int_aarch64_sve_uclamp>; +defm SCLAMP_ZZZ : sve2_clamp<"sclamp", 0b0, AArch64sclamp>; +defm UCLAMP_ZZZ : sve2_clamp<"uclamp", 0b1, AArch64uclamp>; defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel", int_aarch64_sve_psel>; } // End HasSVE2p1_or_HasSME @@ -3813,7 +3835,7 @@ defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel", int_aarch64_sve_psel>; //===----------------------------------------------------------------------===// let Predicates = [HasSVE2p1_or_HasSME2] in { -defm FCLAMP_ZZZ : sve2p1_fclamp<"fclamp", int_aarch64_sve_fclamp>; +defm FCLAMP_ZZZ : sve2p1_fclamp<"fclamp", AArch64fclamp>; defm FDOT_ZZZ_S : sve_float_dot<0b0, 0b0, ZPR32, ZPR16, "fdot", nxv8f16, int_aarch64_sve_fdot_x2>; defm FDOT_ZZZI_S : sve_float_dot_indexed<0b0, 0b00, ZPR16, ZPR3b16, "fdot", nxv8f16, int_aarch64_sve_fdot_lane_x2>; @@ -4003,10 +4025,10 @@ def : InstAlias<"pfalse\t$Pd", (PFALSE PNRasPPR8:$Pd), 0>; } // End HasSVE2p1_or_HasSME2 
//===----------------------------------------------------------------------===// -// SVE2.1 non-widening BFloat16 to BFloat16 instructions +// Non-widening BFloat16 to BFloat16 instructions //===----------------------------------------------------------------------===// -let Predicates = [HasSVE2p1, HasB16B16, UseExperimentalZeroingPseudos] in { +let Predicates = [HasSVE2orSME2, HasB16B16, UseExperimentalZeroingPseudos] in { defm BFADD_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fadd>; defm BFSUB_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fsub>; defm BFMUL_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fmul>; @@ -4014,9 +4036,9 @@ defm BFMAXNM_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fmaxnm>; defm BFMINNM_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fminnm>; defm BFMIN_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fmin>; defm BFMAX_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fmax>; -} //HasSVE2p1_or_HasSME2p1, HasB16B16, UseExperimentalZeroingPseudos +} // HasSVE2orSME2, HasB16B16, UseExperimentalZeroingPseudos -let Predicates = [HasSVE2p1, HasB16B16] in { +let Predicates = [HasSVE2orSME2, HasB16B16] in { defm BFMLA_ZPmZZ : sve_fp_3op_p_zds_a_bf<0b00, "bfmla", "BFMLA_ZPZZZ", AArch64fmla_m1>; defm BFMLS_ZPmZZ : sve_fp_3op_p_zds_a_bf<0b01, "bfmls", "BFMLS_ZPZZZ", AArch64fmls_m1>; @@ -4055,8 +4077,8 @@ defm BFMINNM_ZPZZ : sve2p1_bf_bin_pred_zds<AArch64fminnm_p>; defm BFMUL_ZZZI : sve2p1_fp_bfmul_by_indexed_elem<"bfmul", int_aarch64_sve_fmul_lane>; -defm BFCLAMP_ZZZ : sve2p1_bfclamp<"bfclamp", int_aarch64_sve_fclamp>; -} // End HasSVE2p1_or_HasSME2p1, HasB16B16 +defm BFCLAMP_ZZZ : sve2p1_bfclamp<"bfclamp", AArch64fclamp>; +} // End HasSVE2orSME2, HasB16B16 //===----------------------------------------------------------------------===// @@ -4163,3 +4185,24 @@ let Predicates = [HasSVE2orSME2, HasLUT] in { // LUTI4 (two contiguous registers) defm LUTI4_Z2ZZI : sve2_luti4_vector_vg2_index<"luti4">; } // End HasSVE2orSME2, 
HasLUT + +//===----------------------------------------------------------------------===// +// Checked Pointer Arithmetic (FEAT_CPA) +//===----------------------------------------------------------------------===// +let Predicates = [HasSVE, HasCPA] in { + // Add/subtract (vectors, unpredicated) + def ADD_ZZZ_CPA : sve_int_bin_cons_arit_0<0b11, 0b010, "addpt", ZPR64>; + def SUB_ZZZ_CPA : sve_int_bin_cons_arit_0<0b11, 0b011, "subpt", ZPR64>; + + // Add/subtract (vectors, predicated) + let DestructiveInstType = DestructiveBinaryComm in { + def ADD_ZPmZ_CPA : sve_int_bin_pred_arit_log<0b11, 0b00, 0b100, "addpt", ZPR64>; + def SUB_ZPmZ_CPA : sve_int_bin_pred_arit_log<0b11, 0b00, 0b101, "subpt", ZPR64>; + } + + // Multiply-add vectors, writing multiplicand + def MAD_CPA : sve_int_mad_cpa<"madpt">; + + // Multiply-add vectors, writing addend + def MLA_CPA : sve_int_mla_cpa<"mlapt">; +} diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td index 65b97ff6956a..7edce4b61605 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td @@ -20,12 +20,9 @@ def A64FXModel : SchedMachineModel { let PostRAScheduler = 1; // Using PostRA sched. 
let CompleteModel = 1; - list<Predicate> UnsupportedFeatures = - [HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth, - HasSVE2orSME, HasMTE, HasMatMulInt8, HasBF16, HasSME2, HasSME2p1, HasSVE2p1, - HasSVE2p1_or_HasSME2p1, HasSMEF16F16, HasSSVE_FP8FMA, HasSMEF8F16, HasSMEF8F32, - HasSMEFA64]; - + list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F, SVEUnsupported.F, + [HasMTE, HasMatMulInt8, HasBF16, + HasPAuth, HasPAuthLR, HasCPA]); let FullInstRWOverlapCheck = 0; } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td index 503de3bee2b8..a6fab5e6245f 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td @@ -19,7 +19,7 @@ def NeoverseN2Model : SchedMachineModel { let CompleteModel = 1; list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F, - [HasSVE2p1]); + [HasSVE2p1, HasPAuthLR, HasCPA]); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td index 726be1a547b9..75fbb85dce9d 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td @@ -28,7 +28,7 @@ def NeoverseV1Model : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVE2Unsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCPA]); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td index 3367d5d0cd31..658d7cdd23a6 100644 --- 
a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td @@ -22,7 +22,7 @@ def NeoverseV2Model : SchedMachineModel { let CompleteModel = 1; list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F, - [HasSVE2p1]); + [HasSVE2p1, HasCPA]); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td index e8b5f6059c9e..0b80f263e12e 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td @@ -643,6 +643,14 @@ defm : TLBI<"PAALLOS", 0b110, 0b1000, 0b0001, 0b100, 0>; defm : TLBI<"PAALL", 0b110, 0b1000, 0b0111, 0b100, 0>; } +// Armv9.5-A TLBI VMALL for Dirty State +let Requires = ["AArch64::FeatureTLBIW"] in { +// op1, CRn, CRm, op2, needsreg +defm : TLBI<"VMALLWS2E1", 0b100, 0b1000, 0b0110, 0b010, 0>; +defm : TLBI<"VMALLWS2E1IS", 0b100, 0b1000, 0b0010, 0b010, 0>; +defm : TLBI<"VMALLWS2E1OS", 0b100, 0b1000, 0b0101, 0b010, 0>; +} + //===----------------------------------------------------------------------===// // MRS/MSR (system register read/write) instruction options. 
//===----------------------------------------------------------------------===// @@ -1946,3 +1954,22 @@ def : RWSysReg<"MDSTEPOP_EL1", 0b10, 0b000, 0b0000, 0b0101, 0b010>; // v9.5a System PMU zero register (FEAT_SPMU2) // Op0 Op1 CRn CRm Op2 def : WOSysReg<"SPMZR_EL0", 0b10, 0b011, 0b1001, 0b1100, 0b100>; + +// v9.5a Delegated SError exceptions for EL3 (FEAT_E3DSE) +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"VDISR_EL3", 0b11, 0b110, 0b1100, 0b0001, 0b001>; +def : RWSysReg<"VSESR_EL3", 0b11, 0b110, 0b0101, 0b0010, 0b011>; + +// v9.5a Hardware Dirty State Tracking Structure (FEAT_HDBSS) +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"HDBSSBR_EL2", 0b11, 0b100, 0b0010, 0b0011, 0b010>; +def : RWSysReg<"HDBSSPROD_EL2", 0b11, 0b100, 0b0010, 0b0011, 0b011>; + +// v9.5a Hardware Accelerator for Cleaning Dirty State (FEAT_HACDBS) +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"HACDBSBR_EL2", 0b11, 0b100, 0b0010, 0b0011, 0b100>; +def : RWSysReg<"HACDBSCONS_EL2", 0b11, 0b100, 0b0010, 0b0011, 0b101>; + +// v9.5a Fine Grained Write Trap EL3 (FEAT_FGWTE3) +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"FGWTE3_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b101>; diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 238269cf27bd..be66790c4277 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -1541,6 +1541,13 @@ public: getShiftExtendAmount() <= 4; } + bool isLSLImm3Shift() const { + if (!isShiftExtend()) + return false; + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + return ET == AArch64_AM::LSL && getShiftExtendAmount() <= 7; + } + template<int Width> bool isMemXExtend() const { if (!isExtend()) return false; @@ -1689,6 +1696,21 @@ public: return DiagnosticPredicateTy::Match; } + bool isPAuthPCRelLabel16Operand() const { + // PAuth PCRel16 operands are similar to 
regular branch targets, but only + // negative values are allowed for concrete immediates as signing instr + // should be in a lower address. + if (!isImm()) + return false; + const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); + if (!MCE) + return true; + int64_t Val = MCE->getValue(); + if (Val & 0b11) + return false; + return (Val <= 0) && (Val > -(1 << 18)); + } + void addExpr(MCInst &Inst, const MCExpr *Expr) const { // Add as immediates when possible. Null MCExpr = 0. if (!Expr) @@ -1990,6 +2012,19 @@ public: Inst.addOperand(MCOperand::createImm(MCE->getValue() >> 2)); } + void addPAuthPCRelLabel16Operands(MCInst &Inst, unsigned N) const { + // PC-relative operands don't encode the low bits, so shift them off + // here. If it's a label, however, just put it on directly as there's + // not enough information now to do anything. + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); + if (!MCE) { + addExpr(Inst, getImm()); + return; + } + Inst.addOperand(MCOperand::createImm(MCE->getValue() >> 2)); + } + void addPCRelLabel19Operands(MCInst &Inst, unsigned N) const { // Branch operands don't encode the low bits, so shift them off // here. 
If it's a label, however, just put it on directly as there's @@ -2091,6 +2126,12 @@ public: Inst.addOperand(MCOperand::createImm(Imm)); } + void addLSLImm3ShifterOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + unsigned Imm = getShiftExtendAmount(); + Inst.addOperand(MCOperand::createImm(Imm)); + } + void addSyspXzrPairOperand(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); @@ -3664,6 +3705,8 @@ static const struct Extension { {"sme-f8f16", {AArch64::FeatureSMEF8F16}}, {"sme-f8f32", {AArch64::FeatureSMEF8F32}}, {"sme-fa64", {AArch64::FeatureSMEFA64}}, + {"cpa", {AArch64::FeatureCPA}}, + {"tlbiw", {AArch64::FeatureTLBIW}}, }; static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) { @@ -6064,6 +6107,9 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode, "Invalid vector list, expected list with each SVE vector in the list " "4 registers apart, and the first register in the range [z0, z3] or " "[z16, z19] and with correct element type"); + case Match_AddSubLSLImm3ShiftLarge: + return Error(Loc, + "expected 'lsl' with optional integer in range [0, 7]"); default: llvm_unreachable("unexpected error code!"); } @@ -6448,6 +6494,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidMemoryIndexed8: case Match_InvalidMemoryIndexed16: case Match_InvalidCondCode: + case Match_AddSubLSLImm3ShiftLarge: case Match_AddSubRegExtendSmall: case Match_AddSubRegExtendLarge: case Match_AddSubSecondSource: diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index cf2d3879292d..e3220d103ae0 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -165,6 +165,9 @@ static 
DecodeStatus DecodeFixedPointScaleImm32(MCInst &Inst, unsigned Imm, static DecodeStatus DecodeFixedPointScaleImm64(MCInst &Inst, unsigned Imm, uint64_t Address, const MCDisassembler *Decoder); +static DecodeStatus DecodePCRelLabel16(MCInst &Inst, unsigned Imm, + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodePCRelLabel19(MCInst &Inst, unsigned Imm, uint64_t Address, const MCDisassembler *Decoder); @@ -887,6 +890,21 @@ static DecodeStatus DecodeFixedPointScaleImm64(MCInst &Inst, unsigned Imm, return Success; } +static DecodeStatus DecodePCRelLabel16(MCInst &Inst, unsigned Imm, + uint64_t Addr, + const MCDisassembler *Decoder) { + // Immediate is encoded as the top 16-bits of an unsigned 18-bit negative + // PC-relative offset. + uint64_t ImmVal = Imm; + if (ImmVal < 0 || ImmVal > (1 << 16)) + return Fail; + ImmVal = -ImmVal; + if (!Decoder->tryAddingSymbolicOperand(Inst, (ImmVal << 2), Addr, + /*IsBranch=*/false, 0, 0, 4)) + Inst.addOperand(MCOperand::createImm(ImmVal)); + return Success; +} + static DecodeStatus DecodePCRelLabel19(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder) { diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index bdaae4dd724d..a4ace6cce463 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -6717,68 +6717,6 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I, I.eraseFromParent(); return true; } - case Intrinsic::ptrauth_sign: { - Register DstReg = I.getOperand(0).getReg(); - Register ValReg = I.getOperand(2).getReg(); - uint64_t Key = I.getOperand(3).getImm(); - Register DiscReg = I.getOperand(4).getReg(); - auto DiscVal = getIConstantVRegVal(DiscReg, MRI); - bool IsDiscZero = DiscVal && DiscVal->isZero(); - - if 
(Key > AArch64PACKey::LAST) - return false; - - unsigned Opcodes[][4] = { - {AArch64::PACIA, AArch64::PACIB, AArch64::PACDA, AArch64::PACDB}, - {AArch64::PACIZA, AArch64::PACIZB, AArch64::PACDZA, AArch64::PACDZB}}; - unsigned Opcode = Opcodes[IsDiscZero][Key]; - - auto PAC = MIB.buildInstr(Opcode, {DstReg}, {ValReg}); - - if (!IsDiscZero) { - PAC.addUse(DiscReg); - RBI.constrainGenericRegister(DiscReg, AArch64::GPR64spRegClass, MRI); - } - - RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI); - I.eraseFromParent(); - return true; - } - case Intrinsic::ptrauth_strip: { - Register DstReg = I.getOperand(0).getReg(); - Register ValReg = I.getOperand(2).getReg(); - uint64_t Key = I.getOperand(3).getImm(); - - if (Key > AArch64PACKey::LAST) - return false; - unsigned Opcode = getXPACOpcodeForKey((AArch64PACKey::ID)Key); - - MIB.buildInstr(Opcode, {DstReg}, {ValReg}); - - RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI); - RBI.constrainGenericRegister(ValReg, AArch64::GPR64RegClass, MRI); - I.eraseFromParent(); - return true; - } - case Intrinsic::ptrauth_blend: { - MachineFunction &MF = *I.getParent()->getParent(); - auto RHS = getIConstantVRegVal(I.getOperand(3).getReg(), MRI); - if (RHS && (RHS->getZExtValue() <= 0xffff)) { - I.setDesc(TII.get(AArch64::MOVKXi)); - I.removeOperand(3); - I.removeOperand(1); - MachineInstrBuilder(MF, I) - .addImm(RHS->getZExtValue() & 0xffff) - .addImm(48) - .constrainAllUses(TII, TRI, RBI); - } else { - I.setDesc(TII.get(AArch64::BFMXri)); - I.removeOperand(1); - MachineInstrBuilder(MF, I).addImm(16).addImm(15).constrainAllUses( - TII, TRI, RBI); - } - return true; - } case Intrinsic::frameaddress: case Intrinsic::returnaddress: { MachineFunction &MF = *I.getParent()->getParent(); diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp index 687063873a16..830203b61c58 100644 --- 
a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp @@ -769,6 +769,27 @@ void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI, MI.eraseFromParent(); } +bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) { + auto &Unmerge = cast<GUnmerge>(MI); + Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1); + const LLT SrcTy = MRI.getType(Src1Reg); + return SrcTy.isVector() && !SrcTy.isScalable() && + Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1; +} + +void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) { + auto &Unmerge = cast<GUnmerge>(MI); + Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1); + const LLT SrcTy = MRI.getType(Src1Reg); + assert((SrcTy.isVector() && !SrcTy.isScalable()) && + "Expected a fixed length vector"); + + for (int I = 0; I < SrcTy.getNumElements(); ++I) + B.buildExtractVectorElementConstant(Unmerge.getReg(I), Src1Reg, I); + MI.eraseFromParent(); +} + bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) { assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR); auto Splat = getAArch64VectorSplat(MI, MRI); diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index a6900b8963bb..30ef3680ae79 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -67,6 +67,7 @@ public: {"fixup_aarch64_ldr_pcrel_imm19", 5, 19, PCRelFlagVal}, {"fixup_aarch64_movw", 5, 16, 0}, {"fixup_aarch64_pcrel_branch14", 5, 14, PCRelFlagVal}, + {"fixup_aarch64_pcrel_branch16", 5, 16, PCRelFlagVal}, {"fixup_aarch64_pcrel_branch19", 5, 19, PCRelFlagVal}, 
{"fixup_aarch64_pcrel_branch26", 0, 26, PCRelFlagVal}, {"fixup_aarch64_pcrel_call26", 0, 26, PCRelFlagVal}}; @@ -121,6 +122,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case AArch64::fixup_aarch64_movw: case AArch64::fixup_aarch64_pcrel_branch14: + case AArch64::fixup_aarch64_pcrel_branch16: case AArch64::fixup_aarch64_add_imm12: case AArch64::fixup_aarch64_ldst_imm12_scale1: case AArch64::fixup_aarch64_ldst_imm12_scale2: @@ -314,6 +316,17 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target, if (Value & 0x3) Ctx.reportError(Fixup.getLoc(), "fixup not sufficiently aligned"); return (Value >> 2) & 0x3fff; + case AArch64::fixup_aarch64_pcrel_branch16: + // Unsigned PC-relative offset, so invert the negative immediate. + SignedValue = -SignedValue; + Value = static_cast<uint64_t>(SignedValue); + // Check valid 18-bit unsigned range. + if (SignedValue < 0 || SignedValue > ((1 << 18) - 1)) + Ctx.reportError(Fixup.getLoc(), "fixup value out of range"); + // Low two bits are not encoded (4-byte alignment assumed). 
+ if (Value & 0b11) + Ctx.reportError(Fixup.getLoc(), "fixup not sufficiently aligned"); + return (Value >> 2) & 0xffff; case AArch64::fixup_aarch64_pcrel_branch26: case AArch64::fixup_aarch64_pcrel_call26: if (TheTriple.isOSBinFormatCOFF() && !IsResolved && SignedValue != 0) { @@ -380,6 +393,7 @@ unsigned AArch64AsmBackend::getFixupKindContainereSizeInBytes(unsigned Kind) con case AArch64::fixup_aarch64_movw: case AArch64::fixup_aarch64_pcrel_branch14: + case AArch64::fixup_aarch64_pcrel_branch16: case AArch64::fixup_aarch64_add_imm12: case AArch64::fixup_aarch64_ldst_imm12_scale1: case AArch64::fixup_aarch64_ldst_imm12_scale2: diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index 9de40661298c..496ab18e9b19 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -186,6 +186,10 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, return R_CLS(LD_PREL_LO19); case AArch64::fixup_aarch64_pcrel_branch14: return R_CLS(TSTBR14); + case AArch64::fixup_aarch64_pcrel_branch16: + Ctx.reportError(Fixup.getLoc(), + "relocation of PAC/AUT instructions is not supported"); + return ELF::R_AARCH64_NONE; case AArch64::fixup_aarch64_pcrel_branch19: return R_CLS(CONDBR19); default: diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h index 767dd8805520..fdee2d5ad2bf 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h @@ -43,6 +43,11 @@ enum Fixups { // The high 14 bits of a 21-bit pc-relative immediate. 
fixup_aarch64_pcrel_branch14, + // The high 16 bits of a 18-bit unsigned PC-relative immediate. Used by + // pointer authentication, only within a function, so no relocation can be + // generated. + fixup_aarch64_pcrel_branch16, + // The high 19 bits of a 21-bit pc-relative immediate. Same encoding as // fixup_aarch64_pcrel_adrhi, except this is use by b.cc and generates // relocations directly when necessary. diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index dbc4323a860f..c3e12b6d8024 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -88,6 +88,12 @@ public: SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; + /// getPAuthPCRelOpValue - Return the encoded value for a pointer + /// authentication pc-relative operand. + uint32_t getPAuthPCRelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + /// getLoadLiteralOpValue - Return the encoded value for a load-literal /// pc-relative address. uint32_t getLoadLiteralOpValue(const MCInst &MI, unsigned OpIdx, @@ -327,6 +333,29 @@ uint32_t AArch64MCCodeEmitter::getCondBranchTargetOpValue( return 0; } +/// getPAuthPCRelOpValue - Return the encoded value for a pointer +/// authentication pc-relative operand. 
+uint32_t +AArch64MCCodeEmitter::getPAuthPCRelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpIdx); + + // If the destination is an immediate, invert sign as it's a negative value + // that should be encoded as unsigned + if (MO.isImm()) + return -(MO.getImm()); + assert(MO.isExpr() && "Unexpected target type!"); + + MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_pcrel_branch16); + Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc())); + + ++MCNumFixups; + + // All of the information is in the fixup. + return 0; +} + /// getLoadLiteralOpValue - Return the encoded value for a load-literal /// pc-relative address. uint32_t diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td index 4f8917618ea4..70f3c2c99f0f 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -1259,6 +1259,12 @@ multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> { def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME)>; def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME)>; def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME)>; + + def : SVE_1_Op_Passthru_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, !cast<Instruction>(NAME)>; + def : SVE_1_Op_Passthru_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME)>; + def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>; + def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME)>; + } class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty> @@ -3712,8 +3718,14 @@ multiclass sme2_mova_vec_to_array_vg2_multi<string mnemonic, SDPatternOperator i def NAME # _PSEUDO : 
sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, ZZ_d_mul_r, SMEMatrixArray>; - def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2i64, sme_elm_idx0_7, tileslice16>; - def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2f64, sme_elm_idx0_7, tileslice16>; + def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv16i8, sme_elm_idx0_7, tileslice16>; + def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8i16, sme_elm_idx0_7, tileslice16>; + def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8f16, sme_elm_idx0_7, tileslice16>; + def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8bf16, sme_elm_idx0_7, tileslice16>; + def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv4i32, sme_elm_idx0_7, tileslice16>; + def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv4f32, sme_elm_idx0_7, tileslice16>; + def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2i64, sme_elm_idx0_7, tileslice16>; + def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2f64, sme_elm_idx0_7, tileslice16>; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp8, @@ -3805,8 +3817,14 @@ multiclass sme2_mova_vec_to_array_vg4_multi<string mnemonic, SDPatternOperator i def NAME # _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, ZZZZ_d_mul_r, SMEMatrixArray>; - def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2i64, sme_elm_idx0_7, tileslice16>; - def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2f64, sme_elm_idx0_7, tileslice16>; + def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv16i8, sme_elm_idx0_7, tileslice16>; + def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8i16, sme_elm_idx0_7, tileslice16>; + def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8f16, sme_elm_idx0_7, tileslice16>; + def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8bf16, sme_elm_idx0_7, tileslice16>; + def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv4i32, sme_elm_idx0_7, tileslice16>; + def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv4f32, sme_elm_idx0_7, tileslice16>; + def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, 
nxv2i64, sme_elm_idx0_7, tileslice16>; + def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2f64, sme_elm_idx0_7, tileslice16>; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp8, diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td index 9edf26052247..b7552541e950 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -10433,3 +10433,34 @@ multiclass sve2_luti4_vector_vg2_index<string mnemonic> { let Inst{23-22} = idx; } } + +//===----------------------------------------------------------------------===// +// Checked Pointer Arithmetic (FEAT_CPA) +//===----------------------------------------------------------------------===// +class sve_int_mad_cpa<string asm> + : I<(outs ZPR64:$Zdn), (ins ZPR64:$_Zdn, ZPR64:$Zm, ZPR64:$Za), + asm, "\t$Zdn, $Zm, $Za", "", []>, Sched<[]> { + bits<5> Zdn; + bits<5> Zm; + bits<5> Za; + let Inst{31-24} = 0b01000100; + let Inst{23-22} = 0b11; // sz + let Inst{21} = 0b0; + let Inst{20-16} = Zm; + let Inst{15} = 0b1; + let Inst{14-10} = 0b10110; // opc + let Inst{9-5} = Za; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = DestructiveOther; + let ElementSize = ZPR64.ElementSize; + let hasSideEffects = 0; +} + +class sve_int_mla_cpa<string asm> + : sve2_int_mla<0b11, 0b10100, asm, ZPR64, ZPR64> { + let Inst{15} = 0b1; + + let ElementSize = ZPR64.ElementSize; +} diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td index 060fb66d38f7..d2a325d5ad89 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1100,8 +1100,8 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12", FeatureVOP3Literal, FeatureDPP8, FeatureNoDataDepHazard, 
FeaturePkFmacF16Inst, FeatureA16, FeatureFastDenormalF32, FeatureG16, - FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureGDS, - FeatureGWS, FeatureTrue16BitInsts + FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, + FeatureTrue16BitInsts ] >; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp index 0a17b1536040..4462cd8a31f1 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp @@ -75,8 +75,9 @@ enum class SchedGroupMask { DS = 1u << 7, DS_READ = 1u << 8, DS_WRITE = 1u << 9, + TRANS = 1u << 10, ALL = ALU | VALU | SALU | MFMA | VMEM | VMEM_READ | VMEM_WRITE | DS | - DS_READ | DS_WRITE, + DS_READ | DS_WRITE | TRANS, LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL) }; @@ -1435,11 +1436,12 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const { Result = false; else if (((SGMask & SchedGroupMask::ALU) != SchedGroupMask::NONE) && - (TII->isVALU(MI) || TII->isMFMAorWMMA(MI) || TII->isSALU(MI))) + (TII->isVALU(MI) || TII->isMFMAorWMMA(MI) || TII->isSALU(MI) || + TII->isTRANS(MI))) Result = true; else if (((SGMask & SchedGroupMask::VALU) != SchedGroupMask::NONE) && - TII->isVALU(MI) && !TII->isMFMAorWMMA(MI)) + TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI)) Result = true; else if (((SGMask & SchedGroupMask::SALU) != SchedGroupMask::NONE) && @@ -1476,6 +1478,10 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const { MI.mayStore() && TII->isDS(MI)) Result = true; + else if (((SGMask & SchedGroupMask::TRANS) != SchedGroupMask::NONE) && + TII->isTRANS(MI)) + Result = true; + LLVM_DEBUG( dbgs() << "For SchedGroup with mask " << format_hex((int)SGMask, 10, true) << (Result ? 
" could classify " : " unable to classify ") << MI); @@ -1635,10 +1641,13 @@ void IGroupLPDAGMutation::addSchedBarrierEdges(SUnit &SchedBarrier) { // Remove all existing edges from the SCHED_BARRIER that were added due to the // instruction having side effects. resetEdges(SchedBarrier, DAG); + LLVM_DEBUG(dbgs() << "Building SchedGroup for SchedBarrier with Mask: " + << MI.getOperand(0).getImm() << "\n"); auto InvertedMask = invertSchedBarrierMask((SchedGroupMask)MI.getOperand(0).getImm()); SchedGroup SG(InvertedMask, std::nullopt, DAG, TII); SG.initSchedGroup(); + // Preserve original instruction ordering relative to the SCHED_BARRIER. SG.link( SchedBarrier, @@ -1652,14 +1661,15 @@ IGroupLPDAGMutation::invertSchedBarrierMask(SchedGroupMask Mask) const { // allowed past the SCHED_BARRIER. SchedGroupMask InvertedMask = ~Mask; - // ALU implies VALU, SALU, MFMA. + // ALU implies VALU, SALU, MFMA, TRANS. if ((InvertedMask & SchedGroupMask::ALU) == SchedGroupMask::NONE) - InvertedMask &= - ~SchedGroupMask::VALU & ~SchedGroupMask::SALU & ~SchedGroupMask::MFMA; - // VALU, SALU, MFMA implies ALU. + InvertedMask &= ~SchedGroupMask::VALU & ~SchedGroupMask::SALU & + ~SchedGroupMask::MFMA & ~SchedGroupMask::TRANS; + // VALU, SALU, MFMA, TRANS implies ALU. else if ((InvertedMask & SchedGroupMask::VALU) == SchedGroupMask::NONE || (InvertedMask & SchedGroupMask::SALU) == SchedGroupMask::NONE || - (InvertedMask & SchedGroupMask::MFMA) == SchedGroupMask::NONE) + (InvertedMask & SchedGroupMask::MFMA) == SchedGroupMask::NONE || + (InvertedMask & SchedGroupMask::TRANS) == SchedGroupMask::NONE) InvertedMask &= ~SchedGroupMask::ALU; // VMEM implies VMEM_READ, VMEM_WRITE. 
@@ -1678,6 +1688,9 @@ IGroupLPDAGMutation::invertSchedBarrierMask(SchedGroupMask Mask) const { (InvertedMask & SchedGroupMask::DS_WRITE) == SchedGroupMask::NONE) InvertedMask &= ~SchedGroupMask::DS; + LLVM_DEBUG(dbgs() << "After Inverting, SchedGroup Mask: " << (int)InvertedMask + << "\n"); + return InvertedMask; } diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 9d7443012e3d..541a5b62450d 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -169,11 +169,17 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2bf16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3bf16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4bf16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8bf16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16bf16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32bf16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand); @@ -185,10 +191,15 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand); 
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2bf16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3bf16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4bf16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8bf16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16bf16, Expand); setOperationAction(ISD::STORE, MVT::f32, Promote); AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32); @@ -506,9 +517,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT, MVT::v12f32, Promote); AddPromotedToType(ISD::SELECT, MVT::v12f32, MVT::v12i32); - // There are no libcalls of any kind. - for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I) - setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr); + // Disable most libcalls. 
+ for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I) { + if (I < RTLIB::ATOMIC_LOAD || I > RTLIB::ATOMIC_FETCH_NAND_16) + setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr); + } setSchedulingPreference(Sched::RegPressure); setJumpIsExpensive(true); @@ -556,6 +569,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, ISD::FSUB, ISD::FNEG, ISD::FABS, ISD::AssertZext, ISD::AssertSext, ISD::INTRINSIC_WO_CHAIN}); + + setMaxAtomicSizeInBitsSupported(64); } bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const { @@ -3055,18 +3070,26 @@ SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) cons bool ZeroUndef = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF || Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF; + bool Is64BitScalar = !Src->isDivergent() && Src.getValueType() == MVT::i64; - if (Src.getValueType() == MVT::i32) { + if (Src.getValueType() == MVT::i32 || Is64BitScalar) { // (ctlz hi:lo) -> (umin (ffbh src), 32) // (cttz hi:lo) -> (umin (ffbl src), 32) // (ctlz_zero_undef src) -> (ffbh src) // (cttz_zero_undef src) -> (ffbl src) + + // 64-bit scalar version produce 32-bit result + // (ctlz hi:lo) -> (umin (S_FLBIT_I32_B64 src), 64) + // (cttz hi:lo) -> (umin (S_FF1_I32_B64 src), 64) + // (ctlz_zero_undef src) -> (S_FLBIT_I32_B64 src) + // (cttz_zero_undef src) -> (S_FF1_I32_B64 src) SDValue NewOpr = DAG.getNode(NewOpc, SL, MVT::i32, Src); if (!ZeroUndef) { - const SDValue Const32 = DAG.getConstant(32, SL, MVT::i32); - NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Const32); + const SDValue ConstVal = DAG.getConstant( + Op.getValueType().getScalarSizeInBits(), SL, MVT::i32); + NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, ConstVal); } - return NewOpr; + return DAG.getNode(ISD::ZERO_EXTEND, SL, Src.getValueType(), NewOpr); } SDValue Lo, Hi; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index 
ee93d9eb4c0a..2bb7b6bd0674 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -1241,6 +1241,10 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, ConstantInt *DMask = cast<ConstantInt>(Args[DMaskIdx]); unsigned DMaskVal = DMask->getZExtValue() & 0xf; + // dmask 0 has special semantics, do not simplify. + if (DMaskVal == 0) + return nullptr; + // Mask off values that are undefined because the dmask doesn't cover them DemandedElts &= (1 << llvm::popcount(DMaskVal)) - 1; @@ -1261,7 +1265,7 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, unsigned NewNumElts = DemandedElts.popcount(); if (!NewNumElts) - return UndefValue::get(IIVTy); + return PoisonValue::get(IIVTy); if (NewNumElts >= VWidth && DemandedElts.isMask()) { if (DMaskIdx >= 0) @@ -1299,7 +1303,7 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, if (IsLoad) { if (NewNumElts == 1) { - return IC.Builder.CreateInsertElement(UndefValue::get(IIVTy), NewCall, + return IC.Builder.CreateInsertElement(PoisonValue::get(IIVTy), NewCall, DemandedElts.countr_zero()); } diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp index 0c21382e5c22..f03e6b8915b1 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -1050,8 +1050,7 @@ bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, CF->isNegative(); } else { needlog = true; - needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR && - (!CF || CF->isNegative()); + needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR; } } else { ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0); diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp 
b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 1bed516fb5c7..5e73411cae9b 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -530,6 +530,15 @@ static Value *promoteAllocaUserToVector( return Builder.CreateVectorSplat(VectorTy->getElementCount(), Elt); } + if (auto *Intr = dyn_cast<IntrinsicInst>(Inst)) { + if (Intr->getIntrinsicID() == Intrinsic::objectsize) { + Intr->replaceAllUsesWith( + Builder.getIntN(Intr->getType()->getIntegerBitWidth(), + DL.getTypeAllocSize(VectorTy))); + return nullptr; + } + } + llvm_unreachable("Unsupported call when promoting alloca to vector"); } @@ -773,8 +782,17 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) { continue; } + if (auto *Intr = dyn_cast<IntrinsicInst>(Inst)) { + if (Intr->getIntrinsicID() == Intrinsic::objectsize) { + WorkList.push_back(Inst); + continue; + } + } + // Ignore assume-like intrinsics and comparisons used in assumes. 
if (isAssumeLikeIntrinsic(Inst)) { + if (!Inst->use_empty()) + return RejectUser(Inst, "assume-like intrinsic cannot have any users"); UsersToRemove.push_back(Inst); continue; } diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index e8c04ecf39ba..fdc2077868cf 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -345,6 +345,11 @@ static cl::opt<bool> EnableImageIntrinsicOptimizer( cl::desc("Enable image intrinsic optimizer pass"), cl::init(true), cl::Hidden); +static cl::opt<bool> + EnableLoopPrefetch("amdgpu-loop-prefetch", + cl::desc("Enable loop data prefetch on AMDGPU"), + cl::Hidden, cl::init(false)); + static cl::opt<bool> EnableMaxIlpSchedStrategy( "amdgpu-enable-max-ilp-scheduling-strategy", cl::desc("Enable scheduling strategy to maximize ILP for a single wave."), @@ -982,6 +987,8 @@ void AMDGPUPassConfig::addEarlyCSEOrGVNPass() { } void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() { + if (isPassEnabled(EnableLoopPrefetch, CodeGenOptLevel::Aggressive)) + addPass(createLoopDataPrefetchPass()); addPass(createSeparateConstOffsetFromGEPPass()); // ReassociateGEPs exposes more opportunities for SLSR. See // the example in reassociate-geps-and-slsr.ll. diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index f1da1a61bf4d..ebe0b8551b23 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -1345,3 +1345,11 @@ GCNTTIImpl::getTypeLegalizationCost(Type *Ty) const { Cost.first += (Size + 255) / 256; return Cost; } + +unsigned GCNTTIImpl::getPrefetchDistance() const { + return ST->hasPrefetch() ? 
128 : 0; +} + +bool GCNTTIImpl::shouldPrefetchAddressSpace(unsigned AS) const { + return AMDGPU::isFlatGlobalAddrSpace(AS); +} diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index 1e6c5bbfc0d7..cd8e9fd10bbf 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -254,6 +254,16 @@ public: InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind); + + /// Data cache line size for LoopDataPrefetch pass. Has no use before GFX12. + unsigned getCacheLineSize() const override { return 128; } + + /// How much before a load we should place the prefetch instruction. + /// This is currently measured in number of IR instructions. + unsigned getPrefetchDistance() const override; + + /// \return if target want to issue a prefetch in address space \p AS. 
+ bool shouldPrefetchAddressSpace(unsigned AS) const override; }; } // end namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td index 3a895923fa4b..bc9049b4ef33 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -1147,7 +1147,8 @@ def : GCNPat < >; } // End SubtargetPredicate = HasAtomicDsPkAdd16Insts -def : Pat < +let OtherPredicates = [HasGDS] in +def : GCNPat < (SIds_ordered_count i32:$value, i16:$offset), (DS_ORDERED_COUNT $value, (as_i16imm $offset)) >; @@ -1189,7 +1190,8 @@ def : GCNPat < //===----------------------------------------------------------------------===// class Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<8> op, DS_Pseudo ps, int ef, - string opName = ps.Mnemonic> + string opName = ps.Mnemonic, + bit hasGFX12Enc = 0> : DS_Real<ps, opName>, SIMCInstr <ps.Mnemonic, ef> { let Inst{7-0} = !if(ps.has_offset0, offset0, 0); @@ -1201,6 +1203,8 @@ class Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<8> op, DS_Pseudo ps, int ef, let Inst{47-40} = !if(ps.has_data0, data0{7-0}, 0); let Inst{55-48} = !if(ps.has_data1, data1{7-0}, 0); let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, 0); + + let gds = !if(hasGFX12Enc, 0, ?); } //===----------------------------------------------------------------------===// @@ -1212,7 +1216,7 @@ let AssemblerPredicate = isGFX12Plus, DecoderNamespace = "GFX12" in { defvar ps = !cast<DS_Pseudo>(NAME); def _gfx12 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, ps, SIEncodingFamily.GFX12, - ps.Mnemonic>; + ps.Mnemonic, 1>; } multiclass DS_Real_Renamed_gfx12<bits<8> op, DS_Pseudo backing_pseudo, @@ -1220,7 +1224,7 @@ let AssemblerPredicate = isGFX12Plus, DecoderNamespace = "GFX12" in { def _gfx12 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, backing_pseudo, SIEncodingFamily.GFX12, - real_name>, + real_name, 1>, 
MnemonicAlias<backing_pseudo.Mnemonic, real_name>, Requires<[isGFX12Plus]>; } diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index ed2e7e4f189e..7939d0036568 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -702,6 +702,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, AMDGPU::OpName::src2_modifiers); } + if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) && + !AMDGPU::hasGDS(STI)) { + insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds); + } + if (Res && (MCII->get(MI.getOpcode()).TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD))) { int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(), diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 34826809c1a6..fc119aa61d01 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -540,10 +540,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, MVT::f16, Custom); setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, MVT::i16, Custom); - - setOperationAction( - {ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, ISD::UINT_TO_FP}, - MVT::f16, Promote); + setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, MVT::f16, Promote); // F16 - VOP2 Actions. setOperationAction({ISD::BR_CC, ISD::SELECT_CC}, MVT::f16, Expand); @@ -1145,11 +1142,10 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, MachineMemOperand::MOStore | MachineMemOperand::MODereferenceable; - // XXX - Should this be volatile without known ordering? 
- Info.flags |= MachineMemOperand::MOVolatile; - switch (IntrID) { default: + // XXX - Should this be volatile without known ordering? + Info.flags |= MachineMemOperand::MOVolatile; break; case Intrinsic::amdgcn_raw_buffer_load_lds: case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: @@ -1157,6 +1153,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: { unsigned Width = cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue(); Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8); + Info.ptrVal = CI.getArgOperand(1); return true; } } @@ -1289,8 +1286,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.opc = ISD::INTRINSIC_VOID; unsigned Width = cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue(); Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8); - Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore | - MachineMemOperand::MOVolatile; + Info.ptrVal = CI.getArgOperand(1); + Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore; return true; } case Intrinsic::amdgcn_ds_bvh_stack_rtn: { @@ -9231,7 +9228,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo(); MachinePointerInfo StorePtrI = LoadPtrI; - StorePtrI.V = nullptr; + LoadPtrI.V = PoisonValue::get( + PointerType::get(*DAG.getContext(), AMDGPUAS::GLOBAL_ADDRESS)); + LoadPtrI.AddrSpace = AMDGPUAS::GLOBAL_ADDRESS; StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS; auto F = LoadMMO->getFlags() & @@ -9309,6 +9308,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo(); LoadPtrI.Offset = Op->getConstantOperandVal(5); MachinePointerInfo StorePtrI = LoadPtrI; + LoadPtrI.V = PoisonValue::get( + PointerType::get(*DAG.getContext(), AMDGPUAS::GLOBAL_ADDRESS)); LoadPtrI.AddrSpace = AMDGPUAS::GLOBAL_ADDRESS; StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS; auto F = 
LoadMMO->getFlags() & diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 8415a3d77d3b..55ddb540c51e 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -238,7 +238,7 @@ public: bool merge(const WaitcntBrackets &Other); - RegInterval getRegInterval(const MachineInstr *MI, const SIInstrInfo *TII, + RegInterval getRegInterval(const MachineInstr *MI, const MachineRegisterInfo *MRI, const SIRegisterInfo *TRI, unsigned OpNo) const; @@ -500,7 +500,6 @@ public: } // end anonymous namespace RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI, - const SIInstrInfo *TII, const MachineRegisterInfo *MRI, const SIRegisterInfo *TRI, unsigned OpNo) const { @@ -534,7 +533,7 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI, else return {-1, -1}; - const TargetRegisterClass *RC = TII->getOpRegClass(*MI, OpNo); + const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Op.getReg()); unsigned Size = TRI->getRegSizeInBits(*RC); Result.second = Result.first + ((Size + 16) / 32); @@ -546,7 +545,7 @@ void WaitcntBrackets::setExpScore(const MachineInstr *MI, const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI, unsigned OpNo, unsigned Val) { - RegInterval Interval = getRegInterval(MI, TII, MRI, TRI, OpNo); + RegInterval Interval = getRegInterval(MI, MRI, TRI, OpNo); assert(TRI->isVectorRegister(*MRI, MI->getOperand(OpNo).getReg())); for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { setRegScore(RegNo, EXP_CNT, Val); @@ -674,7 +673,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, Inst.getOpcode() == AMDGPU::BUFFER_STORE_DWORDX4) { MachineOperand *MO = TII->getNamedOperand(Inst, AMDGPU::OpName::data); unsigned OpNo;//TODO: find the OpNo for this operand; - RegInterval Interval = getRegInterval(&Inst, TII, MRI, TRI, OpNo); + 
RegInterval Interval = getRegInterval(&Inst, MRI, TRI, OpNo); for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { setRegScore(RegNo + NUM_ALL_VGPRS, t, CurrScore); @@ -686,7 +685,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, auto &Op = Inst.getOperand(I); if (!Op.isReg() || !Op.isDef()) continue; - RegInterval Interval = getRegInterval(&Inst, TII, MRI, TRI, I); + RegInterval Interval = getRegInterval(&Inst, MRI, TRI, I); if (T == VM_CNT) { if (Interval.first >= NUM_ALL_VGPRS) continue; @@ -1140,7 +1139,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, if (MI.getOperand(CallAddrOpIdx).isReg()) { RegInterval CallAddrOpInterval = - ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, CallAddrOpIdx); + ScoreBrackets.getRegInterval(&MI, MRI, TRI, CallAddrOpIdx); for (int RegNo = CallAddrOpInterval.first; RegNo < CallAddrOpInterval.second; ++RegNo) @@ -1150,7 +1149,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst); if (RtnAddrOpIdx != -1) { RegInterval RtnAddrOpInterval = - ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, RtnAddrOpIdx); + ScoreBrackets.getRegInterval(&MI, MRI, TRI, RtnAddrOpIdx); for (int RegNo = RtnAddrOpInterval.first; RegNo < RtnAddrOpInterval.second; ++RegNo) @@ -1202,8 +1201,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, if (Op.isTied() && Op.isUse() && TII->doesNotReadTiedSource(MI)) continue; - RegInterval Interval = - ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I); + RegInterval Interval = ScoreBrackets.getRegInterval(&MI, MRI, TRI, I); const bool IsVGPR = TRI->isVectorRegister(*MRI, Op.getReg()); for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { @@ -1782,7 +1780,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML, MachineOperand &Op = MI.getOperand(I); if (!Op.isReg() || !TRI->isVectorRegister(*MRI, Op.getReg())) continue; - RegInterval Interval = 
Brackets.getRegInterval(&MI, TII, MRI, TRI, I); + RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, I); // Vgpr use if (Op.isUse()) { for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 70ef1fff274a..ebe23a5eac57 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -245,6 +245,10 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad()) return false; + // A mayLoad instruction without a def is not a load. Likely a prefetch. + if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs()) + return false; + if (isDS(Opc0) && isDS(Opc1)) { // FIXME: Handle this case: @@ -3654,6 +3658,9 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, if (MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) return false; + if (isLDSDMA(MIa) || isLDSDMA(MIb)) + return false; + // TODO: Should we check the address space from the MachineMemOperand? 
That // would allow us to distinguish objects we know don't alias based on the // underlying address space, even if it was lowered to a different one, @@ -4976,6 +4983,14 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, } } + if (isDS(MI) && !ST.hasGDS()) { + const MachineOperand *GDSOp = getNamedOperand(MI, AMDGPU::OpName::gds); + if (GDSOp && GDSOp->getImm() != 0) { + ErrInfo = "GDS is not supported on this subtarget"; + return false; + } + } + if (isImage(MI)) { const MachineOperand *DimOp = getNamedOperand(MI, AMDGPU::OpName::dim); if (DimOp) { @@ -6897,6 +6912,15 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist, Inst.eraseFromParent(); return; + case AMDGPU::S_FLBIT_I32_B64: + splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32); + Inst.eraseFromParent(); + return; + case AMDGPU::S_FF1_I32_B64: + splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32); + Inst.eraseFromParent(); + return; + case AMDGPU::S_LSHL_B32: if (ST.hasOnlyRevVALUShifts()) { NewOpcode = AMDGPU::V_LSHLREV_B32_e64; @@ -7830,6 +7854,61 @@ void SIInstrInfo::splitScalar64BitBFE(SIInstrWorklist &Worklist, addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); } +void SIInstrInfo::splitScalar64BitCountOp(SIInstrWorklist &Worklist, + MachineInstr &Inst, unsigned Opcode, + MachineDominatorTree *MDT) const { + // (S_FLBIT_I32_B64 hi:lo) -> + // -> (umin (V_FFBH_U32_e32 hi), (uaddsat (V_FFBH_U32_e32 lo), 32)) + // (S_FF1_I32_B64 hi:lo) -> + // ->(umin (uaddsat (V_FFBL_B32_e32 hi), 32) (V_FFBL_B32_e32 lo)) + + MachineBasicBlock &MBB = *Inst.getParent(); + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + MachineBasicBlock::iterator MII = Inst; + const DebugLoc &DL = Inst.getDebugLoc(); + + MachineOperand &Dest = Inst.getOperand(0); + MachineOperand &Src = Inst.getOperand(1); + + const MCInstrDesc &InstDesc = get(Opcode); + + bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32; + unsigned OpcodeAdd = + ST.hasAddNoCarry() ? 
AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32; + + const TargetRegisterClass *SrcRC = + Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass; + const TargetRegisterClass *SrcSubRC = + RI.getSubRegisterClass(SrcRC, AMDGPU::sub0); + + MachineOperand SrcRegSub0 = + buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub0, SrcSubRC); + MachineOperand SrcRegSub1 = + buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub1, SrcSubRC); + + Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + + BuildMI(MBB, MII, DL, InstDesc, MidReg1).add(SrcRegSub0); + + BuildMI(MBB, MII, DL, InstDesc, MidReg2).add(SrcRegSub1); + + BuildMI(MBB, MII, DL, get(OpcodeAdd), MidReg3) + .addReg(IsCtlz ? MidReg1 : MidReg2) + .addImm(32) + .addImm(1); // enable clamp + + BuildMI(MBB, MII, DL, get(AMDGPU::V_MIN_U32_e64), MidReg4) + .addReg(MidReg3) + .addReg(IsCtlz ? 
MidReg2 : MidReg1); + + MRI.replaceRegWith(Dest.getReg(), MidReg4); + + addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist); +} + void SIInstrInfo::addUsersToMoveToVALUWorklist( Register DstReg, MachineRegisterInfo &MRI, SIInstrWorklist &Worklist) const { diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h index affe52046752..46eee6fae0a5 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -144,6 +144,9 @@ private: void splitScalar64BitBCNT(SIInstrWorklist &Worklist, MachineInstr &Inst) const; void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const; + void splitScalar64BitCountOp(SIInstrWorklist &Worklist, MachineInstr &Inst, + unsigned Opcode, + MachineDominatorTree *MDT = nullptr) const; void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI, MachineInstr &Inst) const; diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp index a3a71a8ec09a..eeb7f64aa581 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -1645,9 +1645,6 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, // Fold the return instruction into the LDM. DeleteRet = true; LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; - // We 'restore' LR into PC so it is not live out of the return block: - // Clear Restored bit. 
- Info.setRestored(false); } // If NoGap is true, pop consecutive registers and then leave the rest @@ -2695,8 +2692,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, const Align TargetAlign = getStackAlign(); if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) { if (CS1Spilled && !UnspilledCS1GPRs.empty()) { - for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) { - unsigned Reg = UnspilledCS1GPRs[i]; + for (unsigned Reg : UnspilledCS1GPRs) { // Don't spill high register if the function is thumb. In the case of // Windows on ARM, accept R11 (frame pointer) if (!AFI->isThumbFunction() || @@ -2785,6 +2781,33 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, AFI->setLRIsSpilled(SavedRegs.test(ARM::LR)); } +void ARMFrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS) const { + TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS); + + MachineFrameInfo &MFI = MF.getFrameInfo(); + if (!MFI.isCalleeSavedInfoValid()) + return; + + // Check if all terminators do not implicitly use LR. Then we can 'restore' LR + // into PC so it is not live out of the return block: Clear the Restored bit + // in that case. 
+ for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) { + if (Info.getReg() != ARM::LR) + continue; + if (all_of(MF, [](const MachineBasicBlock &MBB) { + return all_of(MBB.terminators(), [](const MachineInstr &Term) { + return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET || + Term.getOpcode() == ARM::t2LDMIA_RET || + Term.getOpcode() == ARM::tPOP_RET; + }); + })) { + Info.setRestored(false); + break; + } + } +} + void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const { TargetFrameLowering::getCalleeSaves(MF, SavedRegs); diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h index 16f2ce6bea6f..8d2b8beb9a58 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h @@ -59,6 +59,9 @@ public: void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; + void processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS = nullptr) const override; + void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &MBB) const override; diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp index db63facca870..d00b7853816e 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1415,6 +1415,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::GET_FPENV, MVT::i32, Legal); setOperationAction(ISD::SET_FPENV, MVT::i32, Legal); setOperationAction(ISD::RESET_FPENV, MVT::Other, Legal); + setOperationAction(ISD::GET_FPMODE, MVT::i32, Legal); + setOperationAction(ISD::SET_FPMODE, MVT::i32, Custom); + setOperationAction(ISD::RESET_FPMODE, MVT::Other, Custom); } // We want to custom lower some of our 
intrinsics. @@ -6447,6 +6450,57 @@ SDValue ARMTargetLowering::LowerSET_ROUNDING(SDValue Op, return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2); } +SDValue ARMTargetLowering::LowerSET_FPMODE(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue Chain = Op->getOperand(0); + SDValue Mode = Op->getOperand(1); + + // Generate nodes to build: + // FPSCR = (FPSCR & FPStatusBits) | (Mode & ~FPStatusBits) + SDValue Ops[] = {Chain, + DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)}; + SDValue FPSCR = + DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops); + Chain = FPSCR.getValue(1); + FPSCR = FPSCR.getValue(0); + + SDValue FPSCRMasked = + DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR, + DAG.getConstant(ARM::FPStatusBits, DL, MVT::i32)); + SDValue InputMasked = + DAG.getNode(ISD::AND, DL, MVT::i32, Mode, + DAG.getConstant(~ARM::FPStatusBits, DL, MVT::i32)); + FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCRMasked, InputMasked); + + SDValue Ops2[] = { + Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR}; + return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2); +} + +SDValue ARMTargetLowering::LowerRESET_FPMODE(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue Chain = Op->getOperand(0); + + // To get the default FP mode all control bits are cleared: + // FPSCR = FPSCR & (FPStatusBits | FPReservedBits) + SDValue Ops[] = {Chain, + DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)}; + SDValue FPSCR = + DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops); + Chain = FPSCR.getValue(1); + FPSCR = FPSCR.getValue(0); + + SDValue FPSCRMasked = DAG.getNode( + ISD::AND, DL, MVT::i32, FPSCR, + DAG.getConstant(ARM::FPStatusBits | ARM::FPReservedBits, DL, MVT::i32)); + SDValue Ops2[] = {Chain, + DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), + FPSCRMasked}; + return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2); +} + static SDValue LowerCTTZ(SDNode *N, 
SelectionDAG &DAG, const ARMSubtarget *ST) { SDLoc dl(N); @@ -10557,6 +10611,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ZERO_EXTEND: return LowerVectorExtend(Op.getNode(), DAG, Subtarget); case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG); case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG); + case ISD::SET_FPMODE: + return LowerSET_FPMODE(Op, DAG); + case ISD::RESET_FPMODE: + return LowerRESET_FPMODE(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); case ISD::SDIV: if (Subtarget->isTargetWindows() && !Op.getValueType().isVector()) diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h index 6c2b92de7a1d..f398b01f4186 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h @@ -375,6 +375,14 @@ class VectorType; // Bit position of rounding mode bits in FPSCR. const unsigned RoundingBitsPos = 22; + + // Bits of floating-point status. These are NZCV flags, QC bit and cumulative + // FP exception bits. + const unsigned FPStatusBits = 0xf800009f; + + // Some bits in the FPSCR are not yet defined. They must be preserved when + // modifying the contents. + const unsigned FPReservedBits = 0x00006060; } // namespace ARM /// Define some predicates that are used for node matching. 
@@ -835,6 +843,8 @@ class VectorType; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td index 800527bcf756..55d3efbd9b9a 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -2675,6 +2675,7 @@ def : Pat<(get_fpenv), (VMRS)>; def : Pat<(set_fpenv GPRnopc:$Rt), (VMSR GPRnopc:$Rt)>; def : Pat<(reset_fpenv), (VMSR (MOVi 0))>, Requires<[IsARM]>; def : Pat<(reset_fpenv), (VMSR (tMOVi8 0))>, Requires<[IsThumb]>; +def : Pat<(get_fpmode), (VMRS)>; //===----------------------------------------------------------------------===// // Assembler aliases. diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index a679699a66c7..ed9d30c3c3ab 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -2604,16 +2604,14 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { } // Re-schedule loads. - for (unsigned i = 0, e = LdBases.size(); i != e; ++i) { - unsigned Base = LdBases[i]; + for (unsigned Base : LdBases) { SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base]; if (Lds.size() > 1) RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap, RegisterMap); } // Re-schedule stores. 
- for (unsigned i = 0, e = StBases.size(); i != e; ++i) { - unsigned Base = StBases[i]; + for (unsigned Base : StBases) { SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base]; if (Sts.size() > 1) RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap, RegisterMap); diff --git a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.cpp b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.cpp index d3ff12a1f7b3..621852f2453f 100644 --- a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.cpp +++ b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.cpp @@ -63,57 +63,56 @@ ResourceBase::ResourceBase(uint32_t I, FrontendResource R) RangeSize = ArrTy->getNumElements(); } -StringRef ResourceBase::getComponentTypeName(ComponentType CompType) { - switch (CompType) { - case ComponentType::LastEntry: - case ComponentType::Invalid: +StringRef ResourceBase::getElementTypeName(ElementType ElTy) { + switch (ElTy) { + case ElementType::Invalid: return "invalid"; - case ComponentType::I1: + case ElementType::I1: return "i1"; - case ComponentType::I16: + case ElementType::I16: return "i16"; - case ComponentType::U16: + case ElementType::U16: return "u16"; - case ComponentType::I32: + case ElementType::I32: return "i32"; - case ComponentType::U32: + case ElementType::U32: return "u32"; - case ComponentType::I64: + case ElementType::I64: return "i64"; - case ComponentType::U64: + case ElementType::U64: return "u64"; - case ComponentType::F16: + case ElementType::F16: return "f16"; - case ComponentType::F32: + case ElementType::F32: return "f32"; - case ComponentType::F64: + case ElementType::F64: return "f64"; - case ComponentType::SNormF16: + case ElementType::SNormF16: return "snorm_f16"; - case ComponentType::UNormF16: + case ElementType::UNormF16: return "unorm_f16"; - case ComponentType::SNormF32: + case ElementType::SNormF32: return "snorm_f32"; - case ComponentType::UNormF32: + case ElementType::UNormF32: return "unorm_f32"; - case 
ComponentType::SNormF64: + case ElementType::SNormF64: return "snorm_f64"; - case ComponentType::UNormF64: + case ElementType::UNormF64: return "unorm_f64"; - case ComponentType::PackedS8x32: + case ElementType::PackedS8x32: return "p32i8"; - case ComponentType::PackedU8x32: + case ElementType::PackedU8x32: return "p32u8"; } - llvm_unreachable("All ComponentType enums are handled in switch"); + llvm_unreachable("All ElementType enums are handled in switch"); } -void ResourceBase::printComponentType(Kinds Kind, ComponentType CompType, - unsigned Alignment, raw_ostream &OS) { +void ResourceBase::printElementType(Kinds Kind, ElementType ElTy, + unsigned Alignment, raw_ostream &OS) { switch (Kind) { default: // TODO: add vector size. - OS << right_justify(getComponentTypeName(CompType), Alignment); + OS << right_justify(getElementTypeName(ElTy), Alignment); break; case Kinds::RawBuffer: OS << right_justify("byte", Alignment); @@ -232,19 +231,13 @@ void ResourceBase::print(raw_ostream &OS, StringRef IDPrefix, OS << right_justify("unbounded", 6) << "\n"; } -UAVResource::UAVResource(uint32_t I, FrontendResource R) - : ResourceBase(I, R), Shape(R.getResourceKind()), GloballyCoherent(false), - HasCounter(false), IsROV(R.getIsROV()), ExtProps() { - parseSourceType(R.getSourceType()); -} - void UAVResource::print(raw_ostream &OS) const { OS << "; " << left_justify(Name, 31); OS << right_justify("UAV", 10); - printComponentType( - Shape, ExtProps.ElementType.value_or(ComponentType::Invalid), 8, OS); + printElementType(Shape, ExtProps.ElementType.value_or(ElementType::Invalid), + 8, OS); // FIXME: support SampleCount. // See https://github.com/llvm/llvm-project/issues/58175 @@ -253,35 +246,6 @@ void UAVResource::print(raw_ostream &OS) const { ResourceBase::print(OS, "U", "u"); } -// FIXME: Capture this in HLSL source. 
I would go do this right now, but I want -// to get this in first so that I can make sure to capture all the extra -// information we need to remove the source type string from here (See issue: -// https://github.com/llvm/llvm-project/issues/57991). -void UAVResource::parseSourceType(StringRef S) { - S = S.substr(S.find("<") + 1); - - constexpr size_t PrefixLen = StringRef("vector<").size(); - if (S.starts_with("vector<")) - S = S.substr(PrefixLen, S.find(",") - PrefixLen); - else - S = S.substr(0, S.find(">")); - - ComponentType ElTy = StringSwitch<ResourceBase::ComponentType>(S) - .Case("bool", ComponentType::I1) - .Case("int16_t", ComponentType::I16) - .Case("uint16_t", ComponentType::U16) - .Case("int32_t", ComponentType::I32) - .Case("uint32_t", ComponentType::U32) - .Case("int64_t", ComponentType::I64) - .Case("uint64_t", ComponentType::U64) - .Case("half", ComponentType::F16) - .Case("float", ComponentType::F32) - .Case("double", ComponentType::F64) - .Default(ComponentType::Invalid); - if (ElTy != ComponentType::Invalid) - ExtProps.ElementType = ElTy; -} - ConstantBuffer::ConstantBuffer(uint32_t I, hlsl::FrontendResource R) : ResourceBase(I, R) {} @@ -294,7 +258,7 @@ void ConstantBuffer::print(raw_ostream &OS) const { OS << right_justify("cbuffer", 10); - printComponentType(Kinds::CBuffer, ComponentType::Invalid, 8, OS); + printElementType(Kinds::CBuffer, ElementType::Invalid, 8, OS); printKind(Kinds::CBuffer, 12, OS, /*SRV*/ false, /*HasCounter*/ false); // Print the binding part. diff --git a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.h b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.h index cb39020bc61e..5f8b0badd145 100644 --- a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.h +++ b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.h @@ -46,38 +46,13 @@ protected: bool SRV = false, bool HasCounter = false, uint32_t SampleCount = 0); - // The value ordering of this enumeration is part of the DXIL ABI. 
Elements - // can only be added to the end, and not removed. - enum class ComponentType : uint32_t { - Invalid = 0, - I1, - I16, - U16, - I32, - U32, - I64, - U64, - F16, - F32, - F64, - SNormF16, - UNormF16, - SNormF32, - UNormF32, - SNormF64, - UNormF64, - PackedS8x32, - PackedU8x32, - LastEntry - }; - - static StringRef getComponentTypeName(ComponentType CompType); - static void printComponentType(Kinds Kind, ComponentType CompType, - unsigned Alignment, raw_ostream &OS); + static StringRef getElementTypeName(hlsl::ElementType CompType); + static void printElementType(Kinds Kind, hlsl::ElementType CompType, + unsigned Alignment, raw_ostream &OS); public: struct ExtendedProperties { - std::optional<ComponentType> ElementType; + std::optional<hlsl::ElementType> ElementType; // The value ordering of this enumeration is part of the DXIL ABI. Elements // can only be added to the end, and not removed. @@ -102,7 +77,9 @@ class UAVResource : public ResourceBase { void parseSourceType(StringRef S); public: - UAVResource(uint32_t I, hlsl::FrontendResource R); + UAVResource(uint32_t I, hlsl::FrontendResource R) + : ResourceBase(I, R), Shape(R.getResourceKind()), GloballyCoherent(false), + HasCounter(false), IsROV(R.getIsROV()), ExtProps{R.getElementType()} {} MDNode *write() const; void print(raw_ostream &O) const; diff --git a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp index e2d0aeee092e..ebb269c6e6e0 100644 --- a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp @@ -937,8 +937,7 @@ void DXILBitcodeWriter::writeAttributeTable() { Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3); SmallVector<uint64_t, 64> Record; - for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { - AttributeList AL = Attrs[i]; + for (AttributeList AL : Attrs) { for (unsigned i 
: AL.indexes()) { AttributeSet AS = AL.getAttributes(i); if (AS.hasAttributes()) diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp index 47fbf0a69518..dae316ccb5e9 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp @@ -2860,8 +2860,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI, // For each defined register, if it is a constant, create an instruction // NewR = const // and replace all uses of the defined register with NewR. - for (unsigned i = 0, n = DefRegs.size(); i < n; ++i) { - unsigned R = DefRegs[i]; + for (unsigned R : DefRegs) { const LatticeCell &L = Inputs.get(R); if (L.isBottom()) continue; diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp index efb0d405fef2..e08566718d7c 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -1337,8 +1337,7 @@ OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb, // segments that are used in the output. unsigned Seg0 = ~0u, Seg1 = ~0u; - for (int I = 0, E = SegMap.size(); I != E; ++I) { - unsigned X = SegMap[I]; + for (unsigned X : SegMap) { if (X == ~0u) continue; if (Seg0 == ~0u) @@ -2037,8 +2036,7 @@ HvxSelector::completeToPerfect(ArrayRef<uint32_t> Completions, unsigned Width) { #ifndef NDEBUG // Check that we have generated a valid completion. 
uint32_t OrAll = 0; - for (unsigned I = 0, E = Comps.size(); I != E; ++I) { - uint32_t C = Comps[I]; + for (uint32_t C : Comps) { assert(isPowerOf2_32(C)); OrAll |= C; } diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp index 51ef72b873a5..7777ae23e8ae 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -1062,7 +1062,7 @@ void PolynomialMultiplyRecognize::promoteTo(Instruction *In, // Promote immediates. for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) { if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i))) - if (CI->getType()->getBitWidth() < DestBW) + if (CI->getBitWidth() < DestBW) In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue())); } } @@ -1577,7 +1577,7 @@ Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At, static bool hasZeroSignBit(const Value *V) { if (const auto *CI = dyn_cast<const ConstantInt>(V)) - return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0; + return CI->getValue().isNonNegative(); const Instruction *I = dyn_cast<const Instruction>(V); if (!I) return false; @@ -1688,7 +1688,7 @@ void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) { if (I->getOpcode() != Instruction::Or) return nullptr; ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1)); - if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit()) + if (!Msb || !Msb->getValue().isSignMask()) return nullptr; if (!hasZeroSignBit(I->getOperand(0))) return nullptr; diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index fffd5abd9f8b..0740ac58a338 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ 
b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -554,7 +554,7 @@ MCSubtargetInfo *Hexagon_MC::createHexagonMCSubtargetInfo(const Triple &TT, // Add qfloat subtarget feature by default to v68 and above // unless explicitely disabled if (checkFeature(X, Hexagon::ExtensionHVXV68) && - ArchFS.find("-hvx-qfloat", 0) == std::string::npos) { + !ArchFS.contains("-hvx-qfloat")) { llvm::FeatureBitset Features = X->getFeatureBits(); X->setFeatureBits(Features.set(Hexagon::ExtensionHVXQFloat)); } diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp index 14bcef7c7d26..6d8ef1bf96cb 100644 --- a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp @@ -177,6 +177,34 @@ bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm, } } +static inline std::pair<MCFixupKind, MCFixupKind> +getRelocPairForSize(unsigned Size) { + switch (Size) { + default: + llvm_unreachable("unsupported fixup size"); + case 6: + return std::make_pair( + MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD6), + MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB6)); + case 8: + return std::make_pair( + MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD8), + MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB8)); + case 16: + return std::make_pair( + MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD16), + MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB16)); + case 32: + return std::make_pair( + MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD32), + MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB32)); + case 64: + return std::make_pair( + MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD64), + MCFixupKind(FirstLiteralRelocationKind + 
ELF::R_LARCH_SUB64)); + } +} + bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, const MCSubtargetInfo *STI) const { // We mostly follow binutils' convention here: align to 4-byte boundary with a @@ -191,6 +219,56 @@ bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, return true; } +bool LoongArchAsmBackend::handleAddSubRelocations(const MCAsmLayout &Layout, + const MCFragment &F, + const MCFixup &Fixup, + const MCValue &Target, + uint64_t &FixedValue) const { + std::pair<MCFixupKind, MCFixupKind> FK; + uint64_t FixedValueA, FixedValueB; + const MCSection &SecA = Target.getSymA()->getSymbol().getSection(); + const MCSection &SecB = Target.getSymB()->getSymbol().getSection(); + + // We need record relocation if SecA != SecB. Usually SecB is same as the + // section of Fixup, which will be record the relocation as PCRel. If SecB + // is not same as the section of Fixup, it will report error. Just return + // false and then this work can be finished by handleFixup. + if (&SecA != &SecB) + return false; + + // In SecA == SecB case. If the linker relaxation is enabled, we need record + // the ADD, SUB relocations. Otherwise the FixedValue has already been + // calculated out in evaluateFixup, return true and avoid record relocations. 
+ if (!STI.hasFeature(LoongArch::FeatureRelax)) + return true; + + switch (Fixup.getKind()) { + case llvm::FK_Data_1: + FK = getRelocPairForSize(8); + break; + case llvm::FK_Data_2: + FK = getRelocPairForSize(16); + break; + case llvm::FK_Data_4: + FK = getRelocPairForSize(32); + break; + case llvm::FK_Data_8: + FK = getRelocPairForSize(64); + break; + default: + llvm_unreachable("unsupported fixup size"); + } + MCValue A = MCValue::get(Target.getSymA(), nullptr, Target.getConstant()); + MCValue B = MCValue::get(Target.getSymB()); + auto FA = MCFixup::create(Fixup.getOffset(), nullptr, std::get<0>(FK)); + auto FB = MCFixup::create(Fixup.getOffset(), nullptr, std::get<1>(FK)); + auto &Asm = Layout.getAssembler(); + Asm.getWriter().recordRelocation(Asm, Layout, &F, FA, A, FixedValueA); + Asm.getWriter().recordRelocation(Asm, Layout, &F, FB, B, FixedValueB); + FixedValue = FixedValueA - FixedValueB; + return true; +} + std::unique_ptr<MCObjectTargetWriter> LoongArchAsmBackend::createObjectTargetWriter() const { return createLoongArchELFObjectWriter( diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h index d1fbf788e8a8..fef0e84600a7 100644 --- a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h @@ -31,10 +31,15 @@ class LoongArchAsmBackend : public MCAsmBackend { public: LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit, const MCTargetOptions &Options) - : MCAsmBackend(llvm::endianness::little), STI(STI), OSABI(OSABI), - Is64Bit(Is64Bit), TargetOptions(Options) {} + : MCAsmBackend(llvm::endianness::little, + LoongArch::fixup_loongarch_relax), + STI(STI), OSABI(OSABI), Is64Bit(Is64Bit), TargetOptions(Options) {} ~LoongArchAsmBackend() override {} + bool handleAddSubRelocations(const MCAsmLayout 
&Layout, const MCFragment &F, + const MCFixup &Fixup, const MCValue &Target, + uint64_t &FixedValue) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, uint64_t Value, bool IsResolved, diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h index ba2d6718cdf9..178fa6e5262b 100644 --- a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h +++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h @@ -106,7 +106,9 @@ enum Fixups { // 20-bit fixup corresponding to %gd_pc_hi20(foo) for instruction pcalau12i. fixup_loongarch_tls_gd_pc_hi20, // 20-bit fixup corresponding to %gd_hi20(foo) for instruction lu12i.w. - fixup_loongarch_tls_gd_hi20 + fixup_loongarch_tls_gd_hi20, + // Generate an R_LARCH_RELAX which indicates the linker may relax here. + fixup_loongarch_relax = FirstLiteralRelocationKind + ELF::R_LARCH_RELAX }; } // end namespace LoongArch } // end namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 22c662a79d87..385b3b74c34d 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -10986,7 +10986,6 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op, switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) { case Intrinsic::ppc_cfence: { assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument."); - assert(Subtarget.isPPC64() && "Only 64-bit is supported for now."); SDValue Val = Op.getOperand(ArgStart + 1); EVT Ty = Val.getValueType(); if (Ty == MVT::i128) { @@ -10994,9 +10993,11 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op, // ordering? 
Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val); } + unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE; + EVT FTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; return SDValue( - DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other, - DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val), + DAG.getMachineNode(Opcode, DL, MVT::Other, + DAG.getNode(ISD::ANY_EXTEND, DL, FTy, Val), Op.getOperand(0)), 0); } @@ -11827,7 +11828,7 @@ Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder, // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification. - if (isa<LoadInst>(Inst) && Subtarget.isPPC64()) + if (isa<LoadInst>(Inst)) return Builder.CreateCall( Intrinsic::getDeclaration( Builder.GetInsertBlock()->getParent()->getParent(), diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index d0a6cced1b19..aaced58defe6 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1067,9 +1067,7 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable( const MachineInstr &MI) const { switch (MI.getOpcode()) { default: - // This function should only be called for opcodes with the ReMaterializable - // flag set. - llvm_unreachable("Unknown rematerializable operation!"); + // Let base implementaion decide. break; case PPC::LI: case PPC::LI8: @@ -3179,9 +3177,11 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { } // FIXME: Maybe we can expand it in 'PowerPC Expand Atomic' pass. + case PPC::CFENCE: case PPC::CFENCE8: { auto Val = MI.getOperand(0).getReg(); - BuildMI(MBB, MI, DL, get(PPC::CMPD), PPC::CR7).addReg(Val).addReg(Val); + unsigned CmpOp = Subtarget.isPPC64() ? 
PPC::CMPD : PPC::CMPW; + BuildMI(MBB, MI, DL, get(CmpOp), PPC::CR7).addReg(Val).addReg(Val); BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP)) .addImm(PPC::PRED_NE_MINUS) .addReg(PPC::CR7) diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 6199785206b2..b1601739fd45 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -5260,6 +5260,9 @@ def HASHCHKP : XForm_XD6_RA5_RB5<31, 690, (outs), "hashchkp $RB, $addr", IIC_IntGeneral, []>; } +let Defs = [CR7], Itinerary = IIC_LdStSync in +def CFENCE : PPCPostRAExpPseudo<(outs), (ins gprc:$cr), "#CFENCE", []>; + // Now both high word and low word are reversed, next // swap the high word and low word. def : Pat<(i64 (bitreverse i64:$A)), diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 8f03a7ac41d3..28ec999157c6 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -85,13 +85,12 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) // Merge/Unmerge for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) { + auto &MergeUnmergeActions = getActionDefinitionsBuilder(Op); unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1; unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0; - auto &MergeUnmergeActions = getActionDefinitionsBuilder(Op); if (XLen == 32 && ST.hasStdExtD()) { - LLT IdxZeroTy = G_MERGE_VALUES ? s64 : s32; - LLT IdxOneTy = G_MERGE_VALUES ? 
s32 : s64; - MergeUnmergeActions.legalFor({IdxZeroTy, IdxOneTy}); + MergeUnmergeActions.legalIf( + all(typeIs(BigTyIdx, s64), typeIs(LitTyIdx, s32))); } MergeUnmergeActions.widenScalarToNextPow2(LitTyIdx, XLen) .widenScalarToNextPow2(BigTyIdx, XLen) diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 00b4751905f6..30ed36525e29 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -113,6 +113,15 @@ enum { UsesVXRMShift = HasRoundModeOpShift + 1, UsesVXRMMask = 1 << UsesVXRMShift, + + // Indicates whether these instructions can partially overlap between source + // registers and destination registers according to the vector spec. + // 0 -> not a vector pseudo + // 1 -> default value for vector pseudos. not widening or narrowing. + // 2 -> narrowing case + // 3 -> widening case + TargetOverlapConstraintTypeShift = UsesVXRMShift + 1, + TargetOverlapConstraintTypeMask = 3ULL << TargetOverlapConstraintTypeShift, }; enum VLMUL : uint8_t { diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td index 294927aecb94..a66dd135ae5f 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -107,15 +107,15 @@ def HasStdExtZfhmin : Predicate<"Subtarget->hasStdExtZfhmin()">, def FeatureStdExtZfh : SubtargetFeature<"zfh", "HasStdExtZfh", "true", "'Zfh' (Half-Precision Floating-Point)", - [FeatureStdExtF]>; + [FeatureStdExtZfhmin]>; def HasStdExtZfh : Predicate<"Subtarget->hasStdExtZfh()">, AssemblerPredicate<(all_of FeatureStdExtZfh), "'Zfh' (Half-Precision Floating-Point)">; def NoStdExtZfh : Predicate<"!Subtarget->hasStdExtZfh()">; def HasStdExtZfhOrZfhmin - : 
Predicate<"Subtarget->hasStdExtZfhOrZfhmin()">, - AssemblerPredicate<(any_of FeatureStdExtZfh, FeatureStdExtZfhmin), + : Predicate<"Subtarget->hasStdExtZfhmin()">, + AssemblerPredicate<(all_of FeatureStdExtZfhmin), "'Zfh' (Half-Precision Floating-Point) or " "'Zfhmin' (Half-Precision Floating-Point Minimal)">; @@ -146,15 +146,15 @@ def HasStdExtZhinxmin : Predicate<"Subtarget->hasStdExtZhinxmin()">, def FeatureStdExtZhinx : SubtargetFeature<"zhinx", "HasStdExtZhinx", "true", "'Zhinx' (Half Float in Integer)", - [FeatureStdExtZfinx]>; + [FeatureStdExtZhinxmin]>; def HasStdExtZhinx : Predicate<"Subtarget->hasStdExtZhinx()">, AssemblerPredicate<(all_of FeatureStdExtZhinx), "'Zhinx' (Half Float in Integer)">; def NoStdExtZhinx : Predicate<"!Subtarget->hasStdExtZhinx()">; def HasStdExtZhinxOrZhinxmin - : Predicate<"Subtarget->hasStdExtZhinx() || Subtarget->hasStdExtZhinxmin()">, - AssemblerPredicate<(any_of FeatureStdExtZhinx, FeatureStdExtZhinxmin), + : Predicate<"Subtarget->hasStdExtZhinxmin()">, + AssemblerPredicate<(all_of FeatureStdExtZhinxmin), "'Zhinx' (Half Float in Integer) or " "'Zhinxmin' (Half Float in Integer Minimal)">; @@ -472,7 +472,7 @@ def HasStdExtZfbfmin : Predicate<"Subtarget->hasStdExtZfbfmin()">, def FeatureStdExtZvfbfmin : SubtargetFeature<"experimental-zvfbfmin", "HasStdExtZvfbfmin", "true", "'Zvbfmin' (Vector BF16 Converts)", - [FeatureStdExtZve32f, FeatureStdExtZfbfmin]>; + [FeatureStdExtZve32f]>; def HasStdExtZvfbfmin : Predicate<"Subtarget->hasStdExtZvfbfmin()">, AssemblerPredicate<(all_of FeatureStdExtZvfbfmin), "'Zvfbfmin' (Vector BF16 Converts)">; @@ -480,23 +480,23 @@ def HasStdExtZvfbfmin : Predicate<"Subtarget->hasStdExtZvfbfmin()">, def FeatureStdExtZvfbfwma : SubtargetFeature<"experimental-zvfbfwma", "HasStdExtZvfbfwma", "true", "'Zvfbfwma' (Vector BF16 widening mul-add)", - [FeatureStdExtZvfbfmin]>; + [FeatureStdExtZvfbfmin, FeatureStdExtZfbfmin]>; def HasStdExtZvfbfwma : Predicate<"Subtarget->hasStdExtZvfbfwma()">, 
AssemblerPredicate<(all_of FeatureStdExtZvfbfwma), "'Zvfbfwma' (Vector BF16 widening mul-add)">; def HasVInstructionsBF16 : Predicate<"Subtarget->hasVInstructionsBF16()">; -def FeatureStdExtZvfh - : SubtargetFeature<"zvfh", "HasStdExtZvfh", "true", - "'Zvfh' (Vector Half-Precision Floating-Point)", - [FeatureStdExtZve32f, FeatureStdExtZfhmin]>; - def FeatureStdExtZvfhmin : SubtargetFeature<"zvfhmin", "HasStdExtZvfhmin", "true", "'Zvfhmin' (Vector Half-Precision Floating-Point Minimal)", [FeatureStdExtZve32f]>; +def FeatureStdExtZvfh + : SubtargetFeature<"zvfh", "HasStdExtZvfh", "true", + "'Zvfh' (Vector Half-Precision Floating-Point)", + [FeatureStdExtZvfhmin, FeatureStdExtZfhmin]>; + def HasVInstructionsF16 : Predicate<"Subtarget->hasVInstructionsF16()">; def HasVInstructionsF16Minimal : Predicate<"Subtarget->hasVInstructionsF16Minimal()">, @@ -561,14 +561,14 @@ def HasStdExtZawrs : Predicate<"Subtarget->hasStdExtZawrs()">, "'Zawrs' (Wait on Reservation Set)">; def FeatureStdExtZvkb - : SubtargetFeature<"experimental-zvkb", "HasStdExtZvkb", "true", + : SubtargetFeature<"zvkb", "HasStdExtZvkb", "true", "'Zvkb' (Vector Bit-manipulation used in Cryptography)">; def HasStdExtZvkb : Predicate<"Subtarget->hasStdExtZvkb()">, AssemblerPredicate<(all_of FeatureStdExtZvkb), "'Zvkb' (Vector Bit-manipulation used in Cryptography)">; def FeatureStdExtZvbb - : SubtargetFeature<"experimental-zvbb", "HasStdExtZvbb", "true", + : SubtargetFeature<"zvbb", "HasStdExtZvbb", "true", "'Zvbb' (Vector basic bit-manipulation instructions.)", [FeatureStdExtZvkb]>; def HasStdExtZvbb : Predicate<"Subtarget->hasStdExtZvbb()">, @@ -576,35 +576,35 @@ def HasStdExtZvbb : Predicate<"Subtarget->hasStdExtZvbb()">, "'Zvbb' (Vector basic bit-manipulation instructions.)">; def FeatureStdExtZvbc - : SubtargetFeature<"experimental-zvbc", "HasStdExtZvbc", "true", + : SubtargetFeature<"zvbc", "HasStdExtZvbc", "true", "'Zvbc' (Vector Carryless Multiplication)">; def HasStdExtZvbc : 
Predicate<"Subtarget->hasStdExtZvbc()">, AssemblerPredicate<(all_of FeatureStdExtZvbc), "'Zvbc' (Vector Carryless Multiplication)">; def FeatureStdExtZvkg - : SubtargetFeature<"experimental-zvkg", "HasStdExtZvkg", "true", + : SubtargetFeature<"zvkg", "HasStdExtZvkg", "true", "'Zvkg' (Vector GCM instructions for Cryptography)">; def HasStdExtZvkg : Predicate<"Subtarget->hasStdExtZvkg()">, AssemblerPredicate<(all_of FeatureStdExtZvkg), "'Zvkg' (Vector GCM instructions for Cryptography)">; def FeatureStdExtZvkned - : SubtargetFeature<"experimental-zvkned", "HasStdExtZvkned", "true", + : SubtargetFeature<"zvkned", "HasStdExtZvkned", "true", "'Zvkned' (Vector AES Encryption & Decryption (Single Round))">; def HasStdExtZvkned : Predicate<"Subtarget->hasStdExtZvkned()">, AssemblerPredicate<(all_of FeatureStdExtZvkned), "'Zvkned' (Vector AES Encryption & Decryption (Single Round))">; def FeatureStdExtZvknha - : SubtargetFeature<"experimental-zvknha", "HasStdExtZvknha", "true", + : SubtargetFeature<"zvknha", "HasStdExtZvknha", "true", "'Zvknha' (Vector SHA-2 (SHA-256 only))">; def HasStdExtZvknha : Predicate<"Subtarget->hasStdExtZvknha()">, AssemblerPredicate<(all_of FeatureStdExtZvknha), "'Zvknha' (Vector SHA-2 (SHA-256 only))">; def FeatureStdExtZvknhb - : SubtargetFeature<"experimental-zvknhb", "HasStdExtZvknhb", "true", + : SubtargetFeature<"zvknhb", "HasStdExtZvknhb", "true", "'Zvknhb' (Vector SHA-2 (SHA-256 and SHA-512))", [FeatureStdExtZve64x]>; def HasStdExtZvknhb : Predicate<"Subtarget->hasStdExtZvknhb()">, @@ -616,59 +616,59 @@ def HasStdExtZvknhaOrZvknhb : Predicate<"Subtarget->hasStdExtZvknha() || Subtarg "'Zvknha' or 'Zvknhb' (Vector SHA-2)">; def FeatureStdExtZvksed - : SubtargetFeature<"experimental-zvksed", "HasStdExtZvksed", "true", + : SubtargetFeature<"zvksed", "HasStdExtZvksed", "true", "'Zvksed' (SM4 Block Cipher Instructions)">; def HasStdExtZvksed : Predicate<"Subtarget->hasStdExtZvksed()">, AssemblerPredicate<(all_of FeatureStdExtZvksed), "'Zvksed' 
(SM4 Block Cipher Instructions)">; def FeatureStdExtZvksh - : SubtargetFeature<"experimental-zvksh", "HasStdExtZvksh", "true", + : SubtargetFeature<"zvksh", "HasStdExtZvksh", "true", "'Zvksh' (SM3 Hash Function Instructions)">; def HasStdExtZvksh : Predicate<"Subtarget->hasStdExtZvksh()">, AssemblerPredicate<(all_of FeatureStdExtZvksh), "'Zvksh' (SM3 Hash Function Instructions)">; def FeatureStdExtZvkt - : SubtargetFeature<"experimental-zvkt", "HasStdExtZvkt", "true", + : SubtargetFeature<"zvkt", "HasStdExtZvkt", "true", "'Zvkt' (Vector Data-Independent Execution Latency)">; // Zvk short-hand extensions def FeatureStdExtZvkn - : SubtargetFeature<"experimental-zvkn", "HasStdExtZvkn", "true", + : SubtargetFeature<"zvkn", "HasStdExtZvkn", "true", "This extension is shorthand for the following set of " "other extensions: Zvkned, Zvknhb, Zvkb and Zvkt.", [FeatureStdExtZvkned, FeatureStdExtZvknhb, FeatureStdExtZvkb, FeatureStdExtZvkt]>; def FeatureStdExtZvknc - : SubtargetFeature<"experimental-zvknc", "HasStdExtZvknc", "true", + : SubtargetFeature<"zvknc", "HasStdExtZvknc", "true", "This extension is shorthand for the following set of " "other extensions: Zvkn and Zvbc.", [FeatureStdExtZvkn, FeatureStdExtZvbc]>; def FeatureStdExtZvkng - : SubtargetFeature<"experimental-zvkng", "HasStdExtZvkng", "true", + : SubtargetFeature<"zvkng", "HasStdExtZvkng", "true", "This extension is shorthand for the following set of " "other extensions: Zvkn and Zvkg.", [FeatureStdExtZvkn, FeatureStdExtZvkg]>; def FeatureStdExtZvks - : SubtargetFeature<"experimental-zvks", "HasStdExtZvks", "true", + : SubtargetFeature<"zvks", "HasStdExtZvks", "true", "This extension is shorthand for the following set of " "other extensions: Zvksed, Zvksh, Zvkb and Zvkt.", [FeatureStdExtZvksed, FeatureStdExtZvksh, FeatureStdExtZvkb, FeatureStdExtZvkt]>; def FeatureStdExtZvksc - : SubtargetFeature<"experimental-zvksc", "HasStdExtZvksc", "true", + : SubtargetFeature<"zvksc", "HasStdExtZvksc", "true", "This 
extension is shorthand for the following set of " "other extensions: Zvks and Zvbc.", [FeatureStdExtZvks, FeatureStdExtZvbc]>; def FeatureStdExtZvksg - : SubtargetFeature<"experimental-zvksg", "HasStdExtZvksg", "true", + : SubtargetFeature<"zvksg", "HasStdExtZvksg", "true", "This extension is shorthand for the following set of " "other extensions: Zvks and Zvkg.", [FeatureStdExtZvks, FeatureStdExtZvkg]>; @@ -959,6 +959,10 @@ def TuneNoOptimizedZeroStrideLoad "false", "Hasn't optimized (perform fewer memory operations)" "zero-stride vector load">; +def Experimental + : SubtargetFeature<"experimental", "HasExperimental", + "true", "Experimental intrinsics">; + // Some vector hardware implementations do not process all VLEN bits in parallel // and instead split over multiple cycles. DLEN refers to the datapath width // that can be done in parallel. @@ -973,9 +977,19 @@ def TuneLUIADDIFusion def TuneAUIPCADDIFusion : SubtargetFeature<"auipc-addi-fusion", "HasAUIPCADDIFusion", "true", "Enable AUIPC+ADDI macrofusion">; -def TuneShiftedZExtFusion - : SubtargetFeature<"shifted-zext-fusion", "HasShiftedZExtFusion", - "true", "Enable SLLI+SRLI to be fused when computing (shifted) zero extension">; + +def TuneZExtHFusion + : SubtargetFeature<"zexth-fusion", "HasZExtHFusion", + "true", "Enable SLLI+SRLI to be fused to zero extension of halfword">; + +def TuneZExtWFusion + : SubtargetFeature<"zextw-fusion", "HasZExtWFusion", + "true", "Enable SLLI+SRLI to be fused to zero extension of word">; + +def TuneShiftedZExtWFusion + : SubtargetFeature<"shifted-zextw-fusion", "HasShiftedZExtWFusion", + "true", "Enable SLLI+SRLI to be fused when computing (shifted) zero extension of word">; + def TuneLDADDFusion : SubtargetFeature<"ld-add-fusion", "HasLDADDFusion", "true", "Enable LD+ADD macrofusion.">; @@ -997,12 +1011,8 @@ def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7", [TuneNoDefaultUnroll, TuneShortForwardBranchOpt]>; -def TuneVeyronFusions : 
SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron", - "Ventana Veyron-Series processors", - [TuneLUIADDIFusion, - TuneAUIPCADDIFusion, - TuneShiftedZExtFusion, - TuneLDADDFusion]>; +def TuneVentanaVeyron : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron", + "Ventana Veyron-Series processors">; // Assume that lock-free native-width atomics are available, even if the target // and operating system combination would not usually provide them. The user diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 09b3ab96974c..098a320c9153 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -915,8 +915,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { Opc = RISCV::FMV_H_X; break; case MVT::f16: - Opc = - Subtarget->hasStdExtZhinxOrZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X; + Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X; break; case MVT::f32: Opc = Subtarget->hasStdExtZfinx() ? 
RISCV::COPY : RISCV::FMV_W_X; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 03e994586d0c..c2508a158837 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -122,7 +122,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (Subtarget.is64Bit() && RV64LegalI32) addRegisterClass(MVT::i32, &RISCV::GPRRegClass); - if (Subtarget.hasStdExtZfhOrZfhmin()) + if (Subtarget.hasStdExtZfhmin()) addRegisterClass(MVT::f16, &RISCV::FPR16RegClass); if (Subtarget.hasStdExtZfbfmin()) addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass); @@ -130,7 +130,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, addRegisterClass(MVT::f32, &RISCV::FPR32RegClass); if (Subtarget.hasStdExtD()) addRegisterClass(MVT::f64, &RISCV::FPR64RegClass); - if (Subtarget.hasStdExtZhinxOrZhinxmin()) + if (Subtarget.hasStdExtZhinxmin()) addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass); if (Subtarget.hasStdExtZfinx()) addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass); @@ -439,7 +439,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN}; - if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) + if (Subtarget.hasStdExtZfhminOrZhinxmin()) setOperationAction(ISD::BITCAST, MVT::i16, Custom); static const unsigned ZfhminZfbfminPromoteOps[] = { @@ -469,7 +469,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand); } - if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) { + if (Subtarget.hasStdExtZfhminOrZhinxmin()) { if (Subtarget.hasStdExtZfhOrZhinx()) { setOperationAction(FPLegalNodeTypes, MVT::f16, Legal); setOperationAction(FPRndMode, MVT::f16, @@ -675,7 +675,7 @@ 
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND, ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN, ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX, - ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE}; + ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE}; static const unsigned FloatingPointVPOps[] = { ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, @@ -688,7 +688,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS, - ISD::EXPERIMENTAL_VP_REVERSE}; + ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE}; static const unsigned IntegerVecReduceOps[] = { ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, @@ -773,6 +773,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECTOR_REVERSE, VT, Custom); + setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom); setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom); setOperationPromotedToType( @@ -1147,6 +1148,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_SETCC, ISD::VP_TRUNCATE}, VT, Custom); + setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom); setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom); continue; } @@ -1322,7 +1324,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // Custom-legalize bitcasts from fixed-length vectors to scalar types. 
setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64}, Custom); - if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) + if (Subtarget.hasStdExtZfhminOrZhinxmin()) setOperationAction(ISD::BITCAST, MVT::f16, Custom); if (Subtarget.hasStdExtFOrZfinx()) setOperationAction(ISD::BITCAST, MVT::f32, Custom); @@ -1388,7 +1390,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (Subtarget.hasStdExtZbkb()) setTargetDAGCombine(ISD::BITREVERSE); - if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) + if (Subtarget.hasStdExtZfhminOrZhinxmin()) setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); if (Subtarget.hasStdExtFOrZfinx()) setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT, @@ -2099,7 +2101,7 @@ bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const { bool IsLegalVT = false; if (VT == MVT::f16) - IsLegalVT = Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin(); + IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin(); else if (VT == MVT::f32) IsLegalVT = Subtarget.hasStdExtFOrZfinx(); else if (VT == MVT::f64) @@ -2171,7 +2173,7 @@ MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled. // We might still end up using a GPR but that will be decided based on ABI. if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() && - !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) + !Subtarget.hasStdExtZfhminOrZhinxmin()) return MVT::f32; MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); @@ -2188,7 +2190,7 @@ unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled. // We might still end up using a GPR but that will be decided based on ABI. 
if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() && - !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) + !Subtarget.hasStdExtZfhminOrZhinxmin()) return 1; return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); @@ -5528,7 +5530,7 @@ static unsigned getRISCVVLOp(SDValue Op) { case ISD::VP_SELECT: return RISCVISD::VSELECT_VL; case ISD::VP_MERGE: - return RISCVISD::VP_MERGE_VL; + return RISCVISD::VMERGE_VL; case ISD::VP_ASHR: return RISCVISD::SRA_VL; case ISD::VP_LSHR: @@ -5576,6 +5578,8 @@ static bool hasMergeOp(unsigned Opcode) { return true; if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL) return true; + if (Opcode == RISCVISD::VMERGE_VL) + return true; return false; } @@ -5761,7 +5765,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, EVT Op0VT = Op0.getValueType(); MVT XLenVT = Subtarget.getXLenVT(); if (VT == MVT::f16 && Op0VT == MVT::i16 && - Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) { + Subtarget.hasStdExtZfhminOrZhinxmin()) { SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0); SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0); return FPConv; @@ -6637,6 +6641,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, !Subtarget.hasVInstructionsF16())) return SplitVPOp(Op, DAG); return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); + case ISD::EXPERIMENTAL_VP_SPLICE: + return lowerVPSpliceExperimental(Op, DAG); case ISD::EXPERIMENTAL_VP_REVERSE: return lowerVPReverseExperimental(Op, DAG); } @@ -8238,8 +8244,8 @@ static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, AVL); // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma. // It's fine because vmerge does not care mask policy. 
- return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff, - AVL); + return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff, + MaskedOff, AVL); } } @@ -10312,9 +10318,20 @@ SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const { for (const auto &OpIdx : enumerate(Op->ops())) { SDValue V = OpIdx.value(); assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!"); - // Add dummy merge value before the mask. - if (HasMergeOp && *ISD::getVPMaskIdx(Op.getOpcode()) == OpIdx.index()) - Ops.push_back(DAG.getUNDEF(ContainerVT)); + // Add dummy merge value before the mask. Or if there isn't a mask, before + // EVL. + if (HasMergeOp) { + auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode()); + if (MaskIdx) { + if (*MaskIdx == OpIdx.index()) + Ops.push_back(DAG.getUNDEF(ContainerVT)); + } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == + OpIdx.index()) { + // For VP_MERGE, copy the false operand instead of an undef value. + assert(Op.getOpcode() == ISD::VP_MERGE); + Ops.push_back(Ops.back()); + } + } // Pass through operands which aren't fixed-length vectors. 
if (!V.getValueType().isFixedLengthVector()) { Ops.push_back(V); @@ -10583,6 +10600,87 @@ SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, } SDValue +RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + + SDValue Op1 = Op.getOperand(0); + SDValue Op2 = Op.getOperand(1); + SDValue Offset = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); + SDValue EVL1 = Op.getOperand(4); + SDValue EVL2 = Op.getOperand(5); + + const MVT XLenVT = Subtarget.getXLenVT(); + MVT VT = Op.getSimpleValueType(); + MVT ContainerVT = VT; + if (VT.isFixedLengthVector()) { + ContainerVT = getContainerForFixedLengthVector(VT); + Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); + Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget); + MVT MaskVT = getMaskTypeFor(ContainerVT); + Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); + } + + bool IsMaskVector = VT.getVectorElementType() == MVT::i1; + if (IsMaskVector) { + ContainerVT = ContainerVT.changeVectorElementType(MVT::i8); + + // Expand input operands + SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, + DAG.getUNDEF(ContainerVT), + DAG.getConstant(1, DL, XLenVT), EVL1); + SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, + DAG.getUNDEF(ContainerVT), + DAG.getConstant(0, DL, XLenVT), EVL1); + Op1 = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Op1, SplatOneOp1, + SplatZeroOp1, EVL1); + + SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, + DAG.getUNDEF(ContainerVT), + DAG.getConstant(1, DL, XLenVT), EVL2); + SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, + DAG.getUNDEF(ContainerVT), + DAG.getConstant(0, DL, XLenVT), EVL2); + Op2 = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Op2, SplatOneOp2, + SplatZeroOp2, EVL2); + } + + int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue(); + SDValue DownOffset, UpOffset; + if (ImmValue >= 
0) { + // The operand is a TargetConstant, we need to rebuild it as a regular + // constant. + DownOffset = DAG.getConstant(ImmValue, DL, XLenVT); + UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset); + } else { + // The operand is a TargetConstant, we need to rebuild it as a regular + // constant rather than negating the original operand. + UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT); + DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset); + } + + SDValue SlideDown = + getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), + Op1, DownOffset, Mask, UpOffset); + SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2, + UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC); + + if (IsMaskVector) { + // Truncate Result back to a mask vector (Result has same EVL as Op2) + Result = DAG.getNode( + RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1), + {Result, DAG.getConstant(0, DL, ContainerVT), + DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)), + Mask, EVL2}); + } + + if (!VT.isFixedLengthVector()) + return Result; + return convertFromScalableVector(VT, Result, DAG, Subtarget); +} + +SDValue RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); @@ -11527,11 +11625,11 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, EVT Op0VT = Op0.getValueType(); MVT XLenVT = Subtarget.getXLenVT(); if (VT == MVT::i16 && Op0VT == MVT::f16 && - Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) { + Subtarget.hasStdExtZfhminOrZhinxmin()) { SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); } else if (VT == MVT::i16 && Op0VT == MVT::bf16 && - Subtarget.hasStdExtZfbfmin()) { + Subtarget.hasStdExtZfbfmin()) { SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 
} else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() && @@ -13493,6 +13591,7 @@ static SDValue performMemPairCombine(SDNode *N, // (fp_to_int (ffloor X)) -> fcvt X, rdn // (fp_to_int (fceil X)) -> fcvt X, rup // (fp_to_int (fround X)) -> fcvt X, rmm +// (fp_to_int (frint X)) -> fcvt X static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget) { @@ -13516,10 +13615,7 @@ static SDValue performFP_TO_INTCombine(SDNode *N, RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode()); // If the result is invalid, we didn't find a foldable instruction. - // If the result is dynamic, then we found an frint which we don't yet - // support. It will cause 7 to be written to the FRM CSR for vector. - // FIXME: We could support this by using VFCVT_X_F_VL/VFCVT_XU_F_VL below. - if (FRM == RISCVFPRndMode::Invalid || FRM == RISCVFPRndMode::DYN) + if (FRM == RISCVFPRndMode::Invalid) return SDValue(); SDLoc DL(N); @@ -13558,6 +13654,10 @@ static SDValue performFP_TO_INTCombine(SDNode *N, unsigned Opc = IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL; FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL); + } else if (FRM == RISCVFPRndMode::DYN) { + unsigned Opc = + IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL; + FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL); } else { unsigned Opc = IsSigned ? 
RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL; @@ -13594,6 +13694,7 @@ static SDValue performFP_TO_INTCombine(SDNode *N, // (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn)) // (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup)) // (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm)) +// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn)) static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget) { @@ -15998,13 +16099,26 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, // We can't do anything for most intrinsics. break; case Intrinsic::riscv_vsetvli: - case Intrinsic::riscv_vsetvlimax: - // Assume that VL output is <= 65536. - // TODO: Take SEW and LMUL into account. - if (BitWidth > 17) - Known.Zero.setBitsFrom(17); + case Intrinsic::riscv_vsetvlimax: { + bool HasAVL = IntNo == Intrinsic::riscv_vsetvli; + unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1); + RISCVII::VLMUL VLMUL = + static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2)); + unsigned SEW = RISCVVType::decodeVSEW(VSEW); + auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL); + uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW; + MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul; + + // Result of vsetvli must be not larger than AVL. 
+ if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1))) + MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1)); + + unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1; + if (BitWidth > KnownZeroFirstBit) + Known.Zero.setBitsFrom(KnownZeroFirstBit); break; } + } break; } } @@ -18570,7 +18684,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(VNSRL_VL) NODE_NAME_CASE(SETCC_VL) NODE_NAME_CASE(VSELECT_VL) - NODE_NAME_CASE(VP_MERGE_VL) + NODE_NAME_CASE(VMERGE_VL) NODE_NAME_CASE(VMAND_VL) NODE_NAME_CASE(VMOR_VL) NODE_NAME_CASE(VMXOR_VL) @@ -18632,7 +18746,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, // TODO: Support fixed vectors up to XLen for P extension? if (VT.isVector()) break; - if (VT == MVT::f16 && Subtarget.hasStdExtZhinxOrZhinxmin()) + if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin()) return std::make_pair(0U, &RISCV::GPRF16RegClass); if (VT == MVT::f32 && Subtarget.hasStdExtZfinx()) return std::make_pair(0U, &RISCV::GPRF32RegClass); @@ -18640,7 +18754,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return std::make_pair(0U, &RISCV::GPRPF64RegClass); return std::make_pair(0U, &RISCV::GPRNoX0RegClass); case 'f': - if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16) + if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) return std::make_pair(0U, &RISCV::FPR16RegClass); if (Subtarget.hasStdExtF() && VT == MVT::f32) return std::make_pair(0U, &RISCV::FPR32RegClass); @@ -18753,7 +18867,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, } if (VT == MVT::f32 || VT == MVT::Other) return std::make_pair(FReg, &RISCV::FPR32RegClass); - if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16) { + if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) { unsigned RegNo = FReg - RISCV::F0_F; unsigned HReg = RISCV::F0_H + RegNo; return std::make_pair(HReg, &RISCV::FPR16RegClass); @@ -19100,7 +19214,7 @@ bool 
RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT, switch (FPVT.getSimpleVT().SimpleTy) { case MVT::f16: - return Subtarget.hasStdExtZfhOrZfhmin(); + return Subtarget.hasStdExtZfhmin(); case MVT::f32: return Subtarget.hasStdExtF(); case MVT::f64: diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h index 41a2dc5771c8..58ed611efc83 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -332,10 +332,8 @@ enum NodeType : unsigned { // Vector select with an additional VL operand. This operation is unmasked. VSELECT_VL, - // Vector select with operand #2 (the value when the condition is false) tied - // to the destination and an additional VL operand. This operation is - // unmasked. - VP_MERGE_VL, + // General vmerge node with mask, true, false, passthru, and vl operands. + VMERGE_VL, // Mask binary operators. VMAND_VL, @@ -910,6 +908,7 @@ private: SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVPSpliceExperimental(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td index e80ba26800a1..f56f49ae2457 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -212,6 +212,15 @@ class RVInstCommon<dag outs, dag ins, string opcodestr, string argstr, // to the correct CSR. 
bit UsesVXRM = 0; let TSFlags{20} = UsesVXRM; + + // Indicates whether these instructions can partially overlap between source + // registers and destination registers according to the vector spec. + // 0 -> not a vector pseudo + // 1 -> default value for vector pseudos. not widening or narrowing. + // 2 -> narrowing case + // 3 -> widening case + bits<2> TargetOverlapConstraintType = 0; + let TSFlags{22-21} = TargetOverlapConstraintType; } class RVInst<dag outs, dag ins, string opcodestr, string argstr, diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 5e06422cf9ad..488ffa73f4e4 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -529,14 +529,6 @@ class RISCVVPseudo { // SEW = 0 is used to denote that the Pseudo is not SEW specific (or unknown). bits<8> SEW = 0; bit NeedBeInPseudoTable = 1; - // TargetOverlapConstraintType indicates that these instructions can - // overlap between source operands and destination operands. - // 1 -> default value, remain current constraint - // 2 -> narrow case - // 3 -> widen case - // TODO: Add TargetOverlapConstraintType into PseudosTable for further - // query. - bits<2> TargetOverlapConstraintType = 1; } // The actual table. 
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index dc6b57fad321..33bdc3366aa3 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -344,7 +344,14 @@ def SDT_RISCVSelect_VL : SDTypeProfile<1, 4, [ ]>; def riscv_vselect_vl : SDNode<"RISCVISD::VSELECT_VL", SDT_RISCVSelect_VL>; -def riscv_vp_merge_vl : SDNode<"RISCVISD::VP_MERGE_VL", SDT_RISCVSelect_VL>; + +def SDT_RISCVVMERGE_VL : SDTypeProfile<1, 5, [ + SDTCisVec<0>, SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>, SDTCVecEltisVT<1, i1>, + SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, SDTCisSameAs<0, 4>, + SDTCisVT<5, XLenVT> +]>; + +def riscv_vmerge_vl : SDNode<"RISCVISD::VMERGE_VL", SDT_RISCVVMERGE_VL>; def SDT_RISCVVMSETCLR_VL : SDTypeProfile<1, 1, [SDTCVecEltisVT<0, i1>, SDTCisVT<1, XLenVT>]>; @@ -675,14 +682,14 @@ multiclass VPatTiedBinaryNoMaskVL_V<SDNode vop, op2_reg_class:$rs2, GPR:$vl, sew, TAIL_AGNOSTIC)>; // Tail undisturbed - def : Pat<(riscv_vp_merge_vl true_mask, + def : Pat<(riscv_vmerge_vl true_mask, (result_type (vop result_reg_class:$rs1, (op2_type op2_reg_class:$rs2), srcvalue, true_mask, VLOpFrag)), - result_reg_class:$rs1, VLOpFrag), + result_reg_class:$rs1, result_reg_class:$rs1, VLOpFrag), (!cast<Instruction>(instruction_name#"_"#suffix#"_"# vlmul.MX#"_TIED") result_reg_class:$rs1, op2_reg_class:$rs2, @@ -712,14 +719,14 @@ multiclass VPatTiedBinaryNoMaskVL_V_RM<SDNode vop, FRM_DYN, GPR:$vl, sew, TAIL_AGNOSTIC)>; // Tail undisturbed - def : Pat<(riscv_vp_merge_vl true_mask, + def : Pat<(riscv_vmerge_vl true_mask, (result_type (vop result_reg_class:$rs1, (op2_type op2_reg_class:$rs2), srcvalue, true_mask, VLOpFrag)), - result_reg_class:$rs1, VLOpFrag), + result_reg_class:$rs1, result_reg_class:$rs1, VLOpFrag), (!cast<Instruction>(instruction_name#"_"#suffix#"_"# vlmul.MX#"_TIED") 
result_reg_class:$rs1, op2_reg_class:$rs2, @@ -1697,21 +1704,21 @@ multiclass VPatMultiplyAccVL_VV_VX<PatFrag op, string instruction_name> { foreach vti = AllIntegerVectors in { defvar suffix = vti.LMul.MX; let Predicates = GetVTypePredicates<vti>.Predicates in { - def : Pat<(riscv_vp_merge_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask V0), (vti.Vector (op vti.RegClass:$rd, (riscv_mul_vl_oneuse vti.RegClass:$rs1, vti.RegClass:$rs2, srcvalue, (vti.Mask true_mask), VLOpFrag), srcvalue, (vti.Mask true_mask), VLOpFrag)), - vti.RegClass:$rd, VLOpFrag), + vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag), (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(riscv_vp_merge_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask V0), (vti.Vector (op vti.RegClass:$rd, (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1), vti.RegClass:$rs2, srcvalue, (vti.Mask true_mask), VLOpFrag), srcvalue, (vti.Mask true_mask), VLOpFrag)), - vti.RegClass:$rd, VLOpFrag), + vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag), (!cast<Instruction>(instruction_name#"_VX_"# suffix #"_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>; @@ -1840,17 +1847,17 @@ multiclass VPatFPMulAccVL_VV_VF<PatFrag vop, string instruction_name> { foreach vti = AllFloatVectors in { defvar suffix = vti.LMul.MX; let Predicates = GetVTypePredicates<vti>.Predicates in { - def : Pat<(riscv_vp_merge_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask V0), (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), - vti.RegClass:$rd, VLOpFrag), + vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag), (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(riscv_vp_merge_vl 
(vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask V0), (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), - vti.RegClass:$rd, VLOpFrag), + vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag), (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>; @@ -1876,10 +1883,10 @@ multiclass VPatFPMulAccVL_VV_VF_RM<PatFrag vop, string instruction_name> { foreach vti = AllFloatVectors in { defvar suffix = vti.LMul.MX; let Predicates = GetVTypePredicates<vti>.Predicates in { - def : Pat<(riscv_vp_merge_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask V0), (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), - vti.RegClass:$rd, VLOpFrag), + vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag), (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, (vti.Mask V0), @@ -1887,10 +1894,10 @@ multiclass VPatFPMulAccVL_VV_VF_RM<PatFrag vop, string instruction_name> { // RISCVInsertReadWriteCSR FRM_DYN, GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(riscv_vp_merge_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask V0), (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), - vti.RegClass:$rd, VLOpFrag), + vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag), (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, (vti.Mask V0), @@ -2273,29 +2280,32 @@ foreach vti = AllIntegerVectors in { (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, simm5:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; - def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0), - vti.RegClass:$rs1, - vti.RegClass:$rs2, - VLOpFrag)), + def : 
Pat<(vti.Vector (riscv_vmerge_vl (vti.Mask V0), + vti.RegClass:$rs1, + vti.RegClass:$rs2, + vti.RegClass:$merge, + VLOpFrag)), (!cast<Instruction>("PseudoVMERGE_VVM_"#vti.LMul.MX) - vti.RegClass:$rs2, vti.RegClass:$rs2, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + vti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; - def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0), - (SplatPat XLenVT:$rs1), - vti.RegClass:$rs2, - VLOpFrag)), + def : Pat<(vti.Vector (riscv_vmerge_vl (vti.Mask V0), + (SplatPat XLenVT:$rs1), + vti.RegClass:$rs2, + vti.RegClass:$merge, + VLOpFrag)), (!cast<Instruction>("PseudoVMERGE_VXM_"#vti.LMul.MX) - vti.RegClass:$rs2, vti.RegClass:$rs2, GPR:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; - - def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0), - (SplatPat_simm5 simm5:$rs1), - vti.RegClass:$rs2, - VLOpFrag)), + vti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + + def : Pat<(vti.Vector (riscv_vmerge_vl (vti.Mask V0), + (SplatPat_simm5 simm5:$rs1), + vti.RegClass:$rs2, + vti.RegClass:$merge, + VLOpFrag)), (!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX) - vti.RegClass:$rs2, vti.RegClass:$rs2, simm5:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + vti.RegClass:$merge, vti.RegClass:$rs2, simm5:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; } } @@ -2493,21 +2503,23 @@ foreach fvti = AllFloatVectors in { (fvti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; - def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0), - fvti.RegClass:$rs1, - fvti.RegClass:$rs2, - VLOpFrag)), - (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX) - fvti.RegClass:$rs2, fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0), - GPR:$vl, fvti.Log2SEW)>; - - def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0), - (SplatFPOp (fvti.Scalar fpimm0)), - fvti.RegClass:$rs2, - VLOpFrag)), - 
(!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX) - fvti.RegClass:$rs2, fvti.RegClass:$rs2, 0, (fvti.Mask V0), - GPR:$vl, fvti.Log2SEW)>; + def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0), + fvti.RegClass:$rs1, + fvti.RegClass:$rs2, + fvti.RegClass:$merge, + VLOpFrag)), + (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX) + fvti.RegClass:$merge, fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0), + GPR:$vl, fvti.Log2SEW)>; + + def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0), + (SplatFPOp (fvti.Scalar fpimm0)), + fvti.RegClass:$rs2, + fvti.RegClass:$merge, + VLOpFrag)), + (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX) + fvti.RegClass:$merge, fvti.RegClass:$rs2, 0, (fvti.Mask V0), + GPR:$vl, fvti.Log2SEW)>; } let Predicates = GetVTypePredicates<fvti>.Predicates in { @@ -2521,12 +2533,13 @@ foreach fvti = AllFloatVectors in { (fvti.Scalar fvti.ScalarRegClass:$rs1), (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; - def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0), - (SplatFPOp fvti.ScalarRegClass:$rs1), - fvti.RegClass:$rs2, - VLOpFrag)), + def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0), + (SplatFPOp fvti.ScalarRegClass:$rs1), + fvti.RegClass:$rs2, + fvti.RegClass:$merge, + VLOpFrag)), (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX) - fvti.RegClass:$rs2, fvti.RegClass:$rs2, + fvti.RegClass:$merge, fvti.RegClass:$rs2, (fvti.Scalar fvti.ScalarRegClass:$rs1), (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td index fa618b437ce7..0b1d5b664df9 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td @@ -351,20 +351,22 @@ multiclass VPseudoSiFiveVMACC<string mx, VReg vd_type, VReg vs2_type, multiclass VPseudoSiFiveVQMACC<string Constraint = ""> { foreach m = MxListVF8 in + let VLMul = 
m.value in defm NAME : VPseudoSiFiveVMACC<m.MX, m.vrclass, m.vrclass, Constraint>; } multiclass VPseudoSiFiveVFWMACC<string Constraint = ""> { foreach m = MxListFW in + let VLMul = m.value in defm NAME : VPseudoSiFiveVMACC<m.MX, m.wvrclass, m.vrclass, Constraint>; } multiclass VPseudoSiFiveVFNRCLIP<string Constraint = "@earlyclobber $rd"> { - foreach m = MxListVF4 in + foreach i = [0, 1, 2, 3, 4] in let hasSideEffects = 0 in - defm "Pseudo" # NAME : VPseudoBinaryRoundingMode<!if(!eq(m.vrclass, VRM8), - VRM2, VR), - m.vrclass, FPR32, m, + defm "Pseudo" # NAME : VPseudoBinaryRoundingMode<MxListW[i].vrclass, + MxListVF4[i].vrclass, + FPR32, MxListW[i], Constraint, /*sew*/0, UsesVXRM=0>; } @@ -592,7 +594,7 @@ multiclass VPatVFNRCLIP<string intrinsic, string instruction> { defvar Vti = pair.Vti; defvar Wti = pair.Wti; defm : VPatBinaryRoundingMode<"int_riscv_sf_" # intrinsic, - "Pseudo" # instruction # "_" # Wti.LMul.MX, + "Pseudo" # instruction # "_" # Vti.LMul.MX, Vti.Vector, Wti.Vector, Wti.Scalar, Vti.Mask, Vti.Log2SEW, Vti.RegClass, Wti.RegClass, Wti.ScalarRegClass>; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td index 1ffa78a28d09..7c21fb4bcc1e 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // This file describes the RISC-V instructions from the standard 'Zvk', -// Vector Cryptography Instructions extension, version 1.0.0-rc1. +// Vector Cryptography Instructions extension, version Release 1.0.0. 
// //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp index 02ea5270823d..f948f05b22f7 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp @@ -58,18 +58,66 @@ static bool isLDADD(const MachineInstr *FirstMI, const MachineInstr &SecondMI) { return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI); } -// Fuse these patterns: -// -// slli rd, rs1, 32 -// srli rd, rd, x -// where 0 <= x <= 32 -// -// and -// +// Fuse zero extension of halfword: // slli rd, rs1, 48 +// srli rd, rd, 48 +static bool isZExtH(const MachineInstr *FirstMI, const MachineInstr &SecondMI) { + if (SecondMI.getOpcode() != RISCV::SRLI) + return false; + + if (!SecondMI.getOperand(2).isImm()) + return false; + + if (SecondMI.getOperand(2).getImm() != 48) + return false; + + // Given SecondMI, when FirstMI is unspecified, we must return + // if SecondMI may be part of a fused pair at all. + if (!FirstMI) + return true; + + if (FirstMI->getOpcode() != RISCV::SLLI) + return false; + + if (FirstMI->getOperand(2).getImm() != 48) + return false; + + return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI); +} + +// Fuse zero extension of word: +// slli rd, rs1, 32 +// srli rd, rd, 32 +static bool isZExtW(const MachineInstr *FirstMI, const MachineInstr &SecondMI) { + if (SecondMI.getOpcode() != RISCV::SRLI) + return false; + + if (!SecondMI.getOperand(2).isImm()) + return false; + + if (SecondMI.getOperand(2).getImm() != 32) + return false; + + // Given SecondMI, when FirstMI is unspecified, we must return + // if SecondMI may be part of a fused pair at all. 
+ if (!FirstMI) + return true; + + if (FirstMI->getOpcode() != RISCV::SLLI) + return false; + + if (FirstMI->getOperand(2).getImm() != 32) + return false; + + return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI); +} + +// Fuse shifted zero extension of word: +// slli rd, rs1, 32 // srli rd, rd, x -static bool isShiftedZExt(const MachineInstr *FirstMI, - const MachineInstr &SecondMI) { +// where 0 <= x < 32 +static bool isShiftedZExtW(const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { if (SecondMI.getOpcode() != RISCV::SRLI) return false; @@ -77,8 +125,7 @@ static bool isShiftedZExt(const MachineInstr *FirstMI, return false; unsigned SRLIImm = SecondMI.getOperand(2).getImm(); - bool IsShiftBy48 = SRLIImm == 48; - if (SRLIImm > 32 && !IsShiftBy48) + if (SRLIImm >= 32) return false; // Given SecondMI, when FirstMI is unspecified, we must return @@ -89,8 +136,7 @@ static bool isShiftedZExt(const MachineInstr *FirstMI, if (FirstMI->getOpcode() != RISCV::SLLI) return false; - unsigned SLLIImm = FirstMI->getOperand(2).getImm(); - if (IsShiftBy48 ? 
(SLLIImm != 48) : (SLLIImm != 32)) + if (FirstMI->getOperand(2).getImm() != 32) return false; return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI); @@ -144,7 +190,13 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, if (ST.hasAUIPCADDIFusion() && isAUIPCADDI(FirstMI, SecondMI)) return true; - if (ST.hasShiftedZExtFusion() && isShiftedZExt(FirstMI, SecondMI)) + if (ST.hasZExtHFusion() && isZExtH(FirstMI, SecondMI)) + return true; + + if (ST.hasZExtWFusion() && isZExtW(FirstMI, SecondMI)) + return true; + + if (ST.hasShiftedZExtWFusion() && isShiftedZExtW(FirstMI, SecondMI)) return true; if (ST.hasLDADDFusion() && isLDADD(FirstMI, SecondMI)) diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td index 58989fd716fa..6362a3bef6f2 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -216,6 +216,25 @@ def SIFIVE_X280 : RISCVProcessorModel<"sifive-x280", SiFive7Model, [TuneSiFive7, TuneDLenFactor2]>; +def SIFIVE_P450 : RISCVProcessorModel<"sifive-p450", NoSchedModel, + [Feature64Bit, + FeatureStdExtZifencei, + FeatureStdExtM, + FeatureStdExtA, + FeatureStdExtF, + FeatureStdExtD, + FeatureStdExtC, + FeatureStdExtZicbop, + FeatureStdExtZicbom, + FeatureStdExtZicboz, + FeatureStdExtZihintntl, + FeatureStdExtZihintpause, + FeatureStdExtZihpm, + FeatureStdExtZba, + FeatureStdExtZbb, + FeatureStdExtZbs, + FeatureStdExtZfhmin]>; + def SYNTACORE_SCR1_BASE : RISCVProcessorModel<"syntacore-scr1-base", SyntacoreSCR1Model, [Feature32Bit, @@ -254,7 +273,13 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1", FeatureStdExtZicbop, FeatureStdExtZicboz, FeatureVendorXVentanaCondOps], - [TuneVeyronFusions]>; + [TuneVentanaVeyron, + TuneLUIADDIFusion, + TuneAUIPCADDIFusion, + TuneZExtHFusion, + TuneZExtWFusion, + TuneShiftedZExtWFusion, + TuneLDADDFusion]>; def XIANGSHAN_NANHU : 
RISCVProcessorModel<"xiangshan-nanhu", NoSchedModel, diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index 45783d482f3b..f531ab2fac8f 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -198,6 +198,7 @@ def SiFive7Model : SchedMachineModel { let LoadLatency = 3; let MispredictPenalty = 3; let CompleteModel = 0; + let EnableIntervals = true; let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx, HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne, HasStdExtZknh, HasStdExtZksed, HasStdExtZksh, diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h index 23d56cfa6e4e..26320b05d9be 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -143,16 +143,12 @@ public: bool hasStdExtZvl() const { return ZvlLen != 0; } bool hasStdExtFOrZfinx() const { return HasStdExtF || HasStdExtZfinx; } bool hasStdExtDOrZdinx() const { return HasStdExtD || HasStdExtZdinx; } - bool hasStdExtZfhOrZfhmin() const { return HasStdExtZfh || HasStdExtZfhmin; } bool hasStdExtZfhOrZhinx() const { return HasStdExtZfh || HasStdExtZhinx; } - bool hasStdExtZhinxOrZhinxmin() const { - return HasStdExtZhinx || HasStdExtZhinxmin; - } - bool hasStdExtZfhOrZfhminOrZhinxOrZhinxmin() const { - return hasStdExtZfhOrZfhmin() || hasStdExtZhinxOrZhinxmin(); + bool hasStdExtZfhminOrZhinxmin() const { + return HasStdExtZfhmin || HasStdExtZhinxmin; } bool hasHalfFPLoadStoreMove() const { - return hasStdExtZfhOrZfhmin() || HasStdExtZfbfmin; + return HasStdExtZfhmin || HasStdExtZfbfmin; } bool is64Bit() const { return IsRV64; } MVT getXLenVT() const { @@ -194,16 +190,14 @@ public: } bool hasMacroFusion() const { - return hasLUIADDIFusion() || 
hasAUIPCADDIFusion() || - hasShiftedZExtFusion() || hasLDADDFusion(); + return hasLUIADDIFusion() || hasAUIPCADDIFusion() || hasZExtHFusion() || + hasZExtWFusion() || hasShiftedZExtWFusion() || hasLDADDFusion(); } // Vector codegen related methods. bool hasVInstructions() const { return HasStdExtZve32x; } bool hasVInstructionsI64() const { return HasStdExtZve64x; } - bool hasVInstructionsF16Minimal() const { - return HasStdExtZvfhmin || HasStdExtZvfh; - } + bool hasVInstructionsF16Minimal() const { return HasStdExtZvfhmin; } bool hasVInstructionsF16() const { return HasStdExtZvfh; } bool hasVInstructionsBF16() const { return HasStdExtZvfbfmin; } bool hasVInstructionsF32() const { return HasStdExtZve32f; } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index efc8350064a6..96ecc771863e 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -334,7 +334,7 @@ public: return RISCVRegisterClass::GPRRC; Type *ScalarTy = Ty->getScalarType(); - if ((ScalarTy->isHalfTy() && ST->hasStdExtZfhOrZfhmin()) || + if ((ScalarTy->isHalfTy() && ST->hasStdExtZfhmin()) || (ScalarTy->isFloatTy() && ST->hasStdExtF()) || (ScalarTy->isDoubleTy() && ST->hasStdExtD())) { return RISCVRegisterClass::FPRRC; diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp index 5ac45079bd00..c85bd27d256b 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp @@ -1617,7 +1617,7 @@ static bool buildEnqueueKernel(const SPIRV::IncomingCall *Call, SPIRVGlobalRegistry *GR) { MachineRegisterInfo *MRI = MIRBuilder.getMRI(); const DataLayout &DL = MIRBuilder.getDataLayout(); - bool HasEvents = Call->Builtin->Name.find("events") != 
StringRef::npos; + bool HasEvents = Call->Builtin->Name.contains("events"); const SPIRVType *Int32Ty = GR->getOrCreateSPIRVIntegerType(32, MIRBuilder); // Make vararg instructions before OpEnqueueKernel. @@ -2098,7 +2098,7 @@ parseBuiltinTypeNameToTargetExtType(std::string TypeName, // Parameterized SPIR-V builtins names follow this format: // e.g. %spirv.Image._void_1_0_0_0_0_0_0, %spirv.Pipe._0 - if (NameWithParameters.find('_') == std::string::npos) + if (!NameWithParameters.contains('_')) return TargetExtType::get(MIRBuilder.getContext(), NameWithParameters); SmallVector<StringRef> Parameters; diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index ec62a819b00e..660c574daf38 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -410,10 +410,10 @@ void SPIRVEmitIntrinsics::insertAssignPtrTypeIntrs(Instruction *I) { Constant *EltTyConst; unsigned AddressSpace = 0; if (auto *AI = dyn_cast<AllocaInst>(I)) { - EltTyConst = Constant::getNullValue(AI->getAllocatedType()); + EltTyConst = UndefValue::get(AI->getAllocatedType()); AddressSpace = AI->getAddressSpace(); } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) { - EltTyConst = Constant::getNullValue(GEP->getResultElementType()); + EltTyConst = UndefValue::get(GEP->getResultElementType()); AddressSpace = GEP->getPointerAddressSpace(); } else { llvm_unreachable("Unexpected instruction!"); @@ -436,7 +436,7 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I) { TypeToAssign = t->second->getType(); } } - Constant *Const = Constant::getNullValue(TypeToAssign); + Constant *Const = UndefValue::get(TypeToAssign); buildIntrWithMD(Intrinsic::spv_assign_type, {Ty}, Const, I, {}); } for (const auto &Op : I->operands()) { diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp 
b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index b8a6784ff3c6..3a34a0bfae46 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -645,7 +645,7 @@ SPIRVType *SPIRVGlobalRegistry::findSPIRVType( Register Reg = DT.find(Ty, &MIRBuilder.getMF()); if (Reg.isValid()) return getSPIRVTypeForVReg(Reg); - if (ForwardPointerTypes.find(Ty) != ForwardPointerTypes.end()) + if (ForwardPointerTypes.contains(Ty)) return ForwardPointerTypes[Ty]; return restOfCreateSPIRVType(Ty, MIRBuilder, AccQual, EmitIR); } @@ -712,14 +712,14 @@ SPIRVType *SPIRVGlobalRegistry::createSPIRVType( // Null pointer means we have a loop in type definitions, make and // return corresponding OpTypeForwardPointer. if (SpvElementType == nullptr) { - if (ForwardPointerTypes.find(Ty) == ForwardPointerTypes.end()) + if (!ForwardPointerTypes.contains(Ty)) ForwardPointerTypes[PType] = getOpTypeForwardPointer(SC, MIRBuilder); return ForwardPointerTypes[PType]; } Register Reg(0); // If we have forward pointer associated with this type, use its register // operand to create OpTypePointer. 
- if (ForwardPointerTypes.find(PType) != ForwardPointerTypes.end()) + if (ForwardPointerTypes.contains(PType)) Reg = getSPIRVTypeID(ForwardPointerTypes[PType]); return getOpTypePointer(SC, SpvElementType, MIRBuilder, Reg); diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index 779036016560..2a830535a2aa 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -391,7 +391,7 @@ void SPIRVModuleAnalysis::numberRegistersGlobally(const Module &M) { if (MI.getOpcode() != SPIRV::OpExtInst) continue; auto Set = MI.getOperand(2).getImm(); - if (MAI.ExtInstSetMap.find(Set) == MAI.ExtInstSetMap.end()) + if (!MAI.ExtInstSetMap.contains(Set)) MAI.ExtInstSetMap[Set] = Register::index2VirtReg(MAI.getNextID()); } } diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h index 5124181b49e2..d0b8027edd42 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h @@ -189,7 +189,7 @@ struct ModuleAnalysisInfo { } unsigned getNextID() { return MaxID++; } bool hasMBBRegister(const MachineBasicBlock &MBB) { - return BBNumToRegMap.find(MBB.getNumber()) != BBNumToRegMap.end(); + return BBNumToRegMap.contains(MBB.getNumber()); } // Convert MBB's number to corresponding ID register. 
Register getOrCreateMBBRegister(const MachineBasicBlock &MBB) { diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp index f4076be2a7b7..1bfce70fedc0 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp @@ -83,7 +83,7 @@ static void addConstantsToTrack(MachineFunction &MF, SPIRVGlobalRegistry *GR) { } for (MachineInstr *MI : ToErase) { Register Reg = MI->getOperand(2).getReg(); - if (RegsAlreadyAddedToDT.find(MI) != RegsAlreadyAddedToDT.end()) + if (RegsAlreadyAddedToDT.contains(MI)) Reg = RegsAlreadyAddedToDT[MI]; auto *RC = MRI.getRegClassOrNull(MI->getOperand(0).getReg()); if (!MRI.getRegClassOrNull(Reg) && RC) diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp index e61b07e973e9..66555fa06b06 100644 --- a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp @@ -38,6 +38,7 @@ SystemZMCAsmInfoGOFF::SystemZMCAsmInfoGOFF(const Triple &TT) { DotIsPC = false; EmitGNUAsmStartIndentationMarker = false; EmitLabelsInUpperCase = true; + ExceptionsType = ExceptionHandling::ZOS; IsLittleEndian = false; MaxInstLength = 6; RestrictCommentStringToStartOfStatement = true; diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp index 3186002c57d9..243461c0316e 100644 --- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -1115,7 +1115,7 @@ void SystemZAsmPrinter::emitFunctionBodyEnd() { static void emitPPA1Flags(std::unique_ptr<MCStreamer> &OutStreamer, bool VarArg, bool 
StackProtector, bool FPRMask, bool VRMask, - bool HasName) { + bool EHBlock, bool HasName) { enum class PPA1Flag1 : uint8_t { DSA64Bit = (0x80 >> 0), VarArg = (0x80 >> 7), @@ -1133,6 +1133,7 @@ static void emitPPA1Flags(std::unique_ptr<MCStreamer> &OutStreamer, bool VarArg, enum class PPA1Flag4 : uint8_t { EPMOffsetPresent = (0x80 >> 0), VRMask = (0x80 >> 2), + EHBlock = (0x80 >> 3), ProcedureNamePresent = (0x80 >> 7), LLVM_MARK_AS_BITMASK_ENUM(EPMOffsetPresent) }; @@ -1158,6 +1159,9 @@ static void emitPPA1Flags(std::unique_ptr<MCStreamer> &OutStreamer, bool VarArg, if (VRMask) Flags4 |= PPA1Flag4::VRMask; // Add emit VR mask flag. + if (EHBlock) + Flags4 |= PPA1Flag4::EHBlock; // Add optional EH block. + if (HasName) Flags4 |= PPA1Flag4::ProcedureNamePresent; // Add optional name block. @@ -1188,6 +1192,8 @@ static void emitPPA1Flags(std::unique_ptr<MCStreamer> &OutStreamer, bool VarArg, OutStreamer->AddComment("PPA1 Flags 4"); if ((Flags4 & PPA1Flag4::VRMask) == PPA1Flag4::VRMask) OutStreamer->AddComment(" Bit 2: 1 = Vector Reg Mask is in optional area"); + if ((Flags4 & PPA1Flag4::EHBlock) == PPA1Flag4::EHBlock) + OutStreamer->AddComment(" Bit 3: 1 = C++ EH block"); if ((Flags4 & PPA1Flag4::ProcedureNamePresent) == PPA1Flag4::ProcedureNamePresent) OutStreamer->AddComment(" Bit 7: 1 = Name Length and Name"); @@ -1314,12 +1320,14 @@ void SystemZAsmPrinter::emitPPA1(MCSymbol *FnEndSym) { OutStreamer->AddComment("Offset to PPA2"); OutStreamer->emitAbsoluteSymbolDiff(PPA2Sym, CurrentFnPPA1Sym, 4); + bool NeedEmitEHBlock = !MF->getLandingPads().empty(); + bool HasName = MF->getFunction().hasName() && MF->getFunction().getName().size() > 0; emitPPA1Flags(OutStreamer, MF->getFunction().isVarArg(), MFFrame.hasStackProtectorIndex(), SavedFPRMask != 0, - TargetHasVector && SavedVRMask != 0, HasName); + TargetHasVector && SavedVRMask != 0, NeedEmitEHBlock, HasName); OutStreamer->AddComment("Length/4 of Parms"); OutStreamer->emitInt16( @@ -1361,6 +1369,29 @@ void 
SystemZAsmPrinter::emitPPA1(MCSymbol *FnEndSym) { OutStreamer->emitInt32(FrameAndVROffset); } + // Emit C++ EH information block + const Function *Per = nullptr; + if (NeedEmitEHBlock) { + Per = dyn_cast<Function>( + MF->getFunction().getPersonalityFn()->stripPointerCasts()); + MCSymbol *PersonalityRoutine = + Per ? MF->getTarget().getSymbol(Per) : nullptr; + assert(PersonalityRoutine && "Missing personality routine"); + + OutStreamer->AddComment("Version"); + OutStreamer->emitInt32(1); + OutStreamer->AddComment("Flags"); + OutStreamer->emitInt32(0); // LSDA field is a WAS offset + OutStreamer->AddComment("Personality routine"); + OutStreamer->emitInt64(ADATable.insert( + PersonalityRoutine, SystemZII::MO_ADA_INDIRECT_FUNC_DESC)); + OutStreamer->AddComment("LSDA location"); + MCSymbol *GCCEH = MF->getContext().getOrCreateSymbol( + Twine("GCC_except_table") + Twine(MF->getFunctionNumber())); + OutStreamer->emitInt64( + ADATable.insert(GCCEH, SystemZII::MO_ADA_DATA_SYMBOL_ADDR)); + } + // Emit name length and name optional section (0x01 of flags 4) if (HasName) emitPPA1Name(OutStreamer, MF->getFunction().getName()); diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index 7522998fd06d..db19c8881c68 100644 --- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/Function.h" #include "llvm/Target/TargetMachine.h" @@ -994,6 +995,11 @@ bool SystemZXPLINKFrameLowering::assignCalleeSavedSpillSlots( if (hasFP(MF) || Subtarget.hasBackChain()) CSI.push_back(CalleeSavedInfo(Regs.getStackPointerRegister())); + // If this function has an associated personality 
function then the + // environment register R5 must be saved in the DSA. + if (!MF.getLandingPads().empty()) + CSI.push_back(CalleeSavedInfo(Regs.getADARegister())); + // Scan the call-saved GPRs and find the bounds of the register spill area. Register LowRestoreGPR = 0; int LowRestoreOffset = INT32_MAX; diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index e5e1e91916f3..c7d8591c5bdf 100644 --- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -465,7 +465,8 @@ bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM, bool IsBase) const { SDValue N = IsBase ? AM.Base : AM.Index; unsigned Opcode = N.getOpcode(); - if (Opcode == ISD::TRUNCATE) { + // Look through no-op truncations. + if (Opcode == ISD::TRUNCATE && N.getOperand(0).getValueSizeInBits() <= 64) { N = N.getOperand(0); Opcode = N.getOpcode(); } diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index a1803cf9a042..559f2ca476d7 100644 --- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1362,6 +1362,16 @@ SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT, report_fatal_error("Invalid register name global variable"); } +Register SystemZTargetLowering::getExceptionPointerRegister( + const Constant *PersonalityFn) const { + return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D; +} + +Register SystemZTargetLowering::getExceptionSelectorRegister( + const Constant *PersonalityFn) const { + return Subtarget.isTargetXPLINK64() ? 
SystemZ::R2D : SystemZ::R7D; +} + void SystemZTargetLowering::LowerAsmOperandForConstraint( SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, SelectionDAG &DAG) const { diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 6b3ce3f8c1d2..baf4ba416548 100644 --- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -555,16 +555,12 @@ public: /// If a physical register, this returns the register that receives the /// exception address on entry to an EH pad. Register - getExceptionPointerRegister(const Constant *PersonalityFn) const override { - return SystemZ::R6D; - } + getExceptionPointerRegister(const Constant *PersonalityFn) const override; /// If a physical register, this returns the register that receives the /// exception typeid on entry to a landing pad. Register - getExceptionSelectorRegister(const Constant *PersonalityFn) const override { - return SystemZ::R7D; - } + getExceptionSelectorRegister(const Constant *PersonalityFn) const override; /// Override to support customized stack guard loading. bool useLoadStackGuardNode() const override { diff --git a/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp index 3f96bd37755e..2a4383314e46 100644 --- a/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp @@ -61,8 +61,6 @@ bool TargetMachine::isLargeGlobalValue(const GlobalValue *GVal) const { // We should properly mark well-known section name prefixes as small/large, // because otherwise the output section may have the wrong section flags and // the linker will lay it out in an unexpected way. - // TODO: bring back lbss/ldata/lrodata checks after fixing accesses to large - // globals in the small code model. 
StringRef Name = GV->getSection(); if (!Name.empty()) { auto IsPrefix = [&](StringRef Prefix) { @@ -71,6 +69,8 @@ bool TargetMachine::isLargeGlobalValue(const GlobalValue *GVal) const { }; if (IsPrefix(".bss") || IsPrefix(".data") || IsPrefix(".rodata")) return false; + if (IsPrefix(".lbss") || IsPrefix(".ldata") || IsPrefix(".lrodata")) + return true; } // For x86-64, we treat an explicit GlobalVariable small code model to mean diff --git a/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index e78d16056460..bc5f562d9589 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -263,8 +263,7 @@ private: return 0; SmallVector<ICToken, 16> OperandStack; - for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) { - ICToken Op = PostfixStack[i]; + for (const ICToken &Op : PostfixStack) { if (Op.first == IC_IMM || Op.first == IC_REGISTER) { OperandStack.push_back(Op); } else if (isUnaryOperator(Op.first)) { @@ -1731,8 +1730,8 @@ bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands, OrigOperands.pop_back(); } // OrigOperands.append(FinalOperands.begin(), FinalOperands.end()); - for (unsigned int i = 0; i < FinalOperands.size(); ++i) - OrigOperands.push_back(std::move(FinalOperands[i])); + for (auto &Op : FinalOperands) + OrigOperands.push_back(std::move(Op)); return false; } @@ -3062,6 +3061,35 @@ bool X86AsmParser::ParseMemOperand(unsigned SegReg, const MCExpr *Disp, ErrMsg)) return Error(BaseLoc, ErrMsg); + // If the displacement is a constant, check overflows. For 64-bit addressing, + // gas requires isInt<32> and otherwise reports an error. For others, gas + // reports a warning and allows a wider range. E.g. gas allows + // [-0xffffffff,0xffffffff] for 32-bit addressing (e.g. 
Linux kernel uses + // `leal -__PAGE_OFFSET(%ecx),%esp` where __PAGE_OFFSET is 0xc0000000). + if (BaseReg || IndexReg) { + if (auto CE = dyn_cast<MCConstantExpr>(Disp)) { + auto Imm = CE->getValue(); + bool Is64 = X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) || + X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg); + bool Is16 = X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg); + if (Is64) { + if (!isInt<32>(Imm)) + return Error(BaseLoc, "displacement " + Twine(Imm) + + " is not within [-2147483648, 2147483647]"); + } else if (!Is16) { + if (!isUInt<32>(Imm < 0 ? -uint64_t(Imm) : uint64_t(Imm))) { + Warning(BaseLoc, "displacement " + Twine(Imm) + + " shortened to 32-bit signed " + + Twine(static_cast<int32_t>(Imm))); + } + } else if (!isUInt<16>(Imm < 0 ? -uint64_t(Imm) : uint64_t(Imm))) { + Warning(BaseLoc, "displacement " + Twine(Imm) + + " shortened to 16-bit signed " + + Twine(static_cast<int16_t>(Imm))); + } + } + } + if (SegReg || BaseReg || IndexReg) Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg, IndexReg, Scale, StartLoc, diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index b6ebbcf56aef..9e1f1eb97e70 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -1060,7 +1060,7 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, Prefix.setBB2(MI, MemOperand + X86::AddrBaseReg); Prefix.setXX2(MI, MemOperand + X86::AddrIndexReg); CurOp += X86::AddrNumOperands; - Prefix.set4V(MI, CurOp++); + Prefix.set4VV2(MI, CurOp++); break; } case X86II::MRM_C0: diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp index 7f134fe1c72b..0ba31e173a1a 100644 --- 
a/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp @@ -1306,8 +1306,8 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Subtarget->is64Bit() ? X86::RET64 : X86::RET32)); } - for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) - MIB.addReg(RetRegs[i], RegState::Implicit); + for (unsigned Reg : RetRegs) + MIB.addReg(Reg, RegState::Implicit); return true; } @@ -3346,8 +3346,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { // Walk the register/memloc assignments, inserting copies/loads. const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign const &VA = ArgLocs[i]; + for (const CCValAssign &VA : ArgLocs) { const Value *ArgVal = OutVals[VA.getValNo()]; MVT ArgVT = OutVTs[VA.getValNo()]; diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp index aab2535aa86d..ca4d03913d09 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp @@ -462,8 +462,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { // Check to see if any of the values defined by this instruction are dead // after definition. If so, pop them. - for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) { - unsigned Reg = DeadRegs[i]; + for (unsigned Reg : DeadRegs) { // Check if Reg is live on the stack. An inline-asm register operand that // is in the clobber list and marked dead might not be live on the stack. 
static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers"); diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 7ec59c74f5f5..77a997588c4f 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1828,9 +1828,7 @@ bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) { // That signifies access to globals that are known to be "near", // such as the GOT itself. CodeModel::Model M = TM.getCodeModel(); - if (Subtarget->is64Bit() && - ((M == CodeModel::Large && !IsRIPRelTLS) || - (M == CodeModel::Medium && !IsRIPRel))) + if (Subtarget->is64Bit() && M == CodeModel::Large && !IsRIPRelTLS) return true; // Base and index reg must be 0 in order to use %rip as base. @@ -1866,6 +1864,13 @@ bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) { } else llvm_unreachable("Unhandled symbol reference node."); + // Can't use an addressing mode with large globals. + if (Subtarget->is64Bit() && !IsRIPRel && AM.GV && + TM.isLargeGlobalValue(AM.GV)) { + AM = Backup; + return true; + } + if (foldOffsetIntoAddress(Offset, AM)) { AM = Backup; return true; @@ -1910,20 +1915,12 @@ bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) { // Post-processing: Convert foo to foo(%rip), even in non-PIC mode, // because it has a smaller encoding. - // TODO: Which other code models can use this? 
- switch (TM.getCodeModel()) { - default: break; - case CodeModel::Small: - case CodeModel::Kernel: - if (Subtarget->is64Bit() && - AM.Scale == 1 && - AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base_Reg.getNode() == nullptr && - AM.IndexReg.getNode() == nullptr && - AM.SymbolFlags == X86II::MO_NO_FLAG && - AM.hasSymbolicDisplacement()) - AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64); - break; + if (TM.getCodeModel() != CodeModel::Large && + (!AM.GV || !TM.isLargeGlobalValue(AM.GV)) && Subtarget->is64Bit() && + AM.Scale == 1 && AM.BaseType == X86ISelAddressMode::RegBase && + AM.Base_Reg.getNode() == nullptr && AM.IndexReg.getNode() == nullptr && + AM.SymbolFlags == X86II::MO_NO_FLAG && AM.hasSymbolicDisplacement()) { + AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64); } return false; diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp index b80c766c7ffa..63bdf24d6b4f 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2674,34 +2674,33 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout())); } -bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, - bool hasSymbolicDisplacement) { +bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model CM, + bool HasSymbolicDisplacement) { // Offset should fit into 32 bit immediate field. if (!isInt<32>(Offset)) return false; // If we don't have a symbolic displacement - we don't have any extra // restrictions. - if (!hasSymbolicDisplacement) + if (!HasSymbolicDisplacement) return true; - // FIXME: Some tweaks might be needed for medium code model. 
- if (M != CodeModel::Small && M != CodeModel::Kernel) - return false; - - // For small code model we assume that latest object is 16MB before end of 31 - // bits boundary. We may also accept pretty large negative constants knowing - // that all objects are in the positive half of address space. - if (M == CodeModel::Small && Offset < 16*1024*1024) + // We can fold large offsets in the large code model because we always use + // 64-bit offsets. + if (CM == CodeModel::Large) return true; // For kernel code model we know that all object resist in the negative half // of 32bits address space. We may not accept negative offsets, since they may // be just off and we may accept pretty large positive ones. - if (M == CodeModel::Kernel && Offset >= 0) - return true; + if (CM == CodeModel::Kernel) + return Offset >= 0; - return false; + // For other non-large code models we assume that latest small object is 16MB + // before end of 31 bits boundary. We may also accept pretty large negative + // constants knowing that all objects are in the positive half of address + // space. + return Offset < 16 * 1024 * 1024; } /// Return true if the condition is an signed comparison operation. 
@@ -4554,7 +4553,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx, return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec); } -static const ConstantPoolSDNode *getTargetConstantPoolFromBasePtr(SDValue Ptr) { +static ConstantPoolSDNode *getTargetConstantPoolFromBasePtr(SDValue Ptr) { if (Ptr.getOpcode() == X86ISD::Wrapper || Ptr.getOpcode() == X86ISD::WrapperRIP) Ptr = Ptr.getOperand(0); @@ -4562,7 +4561,7 @@ static const ConstantPoolSDNode *getTargetConstantPoolFromBasePtr(SDValue Ptr) { } static const Constant *getTargetConstantFromBasePtr(SDValue Ptr) { - const ConstantPoolSDNode *CNode = getTargetConstantPoolFromBasePtr(Ptr); + ConstantPoolSDNode *CNode = getTargetConstantPoolFromBasePtr(Ptr); if (!CNode || CNode->isMachineConstantPoolEntry() || CNode->getOffset() != 0) return nullptr; return CNode->getConstVal(); @@ -7563,8 +7562,7 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG, } else DstVec = DAG.getUNDEF(VT); - for (unsigned i = 0, e = NonConstIdx.size(); i != e; ++i) { - unsigned InsertIdx = NonConstIdx[i]; + for (unsigned InsertIdx : NonConstIdx) { DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec, Op.getOperand(InsertIdx), DAG.getIntPtrConstant(InsertIdx, dl)); @@ -40857,7 +40855,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetShuffle( SDValue BC = peekThroughOneUseBitcasts(Mask); EVT BCVT = BC.getValueType(); auto *Load = dyn_cast<LoadSDNode>(BC); - if (!Load) + if (!Load || !Load->getBasePtr().hasOneUse()) return false; const Constant *C = getTargetConstantFromNode(Load); diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp index 3e11ab2d98a4..6c23928228d2 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp @@ -69,8 +69,8 @@ using PrefetchHints = SampleRecord::CallTargetMap; // Return any prefetching 
hints for the specified MachineInstruction. The hints // are returned as pairs (name, delta). -ErrorOr<PrefetchHints> getPrefetchHints(const FunctionSamples *TopSamples, - const MachineInstr &MI) { +ErrorOr<const PrefetchHints &> +getPrefetchHints(const FunctionSamples *TopSamples, const MachineInstr &MI) { if (const auto &Loc = MI.getDebugLoc()) if (const auto *Samples = TopSamples->findFunctionSamples(Loc)) return Samples->findCallTargetMapAt(FunctionSamples::getOffset(Loc), @@ -123,7 +123,7 @@ bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples, }; static const char *SerializedPrefetchPrefix = "__prefetch"; - const ErrorOr<PrefetchHints> T = getPrefetchHints(TopSamples, MI); + auto T = getPrefetchHints(TopSamples, MI); if (!T) return false; int16_t max_index = -1; diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td index 2dbb3e5ee316..7f3e193d9a1b 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td @@ -20,32 +20,32 @@ let Predicates = [HasAMXTILE, In64BitMode] in { Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in def LDTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src), "ldtilecfg\t$src", - [(int_x86_ldtilecfg addr:$src)]>, VEX, T8PS; + [(int_x86_ldtilecfg addr:$src)]>, VEX, T8; let hasSideEffects = 1 in def STTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src), "sttilecfg\t$src", - [(int_x86_sttilecfg addr:$src)]>, VEX, T8PD; + [(int_x86_sttilecfg addr:$src)]>, VEX, T8, PD; let mayLoad = 1 in def TILELOADD : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst), (ins sibmem:$src), "tileloadd\t{$src, $dst|$dst, $src}", []>, - VEX, T8XD; + VEX, T8, XD; let mayLoad = 1 in def TILELOADDT1 : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst), (ins sibmem:$src), "tileloaddt1\t{$src, $dst|$dst, $src}", []>, - VEX, T8PD; + VEX, T8, PD; let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in def TILERELEASE : 
I<0x49, MRM_C0, (outs), (ins), - "tilerelease", [(int_x86_tilerelease)]>, VEX, T8PS; + "tilerelease", [(int_x86_tilerelease)]>, VEX, T8; let mayStore = 1 in def TILESTORED : I<0x4b, MRMDestMemFSIB, (outs), (ins sibmem:$dst, TILE:$src), "tilestored\t{$src, $dst|$dst, $src}", []>, - VEX, T8XS; + VEX, T8, XS; def TILEZERO : I<0x49, MRMr0, (outs TILE:$dst), (ins), "tilezero\t$dst", []>, - VEX, T8XD; + VEX, T8, XD; // Pseduo instruction for RA. let isPseudo = true, mayLoad = 1, hasSideEffects = 1, @@ -91,19 +91,19 @@ let Predicates = [HasAMXINT8, In64BitMode] in { def TDPBSSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst), (ins TILE:$src1, TILE:$src2, TILE:$src3), "tdpbssd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>, - VEX_4V, T8XD; + VEX, VVVV, T8, XD; def TDPBSUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst), (ins TILE:$src1, TILE:$src2, TILE:$src3), "tdpbsud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>, - VEX_4V, T8XS; + VEX, VVVV, T8, XS; def TDPBUSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst), (ins TILE:$src1, TILE:$src2, TILE:$src3), "tdpbusd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>, - VEX_4V, T8PD; + VEX, VVVV, T8, PD; def TDPBUUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst), (ins TILE:$src1, TILE:$src2, TILE:$src3), "tdpbuud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>, - VEX_4V, T8PS; + VEX, VVVV, T8; } // Pseduo instruction for RA. @@ -163,7 +163,7 @@ let Predicates = [HasAMXBF16, In64BitMode] in { def TDPBF16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst), (ins TILE:$src1, TILE:$src2, TILE:$src3), "tdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}", - []>, VEX_4V, T8XS; + []>, VEX, VVVV, T8, XS; // Pseduo instruction for RA. 
let isPseudo = true, Constraints = "$src4 = $dst" in @@ -193,7 +193,7 @@ let Predicates = [HasAMXFP16, In64BitMode] in { def TDPFP16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst), (ins TILE:$src1, TILE:$src2, TILE:$src3), "tdpfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}", - []>, VEX_4V, T8XD; + []>, VEX, VVVV, T8, XD; } // Pseduo instruction for RA. @@ -222,11 +222,11 @@ let Predicates = [HasAMXCOMPLEX, In64BitMode] in { def TCMMIMFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst), (ins TILE:$src1, TILE:$src2, TILE:$src3), "tcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}", - []>, T8PD, VEX_4V; + []>, T8, PD, VEX, VVVV; def TCMMRLFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst), (ins TILE:$src1, TILE:$src2, TILE:$src3), "tcmmrlfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}", - []>, VEX_4V, WIG, T8PS; + []>, VEX, VVVV, WIG, T8; } // Constraints = "$src1 = $dst" diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td index e1fe2b680b96..7c3c1d5fe42b 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td @@ -378,7 +378,7 @@ multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From, (vinsert_for_mask:$src3 (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm))>, - AVX512AIi8Base, EVEX_4V, Sched<[sched]>; + AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>; let mayLoad = 1 in defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst), (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3), @@ -389,7 +389,7 @@ multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From, (iPTR imm)), (vinsert_for_mask:$src3 (To.VT To.RC:$src1), (From.VT (From.LdFrag addr:$src2)), - (iPTR imm))>, AVX512AIi8Base, EVEX_4V, + (iPTR imm))>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<From.EltSize, From.CD8TupleForm>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -647,14 +647,14 @@ def VINSERTPSZrr : 
AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, u8imm:$src3), "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>, - EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>; + EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>; def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), (ins VR128X:$src1, f32mem:$src2, u8imm:$src3), "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR128X:$dst, (X86insertps VR128X:$src1, (v4f32 (scalar_to_vector (loadf32 addr:$src2))), timm:$src3))]>, - EVEX_4V, EVEX_CD8<32, CD8VT1>, + EVEX, VVVV, EVEX_CD8<32, CD8VT1>, Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; } @@ -1039,7 +1039,7 @@ multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr, (bitconvert (DestInfo.VT (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))], - DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>; + DestInfo.ExeDomain>, T8, PD, EVEX, Sched<[SchedRR]>; def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src), !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|", @@ -1051,7 +1051,7 @@ multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr, (DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))), MaskInfo.ImmAllZerosV))], - DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>; + DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ, Sched<[SchedRR]>; let Constraints = "$src0 = $dst" in def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, @@ -1065,7 +1065,7 @@ multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr, (DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))), MaskInfo.RC:$src0))], - DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>; + DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K, Sched<[SchedRR]>; let hasSideEffects = 0, mayLoad = 1 in def rm : 
AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst), @@ -1076,7 +1076,7 @@ multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr, (bitconvert (DestInfo.VT (UnmaskedBcastOp addr:$src)))))], - DestInfo.ExeDomain>, T8PD, EVEX, + DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>; def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst), @@ -1090,7 +1090,7 @@ multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr, (DestInfo.VT (SrcInfo.BroadcastLdFrag addr:$src)))), MaskInfo.ImmAllZerosV))], - DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, + DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ, EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>; let Constraints = "$src0 = $dst", @@ -1107,7 +1107,7 @@ multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr, (DestInfo.VT (SrcInfo.BroadcastLdFrag addr:$src)))), MaskInfo.RC:$src0))], - DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, + DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K, EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>; } @@ -1173,7 +1173,7 @@ multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR, "vpbroadcast"#_.Suffix, "$src", "$src", (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0, /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>, - T8PD, EVEX, Sched<[SchedRR]>; + T8, PD, EVEX, Sched<[SchedRR]>; } multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR, @@ -1185,7 +1185,7 @@ multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite Sched !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)), !con((ins _.KRCWM:$mask), (ins GR32:$src)), "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [], - "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>; + "$src0 = $dst">, T8, PD, EVEX, Sched<[SchedRR]>; def : Pat <(_.VT (OpNode SrcRC:$src)), (!cast<Instruction>(Name#rr) @@ -1593,7 +1593,7 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, (ins _.RC:$src2, _.RC:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", 
(_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>, - EVEX_4V, AVX5128IBase, Sched<[sched]>; + EVEX, VVVV, AVX5128IBase, Sched<[sched]>; let mayLoad = 1 in defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst), @@ -1601,7 +1601,7 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, (_.VT (_.LdFrag addr:$src3)))), 1>, - EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>; + EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -1616,7 +1616,7 @@ multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, !strconcat("$src2, ${src3}", _.BroadcastStr ), (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>, - AVX5128IBase, EVEX_4V, EVEX_B, + AVX5128IBase, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -1715,14 +1715,14 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { (ins IdxVT.RC:$src2, _.RC:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>, - EVEX_4V, AVX5128IBase, Sched<[sched]>; + EVEX, VVVV, AVX5128IBase, Sched<[sched]>; defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins IdxVT.RC:$src2, _.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, (_.LdFrag addr:$src3))), 1>, - EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>; + EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>; } } multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr, @@ -1735,7 +1735,7 @@ multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr, !strconcat("$src2, ${src3}", _.BroadcastStr ), (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>, - AVX5128IBase, EVEX_4V, EVEX_B, + AVX5128IBase, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, 
sched.ReadAfterFold]>; } @@ -1800,35 +1800,35 @@ multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr, (ins _.RC:$src1, _.RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>, - EVEX_4V, Sched<[sched]>; + EVEX, VVVV, Sched<[sched]>; def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"), - []>, EVEX_4V, EVEX_K, Sched<[sched]>; + []>, EVEX, VVVV, EVEX_K, Sched<[sched]>; def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"), - []>, EVEX_4V, EVEX_KZ, Sched<[sched]>; + []>, EVEX, VVVV, EVEX_KZ, Sched<[sched]>; let mayLoad = 1 in { def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), - []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, + []>, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"), - []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>, + []>, EVEX, VVVV, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"), - []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>, + []>, EVEX, VVVV, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -1841,7 +1841,7 @@ multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst 
{${mask}}|", "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>, - EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, + EVEX, VVVV, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), @@ -1849,7 +1849,7 @@ multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|", "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>, - EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, + EVEX, VVVV, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), @@ -1857,7 +1857,7 @@ multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst|", "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>, - EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, + EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -1921,7 +1921,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE, "$cc, $src2, $src1", "$src1, $src2, $cc", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), - timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC; + timm:$cc)>, EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC; let mayLoad = 1 in defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), @@ -1931,7 +1931,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE, (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2), timm:$cc), (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2), - timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, + timm:$cc)>, EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; let Uses = [MXCSR] in @@ -1944,7 
+1944,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE, timm:$cc), (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc)>, - EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>; + EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>; let isCodeGenOnly = 1 in { let isCommutable = 1 in @@ -1955,7 +1955,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE, [(set _.KRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2, timm:$cc))]>, - EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC; + EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC; def rm : AVX512Ii8<0xC2, MRMSrcMem, (outs _.KRC:$dst), (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), @@ -1964,7 +1964,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE, [(set _.KRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2), timm:$cc))]>, - EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, + EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; } } @@ -1991,24 +1991,24 @@ multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, def rr : AVX512BI<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, EVEX_4V, Sched<[sched]>; + []>, EVEX, VVVV, Sched<[sched]>; let mayLoad = 1, hasSideEffects = 0 in def rm : AVX512BI<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; + []>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; let isCommutable = IsCommutable, hasSideEffects = 0 in def rrk : AVX512BI<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|", "$dst {${mask}}, $src1, $src2}"), - []>, EVEX_4V, EVEX_K, Sched<[sched]>; + []>, EVEX, VVVV, EVEX_K, Sched<[sched]>; let mayLoad = 1, 
hasSideEffects = 0 in def rmk : AVX512BI<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|", "$dst {${mask}}, $src1, $src2}"), - []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; + []>, EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, @@ -2020,14 +2020,14 @@ multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst", "|$dst, $src1, ${src2}", _.BroadcastStr, "}"), - []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; + []>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmbk : AVX512BI<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2), !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), - []>, EVEX_4V, EVEX_K, EVEX_B, + []>, EVEX, VVVV, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -2082,7 +2082,7 @@ defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>, - T8PD, REX_W, EVEX_CD8<64, CD8VF>; + T8, REX_W, EVEX_CD8<64, CD8VF>; defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", SchedWriteVecALU, avx512vl_i8_info, HasBWI>, @@ -2098,7 +2098,7 @@ defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", SchedWriteVecALU, avx512vl_i64_info, HasAVX512>, - T8PD, REX_W, EVEX_CD8<64, CD8VF>; + T8, REX_W, EVEX_CD8<64, CD8VF>; } multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag, @@ -2113,7 +2113,7 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag, [(set _.KRC:$dst, (_.KVT 
(Frag:$cc (_.VT _.RC:$src1), (_.VT _.RC:$src2), cond)))]>, - EVEX_4V, Sched<[sched]>; + EVEX, VVVV, Sched<[sched]>; def rmi : AVX512AIi8<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), !strconcat("vpcmp", Suffix, @@ -2123,7 +2123,7 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag, (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), cond)))]>, - EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; + EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; let isCommutable = 1 in def rrik : AVX512AIi8<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, @@ -2135,7 +2135,7 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag, (_.KVT (Frag_su:$cc (_.VT _.RC:$src1), (_.VT _.RC:$src2), cond))))]>, - EVEX_4V, EVEX_K, Sched<[sched]>; + EVEX, VVVV, EVEX_K, Sched<[sched]>; def rmik : AVX512AIi8<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2, u8imm:$cc), @@ -2148,7 +2148,7 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag, (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), cond))))]>, - EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; + EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2), (_.VT _.RC:$src1), cond)), @@ -2177,7 +2177,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag, (_.VT _.RC:$src1), (_.BroadcastLdFrag addr:$src2), cond)))]>, - EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; + EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmibk : AVX512AIi8<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), @@ -2189,7 +2189,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag, (_.VT _.RC:$src1), (_.BroadcastLdFrag addr:$src2), cond))))]>, - EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; + EVEX, VVVV, EVEX_K, 
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2), (_.VT _.RC:$src1), cond)), @@ -2405,11 +2405,11 @@ multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _, } defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>, - AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W; + AVX512PDIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W; defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>, - AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; + AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>; defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>, - AVX512PSIi8Base, EVEX_4V, EVEX_CD8<16, CD8VF>, TA; + AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<16, CD8VF>, TA; // Patterns to select fp compares with load as first operand. let Predicates = [HasAVX512] in { @@ -2625,40 +2625,40 @@ multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk, let Predicates = [HasDQI, NoEGPR] in defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>, avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>, - VEX, PD; + VEX, TB, PD; let Predicates = [HasDQI, HasEGPR, In64BitMode] in defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem, "_EVEX">, avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32, "_EVEX">, - EVEX, PD; + EVEX, TB, PD; let Predicates = [HasAVX512, NoEGPR] in defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>, avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>, - VEX, PS; + VEX, TB; let Predicates = [HasAVX512, HasEGPR, In64BitMode] in defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem, "_EVEX">, avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32, "_EVEX">, - EVEX, PS; + EVEX, TB; let Predicates = [HasBWI, NoEGPR] in { defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>, - VEX, PD, REX_W; + VEX, TB, PD, REX_W; defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, 
GR32>, - VEX, XD; + VEX, TB, XD; defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>, - VEX, PS, REX_W; + VEX, TB, REX_W; defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>, - VEX, XD, REX_W; + VEX, TB, XD, REX_W; } let Predicates = [HasBWI, HasEGPR, In64BitMode] in { defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem, "_EVEX">, - EVEX, PD, REX_W; + EVEX, TB, PD, REX_W; defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32, "_EVEX">, - EVEX, XD; + EVEX, TB, XD; defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem, "_EVEX">, - EVEX, PS, REX_W; + EVEX, TB, REX_W; defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64, "_EVEX">, - EVEX, XD, REX_W; + EVEX, TB, XD, REX_W; } // GR from/to mask register @@ -2769,13 +2769,13 @@ multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, X86FoldableSchedWrite sched> { defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode, - sched, HasDQI>, VEX, PD; + sched, HasDQI>, VEX, TB, PD; defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode, - sched, HasAVX512>, VEX, PS; + sched, HasAVX512>, VEX, TB; defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode, - sched, HasBWI>, VEX, PD, REX_W; + sched, HasBWI>, VEX, TB, PD, REX_W; defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode, - sched, HasBWI>, VEX, PS, REX_W; + sched, HasBWI>, VEX, TB, REX_W; } // TODO - do we need a X86SchedWriteWidths::KMASK type? 
@@ -2812,13 +2812,13 @@ multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, bit IsCommutable, Predicate prdW = HasAVX512> { defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode, - sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD; + sched, HasDQI, IsCommutable>, VEX, VVVV, VEX_L, TB, PD; defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode, - sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS; + sched, prdW, IsCommutable>, VEX, VVVV, VEX_L, TB; defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode, - sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, REX_W, PD; + sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB, PD; defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode, - sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, REX_W, PS; + sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB; } // TODO - do we need a X86SchedWriteWidths::KMASK type? @@ -2869,16 +2869,16 @@ multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst, def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst), (ins Src.KRC:$src1, Src.KRC:$src2), "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - VEX_4V, VEX_L, Sched<[sched]>; + VEX, VVVV, VEX_L, Sched<[sched]>; def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)), (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>; } } -defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, PD; -defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS; -defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, REX_W; +defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, TB, PD; +defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, TB; +defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, TB, REX_W; // Mask bit 
testing multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC, @@ -2895,13 +2895,13 @@ multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, Predicate prdW = HasAVX512> { defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>, - VEX, PD; + VEX, TB, PD; defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>, - VEX, PS; + VEX, TB; defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>, - VEX, PS, REX_W; + VEX, TB, REX_W; defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>, - VEX, PD, REX_W; + VEX, TB, PD, REX_W; } // TODO - do we need a X86SchedWriteWidths::KMASK type? @@ -2922,15 +2922,15 @@ multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC, multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched> { defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode, - sched>, VEX, TAPD, REX_W; + sched>, VEX, TA, PD, REX_W; let Predicates = [HasDQI] in defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode, - sched>, VEX, TAPD; + sched>, VEX, TA, PD; let Predicates = [HasBWI] in { defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode, - sched>, VEX, TAPD, REX_W; + sched>, VEX, TA, PD, REX_W; defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode, - sched>, VEX, TAPD; + sched>, VEX, TA, PD; } } @@ -3371,25 +3371,25 @@ defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info, HasAVX512, SchedWriteFMoveLS, "VMOVAPS">, avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, HasAVX512, SchedWriteFMoveLS, "VMOVAPS">, - PS, EVEX_CD8<32, CD8VF>; + TB, EVEX_CD8<32, CD8VF>; defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, HasAVX512, SchedWriteFMoveLS, "VMOVAPD">, avx512_alignedstore_vl<0x29, "vmovapd", 
avx512vl_f64_info, HasAVX512, SchedWriteFMoveLS, "VMOVAPD">, - PD, REX_W, EVEX_CD8<64, CD8VF>; + TB, PD, REX_W, EVEX_CD8<64, CD8VF>; defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512, SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>, avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512, SchedWriteFMoveLS, "VMOVUPS">, - PS, EVEX_CD8<32, CD8VF>; + TB, EVEX_CD8<32, CD8VF>; defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>, avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512, SchedWriteFMoveLS, "VMOVUPD">, - PD, REX_W, EVEX_CD8<64, CD8VF>; + TB, PD, REX_W, EVEX_CD8<64, CD8VF>; defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, HasAVX512, SchedWriteVecMoveLS, @@ -3397,7 +3397,7 @@ defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, HasAVX512, SchedWriteVecMoveLS, "VMOVDQA", 1>, - PD, EVEX_CD8<32, CD8VF>; + TB, PD, EVEX_CD8<32, CD8VF>; defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, HasAVX512, SchedWriteVecMoveLS, @@ -3405,31 +3405,31 @@ defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, HasAVX512, SchedWriteVecMoveLS, "VMOVDQA">, - PD, REX_W, EVEX_CD8<64, CD8VF>; + TB, PD, REX_W, EVEX_CD8<64, CD8VF>; defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, SchedWriteVecMoveLS, "VMOVDQU", 1>, avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI, SchedWriteVecMoveLS, "VMOVDQU", 1>, - XD, EVEX_CD8<8, CD8VF>; + TB, XD, EVEX_CD8<8, CD8VF>; defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, SchedWriteVecMoveLS, "VMOVDQU", 1>, avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI, SchedWriteVecMoveLS, "VMOVDQU", 1>, - XD, REX_W, EVEX_CD8<16, CD8VF>; + TB, XD, REX_W, EVEX_CD8<16, CD8VF>; 
defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512, SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>, avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512, SchedWriteVecMoveLS, "VMOVDQU", 1>, - XS, EVEX_CD8<32, CD8VF>; + TB, XS, EVEX_CD8<32, CD8VF>; defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512, SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>, avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512, SchedWriteVecMoveLS, "VMOVDQU">, - XS, REX_W, EVEX_CD8<64, CD8VF>; + TB, XS, REX_W, EVEX_CD8<64, CD8VF>; // Special instructions to help with spilling when we don't have VLX. We need // to load or store from a ZMM register instead. These are converted in @@ -3816,12 +3816,12 @@ def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), (iPTR 0)))]>, - PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>, + TB, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>, Requires<[HasAVX512]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src), - "vmovq\t{$src, $dst|$dst, $src}", []>, PD, + "vmovq\t{$src, $dst|$dst, $src}", []>, TB, PD, EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>, Requires<[HasAVX512, In64BitMode]>; @@ -3830,7 +3830,7 @@ def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs), "vmovq\t{$src, $dst|$dst, $src}", [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), addr:$dst)]>, - EVEX, PD, REX_W, EVEX_CD8<64, CD8VT1>, + EVEX, TB, PD, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>, Requires<[HasAVX512]>; let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in @@ -3897,7 +3897,7 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag, (ins _.RC:$src1, _.RC:$src2), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set _.RC:$dst, (_.VT (OpNode 
_.RC:$src1, _.RC:$src2)))], - _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>; + _.ExeDomain>, EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>; let Predicates = [prd] in { def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), @@ -3906,7 +3906,7 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag, [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask, (_.VT (OpNode _.RC:$src1, _.RC:$src2)), _.ImmAllZerosV)))], - _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>; + _.ExeDomain>, EVEX, VVVV, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>; let Constraints = "$src0 = $dst" in def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), @@ -3915,7 +3915,7 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag, [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask, (_.VT (OpNode _.RC:$src1, _.RC:$src2)), (_.VT _.RC:$src0))))], - _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>; + _.ExeDomain>, EVEX, VVVV, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>; let canFoldAsLoad = 1, isReMaterializable = 1 in { def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), @@ -3954,14 +3954,14 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag, } defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>, - VEX_LIG, XS, EVEX_CD8<32, CD8VT1>; + VEX_LIG, TB, XS, EVEX_CD8<32, CD8VT1>; defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>, - VEX_LIG, XD, REX_W, EVEX_CD8<64, CD8VT1>; + VEX_LIG, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>; defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info, HasFP16>, - VEX_LIG, T_MAP5XS, EVEX_CD8<16, CD8VT1>; + VEX_LIG, T_MAP5, XS, EVEX_CD8<16, CD8VT1>; multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode, PatLeaf ZeroFP, 
X86VectorVTInfo _> { @@ -4286,7 +4286,7 @@ let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, T_MAP5XS, EVEX_4V, VEX_LIG, + []>, T_MAP5, XS, EVEX, VVVV, VEX_LIG, Sched<[SchedWriteFShuffle.XMM]>; let Constraints = "$src0 = $dst" in @@ -4295,20 +4295,20 @@ let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { VR128X:$src1, VR128X:$src2), "vmovsh\t{$src2, $src1, $dst {${mask}}|"# "$dst {${mask}}, $src1, $src2}", - []>, T_MAP5XS, EVEX_K, EVEX_4V, VEX_LIG, + []>, T_MAP5, XS, EVEX_K, EVEX, VVVV, VEX_LIG, Sched<[SchedWriteFShuffle.XMM]>; def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2), "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"# "$dst {${mask}} {z}, $src1, $src2}", - []>, EVEX_KZ, T_MAP5XS, EVEX_4V, VEX_LIG, + []>, EVEX_KZ, T_MAP5, XS, EVEX, VVVV, VEX_LIG, Sched<[SchedWriteFShuffle.XMM]>; } def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, XS, EVEX_4V, VEX_LIG, + []>, TB, XS, EVEX, VVVV, VEX_LIG, Sched<[SchedWriteFShuffle.XMM]>; let Constraints = "$src0 = $dst" in @@ -4317,20 +4317,20 @@ let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { VR128X:$src1, VR128X:$src2), "vmovss\t{$src2, $src1, $dst {${mask}}|"# "$dst {${mask}}, $src1, $src2}", - []>, EVEX_K, XS, EVEX_4V, VEX_LIG, + []>, EVEX_K, TB, XS, EVEX, VVVV, VEX_LIG, Sched<[SchedWriteFShuffle.XMM]>; def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2), "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"# "$dst {${mask}} {z}, $src1, $src2}", - []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG, + []>, EVEX_KZ, TB, XS, EVEX, VVVV, VEX_LIG, Sched<[SchedWriteFShuffle.XMM]>; def VMOVSDZrr_REV: 
AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, XD, EVEX_4V, VEX_LIG, REX_W, + []>, TB, XD, EVEX, VVVV, VEX_LIG, REX_W, Sched<[SchedWriteFShuffle.XMM]>; let Constraints = "$src0 = $dst" in @@ -4339,7 +4339,7 @@ let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { VR128X:$src1, VR128X:$src2), "vmovsd\t{$src2, $src1, $dst {${mask}}|"# "$dst {${mask}}, $src1, $src2}", - []>, EVEX_K, XD, EVEX_4V, VEX_LIG, + []>, EVEX_K, TB, XD, EVEX, VVVV, VEX_LIG, REX_W, Sched<[SchedWriteFShuffle.XMM]>; def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), @@ -4347,7 +4347,7 @@ let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { VR128X:$src2), "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"# "$dst {${mask}} {z}, $src1, $src2}", - []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG, + []>, EVEX_KZ, TB, XD, EVEX, VVVV, VEX_LIG, REX_W, Sched<[SchedWriteFShuffle.XMM]>; } @@ -4546,20 +4546,20 @@ let Predicates = [HasAVX512] in { def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst), (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>, - EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>; + EVEX, T8, PD, EVEX_V512, EVEX_CD8<64, CD8VF>; let Predicates = [HasVLX] in { def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst), (ins i256mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>, - EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>; + EVEX, T8, PD, EVEX_V256, EVEX_CD8<64, CD8VF>; def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst), (ins i128mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>, - EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>; + EVEX, T8, PD, EVEX_V128, EVEX_CD8<64, CD8VF>; } multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, @@ -4585,11 +4585,11 @@ multiclass 
avx512_movnt_vl<bits<8> opc, string OpcodeStr, } defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info, - SchedWriteVecMoveLSNT>, PD; + SchedWriteVecMoveLSNT>, TB, PD; defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info, - SchedWriteFMoveLSNT>, PD, REX_W; + SchedWriteFMoveLSNT>, TB, PD, REX_W; defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info, - SchedWriteFMoveLSNT>, PS; + SchedWriteFMoveLSNT>, TB; let Predicates = [HasAVX512], AddedComplexity = 400 in { def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst), @@ -4665,14 +4665,14 @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src1, _.RC:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, _.RC:$src2)), - IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V, + IsCommutable, IsCommutable>, AVX512BIBase, EVEX, VVVV, Sched<[sched]>; defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>, - AVX512BIBase, EVEX_4V, + AVX512BIBase, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -4686,7 +4686,7 @@ multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, "$src1, ${src2}"#_.BroadcastStr, (_.VT (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src2)))>, - AVX512BIBase, EVEX_4V, EVEX_B, + AVX512BIBase, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -4796,13 +4796,13 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, (_Src.VT _Src.RC:$src1), (_Src.VT _Src.RC:$src2))), IsCommutable>, - AVX512BIBase, EVEX_4V, Sched<[sched]>; + AVX512BIBase, EVEX, VVVV, Sched<[sched]>; defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (_Src.LdFrag addr:$src2)))>, - AVX512BIBase, EVEX_4V, + 
AVX512BIBase, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), @@ -4812,7 +4812,7 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, "$src1, ${src2}"#_Brdct.BroadcastStr, (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>, - AVX512BIBase, EVEX_4V, EVEX_B, + AVX512BIBase, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -4829,22 +4829,22 @@ defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat, defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat, SchedWriteVecALU, HasBWI, 0>; defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul, - SchedWritePMULLD, HasAVX512, 1>, T8PD; + SchedWritePMULLD, HasAVX512, 1>, T8; defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul, SchedWriteVecIMul, HasBWI, 1>; defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul, - SchedWriteVecIMul, HasDQI, 1>, T8PD, + SchedWriteVecIMul, HasDQI, 1>, T8, NotEVEX2VEXConvertible; defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul, HasBWI, 1>; defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul, HasBWI, 1>; defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, - SchedWriteVecIMul, HasBWI, 1>, T8PD; + SchedWriteVecIMul, HasBWI, 1>, T8; defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu, SchedWriteVecALU, HasBWI, 1>; defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq, - SchedWriteVecIMul, HasAVX512, 1>, T8PD; + SchedWriteVecIMul, HasAVX512, 1>, T8; defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq, SchedWriteVecIMul, HasAVX512, 1>; @@ -4872,7 +4872,7 @@ multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU, avx512vl_i8_info, avx512vl_i8_info, - X86multishift, HasVBMI, 0>, T8PD; + X86multishift, HasVBMI, 0>, T8; 
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _Src, X86VectorVTInfo _Dst, @@ -4884,7 +4884,7 @@ multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, "$src1, ${src2}"#_Src.BroadcastStr, (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>, - EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>, + EVEX, VVVV, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -4899,13 +4899,13 @@ multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr, (_Src.VT _Src.RC:$src1), (_Src.VT _Src.RC:$src2))), IsCommutable, IsCommutable>, - EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>; + EVEX_CD8<_Src.EltSize, CD8VF>, EVEX, VVVV, Sched<[sched]>; defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (_Src.LdFrag addr:$src2)))>, - EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>, + EVEX, VVVV, EVEX_CD8<_Src.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -4967,48 +4967,48 @@ defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512B defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase; defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw, - avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, WIG; + avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8, WIG; defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd, avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, WIG; defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax, - SchedWriteVecALU, HasBWI, 1>, T8PD; + SchedWriteVecALU, HasBWI, 1>, T8; defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax, SchedWriteVecALU, HasBWI, 1>; defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax, - SchedWriteVecALU, HasAVX512, 1>, T8PD; + SchedWriteVecALU, 
HasAVX512, 1>, T8; defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax, - SchedWriteVecALU, HasAVX512, 1>, T8PD, + SchedWriteVecALU, HasAVX512, 1>, T8, NotEVEX2VEXConvertible; defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax, SchedWriteVecALU, HasBWI, 1>; defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax, - SchedWriteVecALU, HasBWI, 1>, T8PD; + SchedWriteVecALU, HasBWI, 1>, T8; defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax, - SchedWriteVecALU, HasAVX512, 1>, T8PD; + SchedWriteVecALU, HasAVX512, 1>, T8; defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax, - SchedWriteVecALU, HasAVX512, 1>, T8PD, + SchedWriteVecALU, HasAVX512, 1>, T8, NotEVEX2VEXConvertible; defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin, - SchedWriteVecALU, HasBWI, 1>, T8PD; + SchedWriteVecALU, HasBWI, 1>, T8; defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin, SchedWriteVecALU, HasBWI, 1>; defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin, - SchedWriteVecALU, HasAVX512, 1>, T8PD; + SchedWriteVecALU, HasAVX512, 1>, T8; defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin, - SchedWriteVecALU, HasAVX512, 1>, T8PD, + SchedWriteVecALU, HasAVX512, 1>, T8, NotEVEX2VEXConvertible; defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin, SchedWriteVecALU, HasBWI, 1>; defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin, - SchedWriteVecALU, HasBWI, 1>, T8PD; + SchedWriteVecALU, HasBWI, 1>, T8; defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin, - SchedWriteVecALU, HasAVX512, 1>, T8PD; + SchedWriteVecALU, HasAVX512, 1>, T8; defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin, - SchedWriteVecALU, HasAVX512, 1>, T8PD, + SchedWriteVecALU, HasAVX512, 1>, T8, NotEVEX2VEXConvertible; // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX. 
@@ -5445,18 +5445,18 @@ multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator sched.PS.Scl, IsCommutable>, avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode, sched.PS.Scl>, - XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; + TB, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode, sched.PD.Scl, IsCommutable>, avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode, sched.PD.Scl>, - XD, REX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; + TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>; let Predicates = [HasFP16] in defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode, VecNode, sched.PH.Scl, IsCommutable>, avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode, sched.PH.Scl>, - T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>; + T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>; } multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode, @@ -5465,16 +5465,16 @@ multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode, defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode, SaeNode, sched.PS.Scl, IsCommutable, NAME#"SS">, - XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; + TB, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode, SaeNode, sched.PD.Scl, IsCommutable, NAME#"SD">, - XD, REX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; + TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>; let Predicates = [HasFP16] in { defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode, VecNode, SaeNode, sched.PH.Scl, IsCommutable, NAME#"SH">, - T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, + T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, NotEVEX2VEXConvertible; } } @@ -5515,30 +5515,30 @@ multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr, } } defm VMINCSSZ : 
avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc, - SchedWriteFCmp.Scl, "VMINCSS">, XS, - EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC; + SchedWriteFCmp.Scl, "VMINCSS">, TB, XS, + EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC; defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc, - SchedWriteFCmp.Scl, "VMINCSD">, XD, - REX_W, EVEX_4V, VEX_LIG, + SchedWriteFCmp.Scl, "VMINCSD">, TB, XD, + REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>, SIMD_EXC; defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc, - SchedWriteFCmp.Scl, "VMAXCSS">, XS, - EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC; + SchedWriteFCmp.Scl, "VMAXCSS">, TB, XS, + EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC; defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc, - SchedWriteFCmp.Scl, "VMAXCSD">, XD, - REX_W, EVEX_4V, VEX_LIG, + SchedWriteFCmp.Scl, "VMAXCSD">, TB, XD, + REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>, SIMD_EXC; defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc, - SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5XS, - EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC, + SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5, XS, + EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC, NotEVEX2VEXConvertible; defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc, - SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5XS, - EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC, + SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5, XS, + EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC, NotEVEX2VEXConvertible; multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, @@ -5556,21 +5556,21 @@ multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpN "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, _.RC:$src2)), (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint, - IsCommutable, IsKCommutable, IsKCommutable>, EVEX_4V, 
Sched<[sched]>; + IsCommutable, IsKCommutable, IsKCommutable>, EVEX, VVVV, Sched<[sched]>; let mayLoad = 1 in { defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix, "$src2, $src1", "$src1, $src2", (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)), - ClobberConstraint>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; + ClobberConstraint>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix, "${src2}"#_.BroadcastStr#", $src1", "$src1, ${src2}"#_.BroadcastStr, (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))), (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))), - ClobberConstraint>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; + ClobberConstraint>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } } @@ -5586,7 +5586,7 @@ multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, "$rc, $src2, $src1", "$src1, $src2, $rc", (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))), 0, 0, 0, vselect_mask, ClobberConstraint>, - EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>; + EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>; } multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, @@ -5597,7 +5597,7 @@ multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, "{sae}, $src2, $src1", "$src1, $src2, {sae}", (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>, - EVEX_4V, EVEX_B, Sched<[sched]>; + EVEX, VVVV, EVEX_B, Sched<[sched]>; } multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, @@ -5607,27 +5607,27 @@ multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator Op bit IsPD128Commutable = IsCommutable> { let Predicates = [prd] in { defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, 
MaskOpNode, v16f32_info, - sched.PS.ZMM, IsCommutable>, EVEX_V512, PS, + sched.PS.ZMM, IsCommutable>, EVEX_V512, TB, EVEX_CD8<32, CD8VF>; defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info, - sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, REX_W, + sched.PD.ZMM, IsCommutable>, EVEX_V512, TB, PD, REX_W, EVEX_CD8<64, CD8VF>; } // Define only if AVX512VL feature is present. let Predicates = [prd, HasVLX] in { defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info, - sched.PS.XMM, IsCommutable>, EVEX_V128, PS, + sched.PS.XMM, IsCommutable>, EVEX_V128, TB, EVEX_CD8<32, CD8VF>; defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info, - sched.PS.YMM, IsCommutable>, EVEX_V256, PS, + sched.PS.YMM, IsCommutable>, EVEX_V256, TB, EVEX_CD8<32, CD8VF>; defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info, sched.PD.XMM, IsPD128Commutable, - IsCommutable>, EVEX_V128, PD, REX_W, + IsCommutable>, EVEX_V128, TB, PD, REX_W, EVEX_CD8<64, CD8VF>; defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info, - sched.PD.YMM, IsCommutable>, EVEX_V256, PD, REX_W, + sched.PD.YMM, IsCommutable>, EVEX_V256, TB, PD, REX_W, EVEX_CD8<64, CD8VF>; } } @@ -5637,15 +5637,15 @@ multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator O X86SchedWriteSizes sched, bit IsCommutable = 0> { let Predicates = [HasFP16] in { defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info, - sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5PS, + sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>; } let Predicates = [HasVLX, HasFP16] in { defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info, - sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5PS, + sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5, EVEX_CD8<16, CD8VF>; defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info, - sched.PH.YMM, IsCommutable>, 
EVEX_V256, T_MAP5PS, + sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5, EVEX_CD8<16, CD8VF>; } } @@ -5656,14 +5656,14 @@ multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeR let Predicates = [HasFP16] in { defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM, v32f16_info>, - EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>; + EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>; } defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM, v16f32_info>, - EVEX_V512, PS, EVEX_CD8<32, CD8VF>; + EVEX_V512, TB, EVEX_CD8<32, CD8VF>; defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM, v8f64_info>, - EVEX_V512, PD, REX_W,EVEX_CD8<64, CD8VF>; + EVEX_V512, TB, PD, REX_W,EVEX_CD8<64, CD8VF>; } let Uses = [MXCSR] in @@ -5672,14 +5672,14 @@ multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd let Predicates = [HasFP16] in { defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM, v32f16_info>, - EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>; + EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>; } defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM, v16f32_info>, - EVEX_V512, PS, EVEX_CD8<32, CD8VF>; + EVEX_V512, TB, EVEX_CD8<32, CD8VF>; defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM, v8f64_info>, - EVEX_V512, PD, REX_W,EVEX_CD8<64, CD8VF>; + EVEX_V512, TB, PD, REX_W,EVEX_CD8<64, CD8VF>; } defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512, @@ -5734,18 +5734,18 @@ multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, _.RC:$src2))>, - EVEX_4V, Sched<[sched]>; + EVEX, VVVV, Sched<[sched]>; defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2", (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>, - EVEX_4V, 
Sched<[sched.Folded, sched.ReadAfterFold]>; + EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix, "${src2}"#_.BroadcastStr#", $src1", "$src1, ${src2}"#_.BroadcastStr, (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>, - EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; + EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -5770,43 +5770,43 @@ multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr let Predicates = [HasFP16] in { defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>, avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>, - EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>; + EVEX_V512, T_MAP6, PD, EVEX_CD8<16, CD8VF>; defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>, avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>, - EVEX_4V, T_MAP6PD, EVEX_CD8<16, CD8VT1>; + EVEX, VVVV, T_MAP6, PD, EVEX_CD8<16, CD8VT1>; } defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>, avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>, - EVEX_V512, EVEX_CD8<32, CD8VF>, T8PD; + EVEX_V512, EVEX_CD8<32, CD8VF>, T8, PD; defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>, avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>, - EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>, T8PD; + EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>, T8, PD; defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>, avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info, X86scalefsRnd, sched.Scl>, - EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8PD; + EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8, PD; defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, 
sched.Scl, f64x_info>, avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info, X86scalefsRnd, sched.Scl>, - EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, REX_W, T8PD; + EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>, REX_W, T8, PD; // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>, - EVEX_V128, EVEX_CD8<32, CD8VF>, T8PD; + EVEX_V128, EVEX_CD8<32, CD8VF>, T8, PD; defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>, - EVEX_V256, EVEX_CD8<32, CD8VF>, T8PD; + EVEX_V256, EVEX_CD8<32, CD8VF>, T8, PD; defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>, - EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>, T8PD; + EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>, T8, PD; defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>, - EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>, T8PD; + EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>, T8, PD; } let Predicates = [HasFP16, HasVLX] in { defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>, - EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6PD; + EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6, PD; defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>, - EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6PD; + EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PD; } } defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", @@ -5825,13 +5825,13 @@ multiclass avx512_vptest<bits<8> opc, string OpcodeStr, (ins _.RC:$src1, _.RC:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (null_frag), (null_frag), 1>, - EVEX_4V, Sched<[sched]>; + EVEX, VVVV, Sched<[sched]>; let mayLoad = 1 in defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (null_frag), (null_frag)>, - EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, + EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, 
sched.ReadAfterFold]>; } } @@ -5844,7 +5844,7 @@ multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, "${src2}"#_.BroadcastStr#", $src1", "$src1, ${src2}"#_.BroadcastStr, (null_frag), (null_frag)>, - EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, + EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -5898,9 +5898,9 @@ multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string Opcode avx512_vptest_dq<opc_dq, OpcodeStr, sched>; defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", - SchedWriteVecLogic>, T8PD; + SchedWriteVecLogic>, T8, PD; defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", - SchedWriteVecLogic>, T8XS; + SchedWriteVecLogic>, T8, XS; //===----------------------------------------------------------------------===// // AVX-512 Shift instructions @@ -5944,13 +5944,13 @@ multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src1, VR128X:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>, - AVX512BIBase, EVEX_4V, Sched<[sched]>; + AVX512BIBase, EVEX, VVVV, Sched<[sched]>; defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, i128mem:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>, AVX512BIBase, - EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; + EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -6035,22 +6035,22 @@ multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq, defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli, SchedWriteVecShiftImm>, avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli, - SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; + SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV; defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli, SchedWriteVecShiftImm>, avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli, - 
SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; + SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV; defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai, SchedWriteVecShiftImm, 1>, avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai, - SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; + SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV; defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri, - SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; + SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV; defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli, - SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; + SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV; defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, SchedWriteVecShift>; @@ -6097,13 +6097,13 @@ multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src1, _.RC:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>, - AVX5128IBase, EVEX_4V, Sched<[sched]>; + AVX5128IBase, EVEX, VVVV, Sched<[sched]>; defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (_.VT (_.LdFrag addr:$src2))))>, - AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, + AVX5128IBase, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -6116,7 +6116,7 @@ multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode, "${src2}"#_.BroadcastStr#", $src1", "$src1, ${src2}"#_.BroadcastStr, (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>, - AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, + AVX5128IBase, EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -6374,14 +6374,14 @@ multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode, 
"$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (Ctrl.VT Ctrl.RC:$src2)))>, - T8PD, EVEX_4V, Sched<[sched]>; + T8, PD, EVEX, VVVV, Sched<[sched]>; defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>, - T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, + T8, PD, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, @@ -6390,7 +6390,7 @@ multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode, (_.VT (OpNode _.RC:$src1, (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>, - T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, + T8, PD, EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -6469,13 +6469,13 @@ def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>, - Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V; + Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV; let isCommutable = 1 in def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>, - Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V; + Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV; //===----------------------------------------------------------------------===// // VMOVHPS/PD VMOVLPS Instructions @@ -6494,19 +6494,19 @@ multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, (OpNode _.RC:$src1, (_.VT (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>, - Sched<[SchedWriteFShuffle.XMM.Folded, 
SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V; + Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX, VVVV; } // No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in // SSE1. And MOVLPS pattern is even more complex. defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag, - v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; + v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB; defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl, - v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, REX_W; + v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W; defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag, - v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; + v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB; defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd, - v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, REX_W; + v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W; let Predicates = [HasAVX512] in { // VMOVHPD patterns @@ -6565,14 +6565,14 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>, - EVEX_4V, Sched<[sched]>; + EVEX, VVVV, Sched<[sched]>; defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>, - EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold, + EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), @@ -6583,7 +6583,7 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), (MaskOpNode _.RC:$src2, _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 
1, 0>, - EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold, + EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; } } @@ -6598,7 +6598,7 @@ multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>, - EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>; + EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>; } multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, @@ -6627,13 +6627,13 @@ multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator O SDNode MaskOpNode, SDNode OpNodeRnd> { defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode, OpNodeRnd, SchedWriteFMA, - avx512vl_f16_info, HasFP16>, T_MAP6PD; + avx512vl_f16_info, HasFP16>, T_MAP6, PD; defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode, OpNodeRnd, SchedWriteFMA, - avx512vl_f32_info>, T8PD; + avx512vl_f32_info>, T8, PD; defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode, OpNodeRnd, SchedWriteFMA, - avx512vl_f64_info>, T8PD, REX_W; + avx512vl_f64_info>, T8, PD, REX_W; } defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma, @@ -6660,14 +6660,14 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpcodeStr, "$src3, $src2", "$src2, $src3", (null_frag), (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>, - EVEX_4V, Sched<[sched]>; + EVEX, VVVV, Sched<[sched]>; defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>, - EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold, + EVEX, VVVV, 
Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), @@ -6679,7 +6679,7 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator _.RC:$src1)), (_.VT (MaskOpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), - _.RC:$src1)), 1, 0>, EVEX_4V, EVEX_B, + _.RC:$src1)), 1, 0>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; } @@ -6695,7 +6695,7 @@ multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))), - 1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>; + 1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>; } multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, @@ -6724,13 +6724,13 @@ multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator O SDNode MaskOpNode, SDNode OpNodeRnd > { defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode, OpNodeRnd, SchedWriteFMA, - avx512vl_f16_info, HasFP16>, T_MAP6PD; + avx512vl_f16_info, HasFP16>, T_MAP6, PD; defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode, OpNodeRnd, SchedWriteFMA, - avx512vl_f32_info>, T8PD; + avx512vl_f32_info>, T8, PD; defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode, OpNodeRnd, SchedWriteFMA, - avx512vl_f64_info>, T8PD, REX_W; + avx512vl_f64_info>, T8, PD, REX_W; } defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma, @@ -6756,7 +6756,7 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpcodeStr, "$src3, $src2", "$src2, $src3", (null_frag), (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>, - EVEX_4V, Sched<[sched]>; + EVEX, VVVV, Sched<[sched]>; // Pattern is 312 order so that the load is in a different place from the // 213 and 231 
patterns this helps tablegen's duplicate pattern detection. @@ -6765,7 +6765,7 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>, - EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold, + EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; // Pattern is 312 order so that the load is in a different place from the @@ -6778,7 +6778,7 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator _.RC:$src1, _.RC:$src2)), (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1, _.RC:$src2)), 1, 0>, - EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold, + EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; } } @@ -6793,7 +6793,7 @@ multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))), - 1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>; + 1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>; } multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, @@ -6822,13 +6822,13 @@ multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator O SDNode MaskOpNode, SDNode OpNodeRnd > { defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode, OpNodeRnd, SchedWriteFMA, - avx512vl_f16_info, HasFP16>, T_MAP6PD; + avx512vl_f16_info, HasFP16>, T_MAP6, PD; defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode, OpNodeRnd, SchedWriteFMA, - avx512vl_f32_info>, T8PD; + avx512vl_f32_info>, T8, PD; defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode, OpNodeRnd, SchedWriteFMA, - avx512vl_f64_info>, T8PD, REX_W; + avx512vl_f64_info>, 
T8, PD, REX_W; } defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma, @@ -6851,33 +6851,33 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in { defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>, - EVEX_4V, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC; + EVEX, VVVV, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC; let mayLoad = 1 in defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>, - EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold, + EVEX, VVVV, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC; let Uses = [MXCSR] in defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>, - EVEX_4V, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>; + EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>; let isCodeGenOnly = 1, isCommutable = 1 in { def r : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX_4V, SIMD_EXC; + !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV, SIMD_EXC; def m : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold, - SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC; + SchedWriteFMA.Scl.ReadAfterFold]>, EVEX, VVVV, SIMD_EXC; let Uses = [MXCSR] in def rb : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst), @@ -6885,7 +6885,7 @@ 
let Constraints = "$src1 = $dst", hasSideEffects = 0 in { !strconcat(OpcodeStr, "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"), !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC, - Sched<[SchedWriteFMA.Scl]>, EVEX_4V; + Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV; }// isCodeGenOnly = 1 }// Constraints = "$src1 = $dst" } @@ -6929,15 +6929,15 @@ multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132, let Predicates = [HasAVX512] in { defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode, OpNodeRnd, f32x_info, "SS">, - EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD; + EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD; defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode, OpNodeRnd, f64x_info, "SD">, - EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8PD; + EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD; } let Predicates = [HasFP16] in { defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode, OpNodeRnd, f16x_info, "SH">, - EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6PD; + EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6, PD; } } @@ -7189,13 +7189,13 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src2, _.RC:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>, - T8PD, EVEX_4V, Sched<[sched]>; + T8, PD, EVEX, VVVV, Sched<[sched]>; defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, - T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold, + T8, PD, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), @@ -7205,7 +7205,7 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1)>, - T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, 
sched.ReadAfterFold, + T8, PD, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; } } @@ -7247,19 +7247,19 @@ let ExeDomain = DstVT.ExeDomain, Uses = _Uses, def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst), (ins DstVT.FRC:$src1, SrcRC:$src), !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, - EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>; + EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>; let mayLoad = 1 in def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst), (ins DstVT.FRC:$src1, x86memop:$src), asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>, - EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; + EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; } // hasSideEffects = 0 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins DstVT.RC:$src1, SrcRC:$src2), !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set DstVT.RC:$dst, (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>, - EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>; + EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>; def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins DstVT.RC:$src1, x86memop:$src2), @@ -7267,7 +7267,7 @@ let ExeDomain = DstVT.ExeDomain, Uses = _Uses, [(set DstVT.RC:$dst, (OpNode (DstVT.VT DstVT.RC:$src1), (ld_frag addr:$src2)))]>, - EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; + EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; } def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst, @@ -7287,7 +7287,7 @@ multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2, (i32 timm:$rc)))]>, - EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>; + EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>; def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}", (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst, 
DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">; @@ -7307,18 +7307,18 @@ let Predicates = [HasAVX512] in { defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32, v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">, - XS, EVEX_CD8<32, CD8VT1>; + TB, XS, EVEX_CD8<32, CD8VT1>; defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64, v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">, - XS, REX_W, EVEX_CD8<64, CD8VT1>; + TB, XS, REX_W, EVEX_CD8<64, CD8VT1>; defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32, v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>, - XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; + TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SD, GR64, v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">, - XD, REX_W, EVEX_CD8<64, CD8VT1>; + TB, XD, REX_W, EVEX_CD8<64, CD8VT1>; def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; @@ -7346,18 +7346,18 @@ def : Pat<(f64 (any_sint_to_fp GR64:$src)), defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32, v4f32x_info, i32mem, loadi32, - "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>; + "cvtusi2ss", "l">, TB, XS, EVEX_CD8<32, CD8VT1>; defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64, v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">, - XS, REX_W, EVEX_CD8<64, CD8VT1>; + TB, XS, REX_W, EVEX_CD8<64, CD8VT1>; defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info, i32mem, loadi32, "cvtusi2sd", "l", [], 0>, - XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; + TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SD, GR64, v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">, - XD, REX_W, EVEX_CD8<64, CD8VT1>; + TB, XD, 
REX_W, EVEX_CD8<64, CD8VT1>; def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; @@ -7422,28 +7422,28 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT, // Convert float/double to signed/unsigned int 32/64 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si, X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">, - XS, EVEX_CD8<32, CD8VT1>; + TB, XS, EVEX_CD8<32, CD8VT1>; defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si, X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">, - XS, REX_W, EVEX_CD8<32, CD8VT1>; + TB, XS, REX_W, EVEX_CD8<32, CD8VT1>; defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi, X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">, - XS, EVEX_CD8<32, CD8VT1>; + TB, XS, EVEX_CD8<32, CD8VT1>; defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi, X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">, - XS, REX_W, EVEX_CD8<32, CD8VT1>; + TB, XS, REX_W, EVEX_CD8<32, CD8VT1>; defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si, X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">, - XD, EVEX_CD8<64, CD8VT1>; + TB, XD, EVEX_CD8<64, CD8VT1>; defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si, X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">, - XD, REX_W, EVEX_CD8<64, CD8VT1>; + TB, XD, REX_W, EVEX_CD8<64, CD8VT1>; defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi, X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">, - XD, EVEX_CD8<64, CD8VT1>; + TB, XD, EVEX_CD8<64, CD8VT1>; defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi, X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">, - XD, REX_W, EVEX_CD8<64, CD8VT1>; + TB, XD, REX_W, EVEX_CD8<64, CD8VT1>; multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT, X86VectorVTInfo 
DstVT, SDNode OpNode, @@ -7463,13 +7463,13 @@ multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT, } defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info, - lrint, WriteCvtSS2I>, XS, EVEX_CD8<32, CD8VT1>; + lrint, WriteCvtSS2I>, TB, XS, EVEX_CD8<32, CD8VT1>; defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info, - llrint, WriteCvtSS2I>, REX_W, XS, EVEX_CD8<32, CD8VT1>; + llrint, WriteCvtSS2I>, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>; defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info, - lrint, WriteCvtSD2I>, XD, EVEX_CD8<64, CD8VT1>; + lrint, WriteCvtSD2I>, TB, XD, EVEX_CD8<64, CD8VT1>; defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info, - llrint, WriteCvtSD2I>, REX_W, XD, EVEX_CD8<64, CD8VT1>; + llrint, WriteCvtSD2I>, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>; let Predicates = [HasAVX512] in { def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>; @@ -7609,29 +7609,29 @@ let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in { defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info, any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I, - "{l}">, XS, EVEX_CD8<32, CD8VT1>; + "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info, any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I, - "{q}">, REX_W, XS, EVEX_CD8<32, CD8VT1>; + "{q}">, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info, any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I, - "{l}">, XD, EVEX_CD8<64, CD8VT1>; + "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info, any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I, - "{q}">, REX_W, XD, EVEX_CD8<64, CD8VT1>; + "{q}">, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, 
i32x_info, any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I, - "{l}">, XS, EVEX_CD8<32, CD8VT1>; + "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info, any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I, - "{q}">, XS,REX_W, EVEX_CD8<32, CD8VT1>; + "{q}">, TB, XS,REX_W, EVEX_CD8<32, CD8VT1>; defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info, any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I, - "{l}">, XD, EVEX_CD8<64, CD8VT1>; + "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info, any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I, - "{q}">, XD, REX_W, EVEX_CD8<64, CD8VT1>; + "{q}">, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>; //===----------------------------------------------------------------------===// // AVX-512 Convert form float to double and back @@ -7646,25 +7646,25 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _ "$src2, $src1", "$src1, $src2", (_.VT (OpNode (_.VT _.RC:$src1), (_Src.VT _Src.RC:$src2)))>, - EVEX_4V, VEX_LIG, Sched<[sched]>; + EVEX, VVVV, VEX_LIG, Sched<[sched]>; defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode (_.VT _.RC:$src1), (_Src.ScalarIntMemFrags addr:$src2)))>, - EVEX_4V, VEX_LIG, + EVEX, VVVV, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>; let isCodeGenOnly = 1, hasSideEffects = 0 in { def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _Src.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - EVEX_4V, VEX_LIG, Sched<[sched]>; + EVEX, VVVV, VEX_LIG, Sched<[sched]>; let mayLoad = 1 in def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst), (ins _.FRC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - 
EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>; + EVEX, VVVV, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -7678,7 +7678,7 @@ multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTIn "{sae}, $src2, $src1", "$src1, $src2, {sae}", (_.VT (OpNodeSAE (_.VT _.RC:$src1), (_Src.VT _Src.RC:$src2)))>, - EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>; + EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>; } // Scalar Conversion with rounding control (RC) @@ -7691,7 +7691,7 @@ multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInf "$rc, $src2, $src1", "$src1, $src2, $rc", (_.VT (OpNodeRnd (_.VT _.RC:$src1), (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>, - EVEX_4V, VEX_LIG, Sched<[sched]>, + EVEX, VVVV, VEX_LIG, Sched<[sched]>, EVEX_B, EVEX_RC; } multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr, @@ -7719,22 +7719,22 @@ multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr, } defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds, X86froundsRnd, WriteCvtSD2SS, f64x_info, - f32x_info>, XD, REX_W; + f32x_info>, TB, XD, REX_W; defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts, X86fpextsSAE, WriteCvtSS2SD, f32x_info, - f64x_info>, XS; + f64x_info>, TB, XS; defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds, X86froundsRnd, WriteCvtSD2SS, f64x_info, - f16x_info, HasFP16>, T_MAP5XD, REX_W; + f16x_info, HasFP16>, T_MAP5, XD, REX_W; defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts, X86fpextsSAE, WriteCvtSS2SD, f16x_info, - f64x_info, HasFP16>, T_MAP5XS; + f64x_info, HasFP16>, T_MAP5, XS; defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds, X86froundsRnd, WriteCvtSD2SS, f32x_info, - f16x_info, HasFP16>, T_MAP5PS; + f16x_info, HasFP16>, T_MAP5; defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts, X86fpextsSAE, WriteCvtSS2SD, f16x_info, - f32x_info, HasFP16>, 
T_MAP6PS; + f32x_info, HasFP16>, T_MAP6; def : Pat<(f64 (any_fpextend FR32X:$src)), (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>, @@ -7996,10 +7996,10 @@ multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr, defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps", avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>, - REX_W, PD, EVEX_CD8<64, CD8VF>; + REX_W, TB, PD, EVEX_CD8<64, CD8VF>; defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd", avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>, - PS, EVEX_CD8<32, CD8VH>; + TB, EVEX_CD8<32, CD8VH>; // Extend Half to Double multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr, @@ -8108,14 +8108,14 @@ multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sc defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info, avx512vl_f32_info, SchedWriteCvtPD2PS, - HasFP16>, T_MAP5PD, EVEX_CD8<32, CD8VF>; + HasFP16>, T_MAP5, PD, EVEX_CD8<32, CD8VF>; defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info, avx512vl_f16_info, SchedWriteCvtPS2PD, - HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>; + HasFP16>, T_MAP6, PD, EVEX_CD8<16, CD8VH>; defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>, - REX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>; + REX_W, T_MAP5, PD, EVEX_CD8<64, CD8VF>; defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>, - T_MAP5PS, EVEX_CD8<16, CD8VQ>; + T_MAP5, EVEX_CD8<16, CD8VQ>; let Predicates = [HasFP16, HasVLX] in { // Special patterns to allow use of X86vmfpround for masking. 
Instruction @@ -8596,120 +8596,120 @@ multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperato defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp, X86any_VSintToFP, X86VSintToFP, - SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>; + SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>; defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp, X86VSintToFpRnd, SchedWriteCvtDQ2PS>, - PS, EVEX_CD8<32, CD8VF>; + TB, EVEX_CD8<32, CD8VF>; defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si, X86cvttp2si, X86cvttp2siSAE, - SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>; + SchedWriteCvtPS2DQ>, TB, XS, EVEX_CD8<32, CD8VF>; defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si, X86cvttp2si, X86cvttp2siSAE, SchedWriteCvtPD2DQ>, - PD, REX_W, EVEX_CD8<64, CD8VF>; + TB, PD, REX_W, EVEX_CD8<64, CD8VF>; defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui, X86cvttp2ui, X86cvttp2uiSAE, - SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>; + SchedWriteCvtPS2DQ>, TB, EVEX_CD8<32, CD8VF>; defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui, X86cvttp2ui, X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, - PS, REX_W, EVEX_CD8<64, CD8VF>; + TB, REX_W, EVEX_CD8<64, CD8VF>; defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp, uint_to_fp, X86any_VUintToFP, X86VUintToFP, - SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>; + SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>; defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp, uint_to_fp, X86VUintToFpRnd, - SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>; + SchedWriteCvtDQ2PS>, TB, XD, EVEX_CD8<32, CD8VF>; defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int, - X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, + X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD, EVEX_CD8<32, CD8VF>; defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int, - X86cvtp2IntRnd, 
SchedWriteCvtPD2DQ>, XD, + X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, TB, XD, REX_W, EVEX_CD8<64, CD8VF>; defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt, X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, - PS, EVEX_CD8<32, CD8VF>; + TB, EVEX_CD8<32, CD8VF>; defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt, X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W, - PS, EVEX_CD8<64, CD8VF>; + TB, EVEX_CD8<64, CD8VF>; defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int, X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, REX_W, - PD, EVEX_CD8<64, CD8VF>; + TB, PD, EVEX_CD8<64, CD8VF>; defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int, - X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, + X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD, EVEX_CD8<32, CD8VH>; defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt, X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W, - PD, EVEX_CD8<64, CD8VF>; + TB, PD, EVEX_CD8<64, CD8VF>; defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt, - X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD, + X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, TB, PD, EVEX_CD8<32, CD8VH>; defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si, X86cvttp2si, X86cvttp2siSAE, SchedWriteCvtPD2DQ>, REX_W, - PD, EVEX_CD8<64, CD8VF>; + TB, PD, EVEX_CD8<64, CD8VF>; defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si, X86cvttp2si, X86cvttp2siSAE, - SchedWriteCvtPS2DQ>, PD, + SchedWriteCvtPS2DQ>, TB, PD, EVEX_CD8<32, CD8VH>; defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui, X86cvttp2ui, X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, REX_W, - PD, EVEX_CD8<64, CD8VF>; + TB, PD, EVEX_CD8<64, CD8VF>; defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui, X86cvttp2ui, X86cvttp2uiSAE, - SchedWriteCvtPS2DQ>, PD, + SchedWriteCvtPS2DQ>, TB, PD, EVEX_CD8<32, CD8VH>; defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", 
any_sint_to_fp, sint_to_fp, X86VSintToFpRnd, - SchedWriteCvtDQ2PD>, REX_W, XS, EVEX_CD8<64, CD8VF>; + SchedWriteCvtDQ2PD>, REX_W, TB, XS, EVEX_CD8<64, CD8VF>; defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp, uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>, - REX_W, XS, EVEX_CD8<64, CD8VF>; + REX_W, TB, XS, EVEX_CD8<64, CD8VF>; defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp, X86any_VSintToFP, X86VMSintToFP, X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info, SchedWriteCvtDQ2PS, HasFP16>, - T_MAP5PS, EVEX_CD8<32, CD8VF>; + T_MAP5, EVEX_CD8<32, CD8VF>; defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp, X86any_VUintToFP, X86VMUintToFP, X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info, - SchedWriteCvtDQ2PS, HasFP16>, T_MAP5XD, + SchedWriteCvtDQ2PS, HasFP16>, T_MAP5, XD, EVEX_CD8<32, CD8VF>; defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp, X86any_VSintToFP, X86VMSintToFP, X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info, - SchedWriteCvtDQ2PS>, REX_W, PS, + SchedWriteCvtDQ2PS>, REX_W, TB, EVEX_CD8<64, CD8VF>; defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp, X86any_VUintToFP, X86VMUintToFP, X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info, - SchedWriteCvtDQ2PS>, REX_W, XD, + SchedWriteCvtDQ2PS>, REX_W, TB, XD, EVEX_CD8<64, CD8VF>; let Predicates = [HasVLX] in { @@ -8912,12 +8912,12 @@ multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src, (ins _src.RC:$src), "vcvtph2ps", "$src", "$src", (X86any_cvtph2ps (_src.VT _src.RC:$src)), (X86cvtph2ps (_src.VT _src.RC:$src))>, - T8PD, Sched<[sched]>; + T8, PD, Sched<[sched]>; defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), (ins x86memop:$src), "vcvtph2ps", "$src", "$src", (X86any_cvtph2ps (_src.VT ld_dag)), (X86cvtph2ps (_src.VT ld_dag))>, - T8PD, Sched<[sched.Folded]>; + T8, PD, 
Sched<[sched.Folded]>; } multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, @@ -8927,7 +8927,7 @@ multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, (ins _src.RC:$src), "vcvtph2ps", "{sae}, $src", "$src, {sae}", (X86cvtph2psSAE (_src.VT _src.RC:$src))>, - T8PD, EVEX_B, Sched<[sched]>; + T8, PD, EVEX_B, Sched<[sched]>; } let Predicates = [HasAVX512] in @@ -9068,55 +9068,55 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in { let Defs = [EFLAGS], Predicates = [HasAVX512] in { defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32, - "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, + "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64, - "ucomisd", SSEPackedDouble>, PD, EVEX, + "ucomisd", SSEPackedDouble>, TB, PD, EVEX, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32, - "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, + "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64, - "comisd", SSEPackedDouble>, PD, EVEX, + "comisd", SSEPackedDouble>, TB, PD, EVEX, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; let isCodeGenOnly = 1 in { defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem, - sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, + sse_load_f32, "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem, - sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX, + sse_load_f64, "ucomisd", SSEPackedDouble>, TB, PD, EVEX, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem, - sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, + sse_load_f32, "comiss", SSEPackedSingle>, TB, 
EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem, - sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX, + sse_load_f64, "comisd", SSEPackedDouble>, TB, PD, EVEX, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; } } let Defs = [EFLAGS], Predicates = [HasFP16] in { defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish", - SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS, + SSEPackedSingle>, AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VT1>; defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish", - SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS, + SSEPackedSingle>, AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VT1>; defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16, - "ucomish", SSEPackedSingle>, T_MAP5PS, EVEX, + "ucomish", SSEPackedSingle>, T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>; defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16, - "comish", SSEPackedSingle>, T_MAP5PS, EVEX, + "comish", SSEPackedSingle>, T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>; let isCodeGenOnly = 1 in { defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem, sse_load_f16, "ucomish", SSEPackedSingle>, - T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>; + T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>; defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem, sse_load_f16, "comish", SSEPackedSingle>, - T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>; + T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>; } } @@ -9129,35 +9129,35 @@ multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src1, _.RC:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, - EVEX_4V, VEX_LIG, Sched<[sched]>; + EVEX, VVVV, VEX_LIG, Sched<[sched]>; defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (OpNode (_.VT 
_.RC:$src1), - (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG, + (_.ScalarIntMemFrags addr:$src2))>, EVEX, VVVV, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>; } } defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl, f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>, - T_MAP6PD; + T_MAP6, PD; defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s, SchedWriteFRsqrt.Scl, f16x_info, HasFP16>, - EVEX_CD8<16, CD8VT1>, T_MAP6PD; + EVEX_CD8<16, CD8VT1>, T_MAP6, PD; let Uses = [MXCSR] in { defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl, f32x_info>, EVEX_CD8<32, CD8VT1>, - T8PD; + T8, PD; defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl, f64x_info>, REX_W, EVEX_CD8<64, CD8VT1>, - T8PD; + T8, PD; defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, SchedWriteFRsqrt.Scl, f32x_info>, - EVEX_CD8<32, CD8VT1>, T8PD; + EVEX_CD8<32, CD8VT1>, T8, PD; defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, SchedWriteFRsqrt.Scl, f64x_info>, REX_W, - EVEX_CD8<64, CD8VT1>, T8PD; + EVEX_CD8<64, CD8VT1>, T8, PD; } /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd @@ -9166,19 +9166,19 @@ multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode, let ExeDomain = _.ExeDomain in { defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src), OpcodeStr, "$src", "$src", - (_.VT (OpNode _.RC:$src))>, EVEX, T8PD, + (_.VT (OpNode _.RC:$src))>, EVEX, T8, PD, Sched<[sched]>; defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.MemOp:$src), OpcodeStr, "$src", "$src", (OpNode (_.VT - (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD, + (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8, PD, Sched<[sched.Folded, sched.ReadAfterFold]>; defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.ScalarMemOp:$src), OpcodeStr, "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr, (OpNode (_.VT (_.BroadcastLdFrag addr:$src)))>, - EVEX, T8PD, 
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; + EVEX, T8, PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -9192,7 +9192,7 @@ multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode, } let Predicates = [HasFP16] in defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM, - v32f16_info>, EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>; + v32f16_info>, EVEX_V512, T_MAP6, EVEX_CD8<16, CD8VF>; // Define only if AVX512VL feature is present. let Predicates = [HasVLX], Uses = [MXCSR] in { @@ -9212,10 +9212,10 @@ multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode, let Predicates = [HasFP16, HasVLX] in { defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.XMM, v8f16x_info>, - EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>; + EVEX_V128, T_MAP6, EVEX_CD8<16, CD8VF>; defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.YMM, v16f16x_info>, - EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>; + EVEX_V256, T_MAP6, EVEX_CD8<16, CD8VF>; } } @@ -9250,16 +9250,16 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeSAE, X86FoldableSchedWrite sched> { defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE, - sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD, EVEX_4V; + sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV; defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE, - sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8PD, EVEX_4V; + sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV; } multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeSAE, X86FoldableSchedWrite sched> { let Predicates = [HasFP16] in defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode, OpNodeSAE, sched>, - EVEX_CD8<16, CD8VT1>, T_MAP6PD, EVEX_4V; + EVEX_CD8<16, CD8VT1>, T_MAP6, PD, EVEX, VVVV; } 
let Predicates = [HasERI] in { @@ -9311,10 +9311,10 @@ multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched> { defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>, avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>, - T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; + T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>, avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>, - T8PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>; + T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>; } multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr, @@ -9323,16 +9323,16 @@ multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr, let Predicates = [HasVLX] in { defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, sched.XMM>, - EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>; + EVEX_V128, T8, PD, EVEX_CD8<32, CD8VF>; defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, sched.YMM>, - EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>; + EVEX_V256, T8, PD, EVEX_CD8<32, CD8VF>; defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, sched.XMM>, - EVEX_V128, REX_W, T8PD, EVEX_CD8<64, CD8VF>; + EVEX_V128, REX_W, T8, PD, EVEX_CD8<64, CD8VF>; defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, sched.YMM>, - EVEX_V256, REX_W, T8PD, EVEX_CD8<64, CD8VF>; + EVEX_V256, REX_W, T8, PD, EVEX_CD8<64, CD8VF>; } } @@ -9341,12 +9341,12 @@ multiclass avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode, let Predicates = [HasFP16] in defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>, avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>, - T_MAP6PD, EVEX_V512, EVEX_CD8<16, CD8VF>; + T_MAP6, PD, EVEX_V512, EVEX_CD8<16, CD8VF>; let Predicates = [HasFP16, HasVLX] in { defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", 
v8f16x_info, OpNode, sched.XMM>, - EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>; + EVEX_V128, T_MAP6, PD, EVEX_CD8<16, CD8VF>; defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>, - EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>; + EVEX_V256, T_MAP6, PD, EVEX_CD8<16, CD8VF>; } } let Predicates = [HasERI] in { @@ -9401,35 +9401,35 @@ multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr, let Predicates = [HasFP16] in defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"), sched.PH.ZMM, v32f16_info>, - EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>; + EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>; let Predicates = [HasFP16, HasVLX] in { defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"), sched.PH.XMM, v8f16x_info>, - EVEX_V128, T_MAP5PS, EVEX_CD8<16, CD8VF>; + EVEX_V128, T_MAP5, EVEX_CD8<16, CD8VF>; defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"), sched.PH.YMM, v16f16x_info>, - EVEX_V256, T_MAP5PS, EVEX_CD8<16, CD8VF>; + EVEX_V256, T_MAP5, EVEX_CD8<16, CD8VF>; } defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), sched.PS.ZMM, v16f32_info>, - EVEX_V512, PS, EVEX_CD8<32, CD8VF>; + EVEX_V512, TB, EVEX_CD8<32, CD8VF>; defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), sched.PD.ZMM, v8f64_info>, - EVEX_V512, REX_W, PD, EVEX_CD8<64, CD8VF>; + EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>; // Define only if AVX512VL feature is present. 
let Predicates = [HasVLX] in { defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), sched.PS.XMM, v4f32x_info>, - EVEX_V128, PS, EVEX_CD8<32, CD8VF>; + EVEX_V128, TB, EVEX_CD8<32, CD8VF>; defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), sched.PS.YMM, v8f32x_info>, - EVEX_V256, PS, EVEX_CD8<32, CD8VF>; + EVEX_V256, TB, EVEX_CD8<32, CD8VF>; defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), sched.PD.XMM, v2f64x_info>, - EVEX_V128, REX_W, PD, EVEX_CD8<64, CD8VF>; + EVEX_V128, REX_W, TB, PD, EVEX_CD8<64, CD8VF>; defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), sched.PD.YMM, v4f64x_info>, - EVEX_V256, REX_W, PD, EVEX_CD8<64, CD8VF>; + EVEX_V256, REX_W, TB, PD, EVEX_CD8<64, CD8VF>; } } @@ -9439,13 +9439,13 @@ multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr, let Predicates = [HasFP16] in defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"), sched.PH.ZMM, v32f16_info>, - EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>; + EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>; defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), sched.PS.ZMM, v16f32_info>, - EVEX_V512, PS, EVEX_CD8<32, CD8VF>; + EVEX_V512, TB, EVEX_CD8<32, CD8VF>; defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), sched.PD.ZMM, v8f64_info>, - EVEX_V512, REX_W, PD, EVEX_CD8<64, CD8VF>; + EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>; } multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, @@ -9501,11 +9501,11 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr, X86SchedWriteSizes sched> { defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>, - EVEX_CD8<16, CD8VT1>, EVEX_4V, T_MAP5XS; + EVEX_CD8<16, CD8VT1>, EVEX, VVVV, T_MAP5, XS; defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">, - 
EVEX_CD8<32, CD8VT1>, EVEX_4V, XS; + EVEX_CD8<32, CD8VT1>, EVEX, VVVV, TB, XS; defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">, - EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, REX_W; + EVEX_CD8<64, CD8VT1>, EVEX, VVVV, TB, XD, REX_W; } defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, @@ -9569,17 +9569,17 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, let Predicates = [HasFP16] in defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh", SchedWriteFRnd.Scl, f16x_info>, - AVX512PSIi8Base, TA, EVEX_4V, + AVX512PSIi8Base, TA, EVEX, VVVV, EVEX_CD8<16, CD8VT1>; defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless", SchedWriteFRnd.Scl, f32x_info>, - AVX512AIi8Base, EVEX_4V, VEX_LIG, + AVX512AIi8Base, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd", SchedWriteFRnd.Scl, f64x_info>, - REX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG, + REX_W, AVX512AIi8Base, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>; multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move, @@ -9923,16 +9923,16 @@ multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr, let Predicates = [HasVLX, HasBWI] in { defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info, v16i8x_info, i64mem, LdFrag, InVecNode>, - EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, WIG; + EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V128, WIG; defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info, v16i8x_info, i128mem, LdFrag, OpNode>, - EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, WIG; + EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V256, WIG; } let Predicates = [HasBWI] in { defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info, v32i8x_info, i256mem, LdFrag, OpNode>, - EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, WIG; + EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V512, WIG; } } @@ -9943,16 +9943,16 @@ multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr, let Predicates = [HasVLX, HasAVX512] 
in { defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info, v16i8x_info, i32mem, LdFrag, InVecNode>, - EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, WIG; + EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V128, WIG; defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info, v16i8x_info, i64mem, LdFrag, InVecNode>, - EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, WIG; + EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V256, WIG; } let Predicates = [HasAVX512] in { defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info, v16i8x_info, i128mem, LdFrag, OpNode>, - EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, WIG; + EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V512, WIG; } } @@ -9963,16 +9963,16 @@ multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr, let Predicates = [HasVLX, HasAVX512] in { defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info, v16i8x_info, i16mem, LdFrag, InVecNode>, - EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, WIG; + EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V128, WIG; defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info, v16i8x_info, i32mem, LdFrag, InVecNode>, - EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, WIG; + EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V256, WIG; } let Predicates = [HasAVX512] in { defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info, v16i8x_info, i64mem, LdFrag, InVecNode>, - EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, WIG; + EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V512, WIG; } } @@ -9983,16 +9983,16 @@ multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr, let Predicates = [HasVLX, HasAVX512] in { defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info, v8i16x_info, i64mem, LdFrag, InVecNode>, - EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, WIG; + EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V128, WIG; defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info, v8i16x_info, i128mem, LdFrag, OpNode>, - EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, WIG; + EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V256, WIG; } let Predicates = [HasAVX512] in { defm Z : 
avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info, v16i16x_info, i256mem, LdFrag, OpNode>, - EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, WIG; + EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V512, WIG; } } @@ -10003,16 +10003,16 @@ multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr, let Predicates = [HasVLX, HasAVX512] in { defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info, v8i16x_info, i32mem, LdFrag, InVecNode>, - EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, WIG; + EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V128, WIG; defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info, v8i16x_info, i64mem, LdFrag, InVecNode>, - EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, WIG; + EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V256, WIG; } let Predicates = [HasAVX512] in { defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info, v8i16x_info, i128mem, LdFrag, OpNode>, - EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, WIG; + EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V512, WIG; } } @@ -10024,16 +10024,16 @@ multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr, let Predicates = [HasVLX, HasAVX512] in { defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info, v4i32x_info, i64mem, LdFrag, InVecNode>, - EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128; + EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V128; defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info, v4i32x_info, i128mem, LdFrag, OpNode>, - EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256; + EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V256; } let Predicates = [HasAVX512] in { defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info, v8i32x_info, i256mem, LdFrag, OpNode>, - EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512; + EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V512; } } @@ -10663,7 +10663,7 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo>{ - let ExeDomain = 
DestInfo.ExeDomain in { + let ExeDomain = DestInfo.ExeDomain, ImmT = Imm8 in { defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst), (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3), OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", @@ -10689,7 +10689,7 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _>: avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{ - let ExeDomain = _.ExeDomain in + let ExeDomain = _.ExeDomain, ImmT = Imm8 in defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1", @@ -10773,13 +10773,13 @@ multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr, AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> { let Predicates = [Pred] in { defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512, - SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V; + SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX, VVVV; } let Predicates = [Pred, HasVLX] in { defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128, - SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V; + SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX, VVVV; defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256, - SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V; + SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX, VVVV; } } @@ -10835,38 +10835,38 @@ defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info, 0x50, X86VRange, X86VRangeSAE, SchedWriteFAdd, HasDQI>, - AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W; + AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W; defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info, 0x50, X86VRange, 
X86VRangeSAE, SchedWriteFAdd, HasDQI>, - AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; + AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>; defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>, - AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W; + AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W; defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>, - AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; + AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>; defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info, 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>, - AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W; + AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W; defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info, 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>, - AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; + AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>; defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info, 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>, - AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>; + AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>; defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info, 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>, - AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W; + AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W; defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info, 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>, - AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; + AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>; defm VGETMANTSH: 
avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info, 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>, - AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>; + AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>; multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, @@ -10920,13 +10920,13 @@ multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched } defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256, - avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; + avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>; defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256, - avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W; + avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W; defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256, - avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; + avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>; defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256, - avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W; + avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W; multiclass avx512_valign<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _>{ @@ -10962,15 +10962,15 @@ multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { let Predicates = [HasAVX512] in { defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>, - AVX512AIi8Base, 
EVEX_4V, EVEX_V512; + AVX512AIi8Base, EVEX, VVVV, EVEX_V512; } let Predicates = [HasAVX512, HasVLX] in { defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>, - AVX512AIi8Base, EVEX_4V, EVEX_V128; + AVX512AIi8Base, EVEX, VVVV, EVEX_V128; // We can't really override the 256-bit version so change it back to unset. let EVEX2VEXOverride = ? in defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>, - AVX512AIi8Base, EVEX_4V, EVEX_V256; + AVX512AIi8Base, EVEX, VVVV, EVEX_V256; } } @@ -11258,7 +11258,7 @@ defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ> multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched> { defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched, - avx512vl_f32_info, HasAVX512>, XS; + avx512vl_f32_info, HasAVX512>, TB, XS; } defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup, @@ -11301,7 +11301,7 @@ multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, multiclass avx512_movddup<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> { defm NAME: avx512_movddup_common<opc, OpcodeStr, sched, - avx512vl_f64_info>, XD, REX_W; + avx512vl_f64_info>, TB, XD, REX_W; } defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>; @@ -11369,9 +11369,9 @@ multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> { OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32orGR64:$dst, (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>, - EVEX, TAPD, Sched<[WriteVecExtract]>; + EVEX, TA, PD, Sched<[WriteVecExtract]>; - defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD; + defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TA, PD; } } @@ -11382,15 +11382,15 @@ multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> { OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32orGR64:$dst, (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>, - EVEX, PD, 
Sched<[WriteVecExtract]>; + EVEX, TB, PD, Sched<[WriteVecExtract]>; let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst), (ins _.RC:$src1, u8imm:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - EVEX, TAPD, Sched<[WriteVecExtract]>; + EVEX, TA, PD, Sched<[WriteVecExtract]>; - defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD; + defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TA, PD; } } @@ -11402,14 +11402,14 @@ multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _, OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GRC:$dst, (extractelt (_.VT _.RC:$src1), imm:$src2))]>, - EVEX, TAPD, Sched<[WriteVecExtract]>; + EVEX, TA, PD, Sched<[WriteVecExtract]>; def mr : AVX512Ii8<0x16, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(store (extractelt (_.VT _.RC:$src1), imm:$src2),addr:$dst)]>, - EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD, + EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TA, PD, Sched<[WriteVecExtractSt]>; } } @@ -11427,7 +11427,7 @@ multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode, OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>, - EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; + EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; } multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode, @@ -11437,7 +11437,7 @@ multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3), OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set _.RC:$dst, - (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, 
EVEX_4V, + (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX, VVVV, Sched<[WriteVecInsert]>; defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>; @@ -11452,17 +11452,17 @@ multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr, OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set _.RC:$dst, (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>, - EVEX_4V, TAPD, Sched<[WriteVecInsert]>; + EVEX, VVVV, TA, PD, Sched<[WriteVecInsert]>; defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _, - _.ScalarLdFrag, imm>, TAPD; + _.ScalarLdFrag, imm>, TA, PD; } } defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info, - extloadi8>, TAPD, WIG; + extloadi8>, TA, PD, WIG; defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info, - extloadi16>, PD, WIG; + extloadi16>, TB, PD, WIG; defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>; defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, REX_W; @@ -11501,11 +11501,11 @@ multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{ defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp, SchedWriteFShuffle>, EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>, - AVX512AIi8Base, EVEX_4V; + TA, EVEX, VVVV; } -defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, PS; -defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, PD, REX_W; +defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, TB; +defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, TB, PD, REX_W; //===----------------------------------------------------------------------===// // AVX-512 - Byte shift Left/Right @@ -11543,10 +11543,10 @@ multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr, } defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq", SchedWriteShuffle, HasBWI>, - AVX512PDIi8Base, EVEX_4V, WIG; + AVX512PDIi8Base, EVEX, VVVV, WIG; defm 
VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq", SchedWriteShuffle, HasBWI>, - AVX512PDIi8Base, EVEX_4V, WIG; + AVX512PDIi8Base, EVEX, VVVV, WIG; multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode, string OpcodeStr, X86FoldableSchedWrite sched, @@ -11584,7 +11584,7 @@ multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode, } defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw", - SchedWritePSADBW, HasBWI>, EVEX_4V, WIG; + SchedWritePSADBW, HasBWI>, EVEX, VVVV, WIG; // Transforms to swizzle an immediate to enable better matching when // memory operand isn't in the right place. @@ -11659,7 +11659,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode, (_.VT _.RC:$src2), (_.VT _.RC:$src3), (i8 timm:$src4)), 1, 1>, - AVX512AIi8Base, EVEX_4V, Sched<[sched]>; + AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>; defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4), OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4", @@ -11667,7 +11667,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode, (_.VT _.RC:$src2), (_.VT (bitconvert (_.LdFrag addr:$src3))), (i8 timm:$src4)), 1, 0>, - AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, + AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4), @@ -11677,7 +11677,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode, (_.VT _.RC:$src2), (_.VT (_.BroadcastLdFrag addr:$src3)), (i8 timm:$src4)), 1, 0>, EVEX_B, - AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, + AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; }// Constraints = "$src1 = $dst" @@ -12002,23 +12002,23 @@ multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched, let 
Predicates = [HasAVX512] in defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM, _Vec.info512, _Tbl.info512>, AVX512AIi8Base, - EVEX_4V, EVEX_V512; + EVEX, VVVV, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM, _Vec.info128, _Tbl.info128>, AVX512AIi8Base, - EVEX_4V, EVEX_V128; + EVEX, VVVV, EVEX_V128; defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM, _Vec.info256, _Tbl.info256>, AVX512AIi8Base, - EVEX_4V, EVEX_V256; + EVEX, VVVV, EVEX_V256; } } defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm", SchedWriteFAdd.Scl, f32x_info, v4i32x_info>, - AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; + AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>; defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm", SchedWriteFAdd.Scl, f64x_info, v2i64x_info>, - AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W; + AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W; defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info, @@ -12165,17 +12165,17 @@ multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> { defm Z128 : AESI_binop_rm_int<Op, OpStr, !cast<Intrinsic>(IntPrefix), loadv2i64, 0, VR128X, i128mem>, - EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, WIG; + EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V128, WIG; defm Z256 : AESI_binop_rm_int<Op, OpStr, !cast<Intrinsic>(IntPrefix#"_256"), loadv4i64, 0, VR256X, i256mem>, - EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, WIG; + EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V256, WIG; } let Predicates = [HasAVX512, HasVAES] in defm Z : AESI_binop_rm_int<Op, OpStr, !cast<Intrinsic>(IntPrefix#"_512"), loadv8i64, 0, VR512, i512mem>, - EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, WIG; + EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V512, WIG; } defm VAESENC : 
avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">; @@ -12189,14 +12189,14 @@ defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast"> let Predicates = [HasAVX512, HasVPCLMULQDQ] in defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>, - EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, WIG; + EVEX, VVVV, EVEX_V512, EVEX_CD8<64, CD8VF>, WIG; let Predicates = [HasVLX, HasVPCLMULQDQ] in { defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>, - EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, WIG; + EVEX, VVVV, EVEX_V128, EVEX_CD8<64, CD8VF>, WIG; defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64, - int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256, + int_x86_pclmulqdq_256>, EVEX, VVVV, EVEX_V256, EVEX_CD8<64, CD8VF>, WIG; } @@ -12217,13 +12217,13 @@ multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode, (ins VTI.RC:$src2, VTI.RC:$src3), OpStr, "$src3, $src2", "$src2, $src3", (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>, - T8PD, EVEX_4V, Sched<[sched]>; + T8, PD, EVEX, VVVV, Sched<[sched]>; defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr, "$src3, $src2", "$src2, $src3", (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, (VTI.VT (VTI.LdFrag addr:$src3))))>, - T8PD, EVEX_4V, + T8, PD, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -12239,7 +12239,7 @@ multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode, "$src2, ${src3}"#VTI.BroadcastStr, (OpNode VTI.RC:$src1, VTI.RC:$src2, (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>, - T8PD, EVEX_4V, EVEX_B, + T8, PD, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -12284,9 +12284,9 @@ multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix, avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>, REX_W, EVEX_CD8<16, CD8VF>; defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp, - OpNode, sched, HasVBMI2>, 
AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; + OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>; defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode, - sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W; + sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W; } // Concat & Shift @@ -12321,13 +12321,13 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode, (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3)), IsCommutable, IsCommutable>, - EVEX_4V, T8PD, Sched<[sched]>; + EVEX, VVVV, T8, PD, Sched<[sched]>; defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr, "$src3, $src2", "$src2, $src3", (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, (VTI.VT (VTI.LdFrag addr:$src3))))>, - EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD, + EVEX, VVVV, EVEX_CD8<32, CD8VF>, T8, PD, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), @@ -12336,8 +12336,8 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode, "$src2, ${src3}"#VTI.BroadcastStr, (OpNode VTI.RC:$src1, VTI.RC:$src2, (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>, - EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, - T8PD, Sched<[sched.Folded, sched.ReadAfterFold, + EVEX, VVVV, EVEX_CD8<32, CD8VF>, EVEX_B, + T8, PD, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; } } @@ -12406,7 +12406,7 @@ multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2)), (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1), - (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD, + (VTI.VT VTI.RC:$src2))>, EVEX, VVVV, T8, PD, Sched<[sched]>; defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst), (ins VTI.RC:$src1, VTI.MemOp:$src2), @@ -12416,7 +12416,7 @@ multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> 
{ (VTI.VT (VTI.LdFrag addr:$src2))), (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1), (VTI.VT (VTI.LdFrag addr:$src2)))>, - EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD, + EVEX, VVVV, EVEX_CD8<8, CD8VF>, T8, PD, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -12451,7 +12451,7 @@ multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode, defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb, SchedWriteVecALU>, - EVEX_CD8<8, CD8VF>, T8PD; + EVEX_CD8<8, CD8VF>, T8; multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo VTI, @@ -12483,10 +12483,10 @@ multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode, defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb", X86GF2P8affineinvqb, SchedWriteVecIMul>, - EVEX_4V, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base; + EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base; defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb", X86GF2P8affineqb, SchedWriteVecIMul>, - EVEX_4V, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base; + EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base; //===----------------------------------------------------------------------===// @@ -12498,25 +12498,25 @@ let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle, defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info, (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), "v4fmaddps", "$src3, $src2", "$src2, $src3", - []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, + []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>, Sched<[SchedWriteFMA.ZMM.Folded]>; defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info, (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), "v4fnmaddps", "$src3, $src2", "$src2, $src3", - []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, + []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>, Sched<[SchedWriteFMA.ZMM.Folded]>; defm 
V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info, (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3), "v4fmaddss", "$src3, $src2", "$src2, $src3", - []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>, + []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>, Sched<[SchedWriteFMA.Scl.Folded]>; defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info, (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3), "v4fnmaddss", "$src3, $src2", "$src2, $src3", - []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>, + []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>, Sched<[SchedWriteFMA.Scl.Folded]>; } @@ -12529,13 +12529,13 @@ let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt, defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info, (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), "vp4dpwssd", "$src3, $src2", "$src2, $src3", - []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, + []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>, Sched<[SchedWriteFMA.ZMM.Folded]>; defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info, (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), "vp4dpwssds", "$src3, $src2", "$src2, $src3", - []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, + []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>, Sched<[SchedWriteFMA.ZMM.Folded]>; } @@ -12558,7 +12558,7 @@ multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInf "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set _.KRPC:$dst, (X86vp2intersect _.RC:$src1, (_.VT _.RC:$src2)))]>, - EVEX_4V, T8XD, Sched<[sched]>; + EVEX, VVVV, T8, XD, Sched<[sched]>; def rm : I<0x68, MRMSrcMem, (outs _.KRPC:$dst), @@ -12567,7 +12567,7 @@ multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInf "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set _.KRPC:$dst, (X86vp2intersect _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>, - EVEX_4V, T8XD, EVEX_CD8<_.EltSize, 
CD8VF>, + EVEX, VVVV, T8, XD, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmb : I<0x68, MRMSrcMem, @@ -12577,7 +12577,7 @@ multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInf ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"), [(set _.KRPC:$dst, (X86vp2intersect _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>, - EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, + EVEX, VVVV, T8, XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -12623,7 +12623,7 @@ let ExeDomain = SSEPackedSingle in defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16", SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF avx512vl_f32_info, avx512vl_bf16_info, - X86cvtne2ps2bf16, HasBF16, 0>, T8XD; + X86cvtne2ps2bf16, HasBF16, 0>, T8, XD; // Truncate Float to BFloat16 multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr, @@ -12660,7 +12660,7 @@ multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr, } defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16", - SchedWriteCvtPD2PS>, T8XS, + SchedWriteCvtPD2PS>, T8, XS, EVEX_CD8<32, CD8VF>; let Predicates = [HasBF16, HasVLX] in { @@ -12744,13 +12744,13 @@ multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins src_v.RC:$src2, src_v.RC:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>, - EVEX_4V, Sched<[sched]>; + EVEX, VVVV, Sched<[sched]>; defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins src_v.RC:$src2, src_v.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, - (src_v.LdFrag addr:$src3)))>, EVEX_4V, + (src_v.LdFrag addr:$src3)))>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), @@ -12760,7 +12760,7 @@ multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 
!strconcat("$src2, ${src3}", _.BroadcastStr), (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>, - EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; + EVEX_B, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; } } // Constraints = "$src1 = $dst" @@ -12783,7 +12783,7 @@ multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, let ExeDomain = SSEPackedSingle in defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA, avx512vl_f32_info, avx512vl_bf16_info, - HasBF16>, T8XS, EVEX_CD8<32, CD8VF>; + HasBF16>, T8, XS, EVEX_CD8<32, CD8VF>; //===----------------------------------------------------------------------===// // AVX512FP16 @@ -12792,12 +12792,12 @@ defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWrit let Predicates = [HasFP16] in { // Move word ( r/m16) to Packed word def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), - "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveFromGpr]>; + "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveFromGpr]>; def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src), "vmovw\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>, - T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>; + T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>; def : Pat<(f16 (bitconvert GR16:$src)), (f16 (COPY_TO_REGCLASS @@ -12854,13 +12854,13 @@ def : Pat<(v16i32 (X86vzmovl // Move word from xmm register to r/m16 def VMOVSH2Wrr : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src), - "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveToGpr]>; + "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveToGpr]>; def VMOVWmr : AVX512<0x7E, MRMDestMem, (outs), (ins i16mem:$dst, VR128X:$src), "vmovw\t{$src, $dst|$dst, $src}", 
[(store (i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))), addr:$dst)]>, - T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>; + T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>; def : Pat<(i16 (bitconvert FR16X:$src)), (i16 (EXTRACT_SUBREG @@ -12872,9 +12872,9 @@ def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))), // Allow "vmovw" to use GR64 let hasSideEffects = 0 in { def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src), - "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>; + "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>; def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), - "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>; + "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>; } } @@ -12920,27 +12920,27 @@ multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNo defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt, X86cvtp2UIntRnd, avx512vl_i16_info, avx512vl_f16_info, SchedWriteCvtPD2DQ>, - T_MAP5PS, EVEX_CD8<16, CD8VF>; + T_MAP5, EVEX_CD8<16, CD8VF>; defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp, X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i16_info, SchedWriteCvtPD2DQ>, - T_MAP5XD, EVEX_CD8<16, CD8VF>; + T_MAP5, XD, EVEX_CD8<16, CD8VF>; defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si, X86cvttp2si, X86cvttp2siSAE, avx512vl_i16_info, avx512vl_f16_info, - SchedWriteCvtPD2DQ>, T_MAP5PD, EVEX_CD8<16, CD8VF>; + SchedWriteCvtPD2DQ>, T_MAP5, PD, EVEX_CD8<16, CD8VF>; defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui, X86cvttp2ui, X86cvttp2uiSAE, avx512vl_i16_info, avx512vl_f16_info, - SchedWriteCvtPD2DQ>, T_MAP5PS, EVEX_CD8<16, CD8VF>; + SchedWriteCvtPD2DQ>, T_MAP5, EVEX_CD8<16, CD8VF>; defm VCVTPH2W : 
avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int, X86cvtp2IntRnd, avx512vl_i16_info, avx512vl_f16_info, SchedWriteCvtPD2DQ>, - T_MAP5PD, EVEX_CD8<16, CD8VF>; + T_MAP5, PD, EVEX_CD8<16, CD8VF>; defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp, X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i16_info, SchedWriteCvtPD2DQ>, - T_MAP5XS, EVEX_CD8<16, CD8VF>; + T_MAP5, XS, EVEX_CD8<16, CD8VF>; // Convert Half to Signed/Unsigned Doubleword multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, @@ -12980,20 +12980,20 @@ multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpN defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int, - X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD, + X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD, EVEX_CD8<16, CD8VH>; defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt, - X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PS, + X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5, EVEX_CD8<16, CD8VH>; defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si, X86cvttp2si, X86cvttp2siSAE, - SchedWriteCvtPS2DQ>, T_MAP5XS, + SchedWriteCvtPS2DQ>, T_MAP5, XS, EVEX_CD8<16, CD8VH>; defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui, X86cvttp2ui, X86cvttp2uiSAE, - SchedWriteCvtPS2DQ>, T_MAP5PS, + SchedWriteCvtPS2DQ>, T_MAP5, EVEX_CD8<16, CD8VH>; // Convert Half to Signed/Unsigned Quardword @@ -13043,21 +13043,21 @@ multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpN } defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int, - X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD, + X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD, EVEX_CD8<16, CD8VQ>; defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt, - X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD, + X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD, EVEX_CD8<16, CD8VQ>; 
defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si, X86cvttp2si, X86cvttp2siSAE, - SchedWriteCvtPS2DQ>, T_MAP5PD, + SchedWriteCvtPS2DQ>, T_MAP5, PD, EVEX_CD8<16, CD8VQ>; defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui, X86cvttp2ui, X86cvttp2uiSAE, - SchedWriteCvtPS2DQ>, T_MAP5PD, + SchedWriteCvtPS2DQ>, T_MAP5, PD, EVEX_CD8<16, CD8VQ>; // Convert Signed/Unsigned Quardword to Half @@ -13154,53 +13154,53 @@ multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNo } defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp, - X86VSintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5PS, + X86VSintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5, EVEX_CD8<64, CD8VF>; defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp, - X86VUintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5XD, + X86VUintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5, XD, EVEX_CD8<64, CD8VF>; // Convert half to signed/unsigned int 32/64 defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si, X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>, - T_MAP5XS, EVEX_CD8<16, CD8VT1>; + T_MAP5, XS, EVEX_CD8<16, CD8VT1>; defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si, X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>, - T_MAP5XS, REX_W, EVEX_CD8<16, CD8VT1>; + T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>; defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi, X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>, - T_MAP5XS, EVEX_CD8<16, CD8VT1>; + T_MAP5, XS, EVEX_CD8<16, CD8VT1>; defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi, X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>, - T_MAP5XS, REX_W, EVEX_CD8<16, CD8VT1>; + T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>; defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info, any_fp_to_sint, X86cvtts2Int, 
X86cvtts2IntSAE, WriteCvtSS2I, - "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>; + "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>; defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info, any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I, - "{q}", HasFP16>, REX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>; + "{q}", HasFP16>, REX_W, T_MAP5, XS, EVEX_CD8<16, CD8VT1>; defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info, any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I, - "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>; + "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>; defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info, any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I, - "{q}", HasFP16>, T_MAP5XS, REX_W, EVEX_CD8<16, CD8VT1>; + "{q}", HasFP16>, T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>; let Predicates = [HasFP16] in { defm VCVTSI2SHZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32, v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">, - T_MAP5XS, EVEX_CD8<32, CD8VT1>; + T_MAP5, XS, EVEX_CD8<32, CD8VT1>; defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64, v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">, - T_MAP5XS, REX_W, EVEX_CD8<64, CD8VT1>; + T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>; defm VCVTUSI2SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32, v8f16x_info, i32mem, loadi32, - "cvtusi2sh","l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>; + "cvtusi2sh","l">, T_MAP5, XS, EVEX_CD8<32, CD8VT1>; defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64, v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">, - T_MAP5XS, REX_W, EVEX_CD8<64, CD8VT1>; + T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>; def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}", (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; @@ -13390,17 +13390,17 @@ let 
Constraints = "@earlyclobber $dst, $src1 = $dst" in { defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", - (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX_4V; + (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX, VVVV; defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", - (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX_4V; + (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX, VVVV; defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.ScalarMemOp:$src3), OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr), - (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX_4V; + (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX, VVVV; } } // Constraints = "@earlyclobber $dst, $src1 = $dst" @@ -13411,7 +13411,7 @@ multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>, - EVEX_4V, EVEX_B, EVEX_RC; + EVEX, VVVV, EVEX_B, EVEX_RC; } @@ -13446,14 +13446,14 @@ multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, let Uses = [MXCSR] in { defm VFMADDCPH : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>, - T_MAP6XS, EVEX_CD8<32, CD8VF>; + T_MAP6, XS, EVEX_CD8<32, CD8VF>; defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>, - T_MAP6XD, EVEX_CD8<32, CD8VF>; + T_MAP6, XD, EVEX_CD8<32, CD8VF>; defm VFMULCPH : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc, - x86vfmulcRnd, 1>, T_MAP6XS, 
EVEX_CD8<32, CD8VF>; + x86vfmulcRnd, 1>, T_MAP6, XS, EVEX_CD8<32, CD8VF>; defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc, - x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6XD, EVEX_CD8<32, CD8VF>; + x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6, XD, EVEX_CD8<32, CD8VF>; } @@ -13504,12 +13504,12 @@ multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNod let Uses = [MXCSR] in { defm VFMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>, - T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V; + T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV; defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>, - T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V; + T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV; defm VFMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>, - T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V; + T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV; defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>, - T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V; + T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV; } diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td index 8c355e84a065..936db48bb9df 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -44,511 +44,396 @@ def PLEA32r : PseudoI<(outs GR32:$dst), (ins anymem:$src), []>; def PLEA64r : PseudoI<(outs GR64:$dst), (ins anymem:$src), []>; } -//===----------------------------------------------------------------------===// -// Fixed-Register Multiplication and Division Instructions. -// - -// BinOpRR - Binary instructions with inputs "reg, reg". 
-class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - dag outlist, X86FoldableSchedWrite sched, list<dag> pattern> - : ITy<opcode, MRMDestReg, typeinfo, outlist, - (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern>, - Sched<[sched]>; - -// BinOpRR_F - Binary instructions with inputs "reg, reg", where the pattern -// has just a EFLAGS as a result. -class BinOpRR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - SDPatternOperator opnode> - : BinOpRR<opcode, mnemonic, typeinfo, (outs), WriteALU, - [(set EFLAGS, - (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>; - -// BinOpRR_RF - Binary instructions with inputs "reg, reg", where the pattern -// has both a regclass and EFLAGS as a result. -class BinOpRR_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - SDNode opnode> - : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteALU, - [(set typeinfo.RegClass:$dst, EFLAGS, - (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>; - -// BinOpRR_RFF - Binary instructions with inputs "reg, reg", where the pattern -// has both a regclass and EFLAGS as a result, and has EFLAGS as input. -class BinOpRR_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - SDNode opnode> - : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteADC, - [(set typeinfo.RegClass:$dst, EFLAGS, - (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2, - EFLAGS))]>; - -// BinOpRR_Rev - Binary instructions with inputs "reg, reg"(reversed encoding). -class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - X86FoldableSchedWrite sched = WriteALU> - : ITy<opcode, MRMSrcReg, typeinfo, - (outs typeinfo.RegClass:$dst), - (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $dst|$dst, $src2}", []>, - Sched<[sched]> { - // The disassembler should know about this, but not the asmparser. 
- let isCodeGenOnly = 1; - let ForceDisassemble = 1; - let hasSideEffects = 0; +// BinOpRR - Instructions that read "reg, reg". +class BinOpRR<bits<8> o, string m, X86TypeInfo t, dag out, list<dag> p> + : ITy<o, MRMDestReg, t, out, (ins t.RegClass:$src1, t.RegClass:$src2), m, + binop_args, p>, Sched<[WriteALU]>; +// BinOpRR_F - Instructions that read "reg, reg" and write EFLAGS only. +class BinOpRR_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> + : BinOpRR<o, m, t, (outs), + [(set EFLAGS, (node t.RegClass:$src1, t.RegClass:$src2))]>, + DefEFLAGS; +// BinOpRR_F_Rev - Reversed encoding of BinOpRR_F +class BinOpRR_F_Rev<bits<8> o, string m, X86TypeInfo t> + : BinOpRR_F<o, m, t, null_frag>, DisassembleOnly { + let Form = MRMSrcReg; } - -// BinOpRR_RFF_Rev - Binary instructions with inputs "reg, reg"(reversed -// encoding), with sched = WriteADC. -class BinOpRR_RFF_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> - : BinOpRR_Rev<opcode, mnemonic, typeinfo, WriteADC>; - -// BinOpRR_F_Rev - Binary instructions with inputs "reg, reg"(reversed -// encoding), without outlist dag. -class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> - : ITy<opcode, MRMSrcReg, typeinfo, (outs), - (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", []>, - Sched<[WriteALU]> { - // The disassembler should know about this, but not the asmparser. - let isCodeGenOnly = 1; - let ForceDisassemble = 1; - let hasSideEffects = 0; +// BinOpRR_RF - Instructions that read "reg, reg", and write "reg", EFLAGS. +class BinOpRR_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> + : BinOpRR<o, m, t, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, + (node t.RegClass:$src1, t.RegClass:$src2))]>, DefEFLAGS; +// BinOpRR_RF_Rev - Reversed encoding of BinOpRR_RF. 
+class BinOpRR_RF_Rev<bits<8> o, string m, X86TypeInfo t> + : BinOpRR_RF<o, m, t, null_frag>, DisassembleOnly { + let Form = MRMSrcReg; +} +// BinOpRRF_RF - Instructions that read "reg, reg", write "reg" and read/write +// EFLAGS. +class BinOpRRF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> + : BinOpRR<o, m, t, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, + (node t.RegClass:$src1, t.RegClass:$src2, + EFLAGS))]>, DefEFLAGS, UseEFLAGS { + let SchedRW = [WriteADC]; +} +// BinOpRRF_RF_Rev - Reversed encoding of BinOpRRF_RF +class BinOpRRF_RF_Rev<bits<8> o, string m, X86TypeInfo t> + : BinOpRRF_RF<o, m, t, null_frag>, DisassembleOnly { + let Form = MRMSrcReg; } -// BinOpRM - Binary instructions with inputs "reg, [mem]". -class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - dag outlist, X86FoldableSchedWrite sched, list<dag> pattern> - : ITy<opcode, MRMSrcMem, typeinfo, outlist, - (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern>, - Sched<[sched.Folded, sched.ReadAfterFold]>; - -// BinOpRM_ImplicitUse - Binary instructions with inputs "reg, [mem]". -// There is an implicit register read at the end of the operand sequence. -class BinOpRM_ImplicitUse<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - dag outlist, X86FoldableSchedWrite sched, list<dag> pattern> - : ITy<opcode, MRMSrcMem, typeinfo, outlist, - (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern>, - Sched<[sched.Folded, sched.ReadAfterFold, - // base, scale, index, offset, segment. - ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, - // implicit register read. - sched.ReadAfterFold]>; - -// BinOpRM_F - Binary instructions with inputs "reg, [mem]", where the pattern -// has just a EFLAGS as a result. 
-class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - SDNode opnode> - : BinOpRM<opcode, mnemonic, typeinfo, (outs), WriteALU, - [(set EFLAGS, - (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>; - -// BinOpRM_RF - Binary instructions with inputs "reg, [mem]", where the pattern -// has both a regclass and EFLAGS as a result. -class BinOpRM_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - SDNode opnode> - : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteALU, - [(set typeinfo.RegClass:$dst, EFLAGS, - (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>; - -// BinOpRM_RFF - Binary instructions with inputs "reg, [mem]", where the pattern -// has both a regclass and EFLAGS as a result, and has EFLAGS as input. -class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - SDNode opnode> - : BinOpRM_ImplicitUse<opcode, mnemonic, typeinfo, - (outs typeinfo.RegClass:$dst), WriteADC, - [(set typeinfo.RegClass:$dst, EFLAGS, - (opnode typeinfo.RegClass:$src1, - (typeinfo.LoadNode addr:$src2), EFLAGS))]>; - -// BinOpRI - Binary instructions with inputs "reg, imm". -class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - Format f, dag outlist, X86FoldableSchedWrite sched, list<dag> pattern> - : ITy<opcode, f, typeinfo, outlist, - (ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern>, - Sched<[sched]> { - let ImmT = typeinfo.ImmEncoding; +// BinOpRM - Instructions that read "reg, [mem]". +class BinOpRM<bits<8> o, string m, X86TypeInfo t, dag out, list<dag> p> + : ITy<o, MRMSrcMem, t, out, (ins t.RegClass:$src1, t.MemOperand:$src2), m, + binop_args, p>, + Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]> { + let mayLoad = 1; +} +// BinOpRM_F - Instructions that read "reg, [mem]" and write EFLAGS only. 
+class BinOpRM_F<bits<8> o, string m, X86TypeInfo t, SDNode node> + : BinOpRM<o, m, t, (outs), + [(set EFLAGS, (node t.RegClass:$src1, + (t.LoadNode addr:$src2)))]>, DefEFLAGS; +// BinOpRM_RF - Instructions that read "reg, [mem]", and write "reg", EFLAGS. +class BinOpRM_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> + : BinOpRM<o, m, t, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, (node t.RegClass:$src1, + (t.LoadNode addr:$src2)))]>, DefEFLAGS; +// BinOpRMF_RF - Instructions that read "reg, [mem]", write "reg" and read/write +// EFLAGS. +class BinOpRMF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> + : BinOpRM<o, m, t, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, + (node t.RegClass:$src1, (t.LoadNode addr:$src2), EFLAGS))]>, + DefEFLAGS, UseEFLAGS { + let SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold, + // base, scale, index, offset, segment. + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + // implicit register read. + WriteADC.ReadAfterFold]; } -// BinOpRI_F - Binary instructions with inputs "reg, imm", where the pattern -// has EFLAGS as a result. -class BinOpRI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - SDPatternOperator opnode, Format f> - : BinOpRI<opcode, mnemonic, typeinfo, f, (outs), WriteALU, - [(set EFLAGS, - (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>; - -// BinOpRI_RF - Binary instructions with inputs "reg, imm", where the pattern -// has both a regclass and EFLAGS as a result. -class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - SDNode opnode, Format f> - : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), WriteALU, - [(set typeinfo.RegClass:$dst, EFLAGS, - (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>; - -// BinOpRI_RFF - Binary instructions with inputs "reg, imm", where the pattern -// has both a regclass and EFLAGS as a result, and has EFLAGS as input.
-class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - SDNode opnode, Format f> - : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), WriteADC, - [(set typeinfo.RegClass:$dst, EFLAGS, - (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2, - EFLAGS))]>; - -// BinOpRI8 - Binary instructions with inputs "reg, imm8". -class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - Format f, dag outlist, X86FoldableSchedWrite sched, list<dag> pattern> - : ITy<opcode, f, typeinfo, outlist, - (ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern>, - Sched<[sched]> { - let ImmT = Imm8; // Always 8-bit immediate. +// BinOpRI - Instructions that read "reg, imm". +class BinOpRI<bits<8> o, string m, X86TypeInfo t, Format f, dag out, list<dag> p> + : ITy<o, f, t, out, (ins t.RegClass:$src1, t.ImmOperand:$src2), m, + binop_args, p>, Sched<[WriteALU]> { + let ImmT = t.ImmEncoding; +} +// BinOpRI_F - Instructions that read "reg, imm" and write EFLAGS only. +class BinOpRI_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, + Format f> + : BinOpRI<o, m, t, f, (outs), + [(set EFLAGS, (node t.RegClass:$src1, + t.ImmOperator:$src2))]>, DefEFLAGS; +// BinOpRI_RF - Instructions that read "reg, imm" and write "reg", EFLAGS. +class BinOpRI_RF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f> + : BinOpRI<o, m, t, f, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, + (node t.RegClass:$src1, t.ImmOperator:$src2))]>, DefEFLAGS; +// BinOpRIF_RF - Instructions that read "reg, imm", write "reg" and read/write +// EFLAGS. +class BinOpRIF_RF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f> + : BinOpRI<o, m, t, f, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, + (node t.RegClass:$src1, t.ImmOperator:$src2, + EFLAGS))]>, DefEFLAGS, UseEFLAGS { + let SchedRW = [WriteADC]; +} +// BinOpRI8 - Instructions that read "reg, imm8". 
+class BinOpRI8<bits<8> o, string m, X86TypeInfo t, Format f, dag out> + : ITy<o, f, t, out, (ins t.RegClass:$src1, t.Imm8Operand:$src2), m, + binop_args, []>, Sched<[WriteALU]> { + let ImmT = Imm8; +} +// BinOpRI8_F - Instructions that read "reg, imm8" and write EFLAGS only. +class BinOpRI8_F<bits<8> o, string m, X86TypeInfo t, Format f> + : BinOpRI8<o, m, t, f, (outs)>, DefEFLAGS; +// BinOpRI8_RF - Instructions that read "reg, imm8" and write "reg", EFLAGS. +class BinOpRI8_RF<bits<8> o, string m, X86TypeInfo t, Format f> + : BinOpRI8<o, m, t, f, (outs t.RegClass:$dst)>, DefEFLAGS; +// BinOpRI8F_RF - Instructions that read "reg, imm8", write "reg" and read/write +// EFLAGS. +class BinOpRI8F_RF<bits<8> o, string m, X86TypeInfo t, Format f> + : BinOpRI8<o, m, t, f, (outs t.RegClass:$dst)>, DefEFLAGS, UseEFLAGS { + let SchedRW = [WriteADC]; } -// BinOpRI8_F - Binary instructions with inputs "reg, imm8". -class BinOpRI8_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f> - : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs), WriteALU, []>; - -// BinOpRI8_RF - Binary instructions with inputs "reg, imm8". -class BinOpRI8_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f> - : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), WriteALU, []>; - -// BinOpRI8_RFF - Binary instructions with inputs "reg, imm8". -class BinOpRI8_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f> - : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), WriteADC, []>; - -// BinOpMR - Binary instructions with inputs "[mem], reg". -class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - list<dag> pattern> - : ITy<opcode, MRMDestMem, typeinfo, - (outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern>; - -// BinOpMR_RMW - Binary instructions with inputs "[mem], reg", where the pattern -// implicitly use EFLAGS.
-class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - SDNode opnode> - : BinOpMR<opcode, mnemonic, typeinfo, - [(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst), +// BinOpMR - Instructions that read "[mem], reg". +class BinOpMR<bits<8> o, string m, X86TypeInfo t, list<dag> p> + : ITy<o, MRMDestMem, t, (outs), (ins t.MemOperand:$src1, t.RegClass:$src2), m, + binop_args, p> { + let mayLoad = 1; +} +// BinOpMR_F - Instructions that read "[mem], reg" and write EFLAGS only. +class BinOpMR_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> + : BinOpMR<o, m, t, + [(set EFLAGS, (node (t.LoadNode addr:$src1), t.RegClass:$src2))]>, + Sched<[WriteALU.Folded, ReadDefault, ReadDefault, ReadDefault, + ReadDefault, ReadDefault, WriteALU.ReadAfterFold]>, DefEFLAGS; +// BinOpMR_MF - Instructions that read "[mem], reg" and write "[mem]", EFLAGS. +class BinOpMR_MF<bits<8> o, string m, X86TypeInfo t, SDNode node> + : BinOpMR<o, m, t, + [(store (node (load addr:$src1), t.RegClass:$src2), addr:$src1), (implicit EFLAGS)]>, Sched<[WriteALURMW, // base, scale, index, offset, segment - ReadDefault, ReadDefault, ReadDefault, - ReadDefault, ReadDefault, - WriteALU.ReadAfterFold]>; // reg - -// BinOpMR_RMW_FF - Binary instructions with inputs "[mem], reg", where the -// pattern sets EFLAGS and implicitly uses EFLAGS. -class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - SDNode opnode> - : BinOpMR<opcode, mnemonic, typeinfo, - [(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS), - addr:$dst), - (implicit EFLAGS)]>, + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + WriteALU.ReadAfterFold]>, // reg + DefEFLAGS { + let mayStore = 1; +} +// BinOpMRF_MF - Instructions that read "[mem], reg", write "[mem]" and +// read/write EFLAGS.
+class BinOpMRF_MF<bits<8> o, string m, X86TypeInfo t, SDNode node> + : BinOpMR<o, m, t, + [(store (node (load addr:$src1), t.RegClass:$src2, EFLAGS), + addr:$src1), (implicit EFLAGS)]>, Sched<[WriteADCRMW, // base, scale, index, offset, segment ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, WriteALU.ReadAfterFold, // reg - WriteALU.ReadAfterFold]>; // EFLAGS - -// BinOpMR_F - Binary instructions with inputs "[mem], reg", where the pattern -// has EFLAGS as a result. -class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - SDPatternOperator opnode> - : BinOpMR<opcode, mnemonic, typeinfo, - [(set EFLAGS, (opnode (typeinfo.LoadNode addr:$dst), - typeinfo.RegClass:$src))]>, - Sched<[WriteALU.Folded, ReadDefault, ReadDefault, ReadDefault, - ReadDefault, ReadDefault, WriteALU.ReadAfterFold]>; - -// BinOpMI - Binary instructions with inputs "[mem], imm". -class BinOpMI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - Format f, list<dag> pattern> - : ITy<opcode, f, typeinfo, - (outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern> { - let ImmT = typeinfo.ImmEncoding; + WriteALU.ReadAfterFold]>, // EFLAGS + DefEFLAGS, UseEFLAGS { + let mayStore = 1; } -// BinOpMI_RMW - Binary instructions with inputs "[mem], imm", where the -// pattern implicitly use EFLAGS. -class BinOpMI_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - SDNode opnode, Format f> - : BinOpMI<opcode, mnemonic, typeinfo, f, - [(store (opnode (typeinfo.VT (load addr:$dst)), - typeinfo.ImmOperator:$src), addr:$dst), - (implicit EFLAGS)]>, - Sched<[WriteALURMW]>; - -// BinOpMI_RMW_FF - Binary instructions with inputs "[mem], imm", where the -// pattern sets EFLAGS and implicitly uses EFLAGS. 
-class BinOpMI_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - SDNode opnode, Format f> - : BinOpMI<opcode, mnemonic, typeinfo, f, - [(store (opnode (typeinfo.VT (load addr:$dst)), - typeinfo.ImmOperator:$src, EFLAGS), addr:$dst), - (implicit EFLAGS)]>, - Sched<[WriteADCRMW]>; - -// BinOpMI_F - Binary instructions with inputs "[mem], imm", where the pattern -// has EFLAGS as a result. -class BinOpMI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - SDPatternOperator opnode, Format f> - : BinOpMI<opcode, mnemonic, typeinfo, f, - [(set EFLAGS, (opnode (typeinfo.LoadNode addr:$dst), - typeinfo.ImmOperator:$src))]>, - Sched<[WriteALU.Folded]>; - -// BinOpMI8 - Binary instructions with inputs "[mem], imm8". -class BinOpMI8<string mnemonic, X86TypeInfo typeinfo, - Format f, list<dag> pattern> - : ITy<0x82, f, typeinfo, - (outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern> { - let ImmT = Imm8; // Always 8-bit immediate. +// BinOpMI - Instructions that read "[mem], imm". +class BinOpMI<bits<8> o, string m, X86TypeInfo t, Format f, list<dag> p> + : ITy<o, f, t, (outs), (ins t.MemOperand:$src1, t.ImmOperand:$src2), m, + binop_args, p> { + let ImmT = t.ImmEncoding; + let mayLoad = 1; +} +// BinOpMI_F - Instructions that read "[mem], imm" and write EFLAGS only. +class BinOpMI_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, + Format f> + : BinOpMI<o, m, t, f, + [(set EFLAGS, (node (t.LoadNode addr:$src1), t.ImmOperator:$src2))]>, + Sched<[WriteALU.Folded]>, DefEFLAGS; +// BinOpMI_MF - Instructions that read "[mem], imm" and write "[mem]", EFLAGS. 
+class BinOpMI_MF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f> + : BinOpMI<o, m, t, f, + [(store (node (t.VT (load addr:$src1)), + t.ImmOperator:$src2), addr:$src1), (implicit EFLAGS)]>, + Sched<[WriteALURMW]>, DefEFLAGS { + let mayStore = 1; +} +// BinOpMIF_MF - Instructions that read "[mem], imm", write "[mem]" and +// read/write EFLAGS. +class BinOpMIF_MF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f> + : BinOpMI<o, m, t, f, + [(store (node (t.VT (load addr:$src1)), + t.ImmOperator:$src2, EFLAGS), addr:$src1), (implicit EFLAGS)]>, + Sched<[WriteADCRMW]>, DefEFLAGS, UseEFLAGS { + let mayStore = 1; } -// BinOpMI8_RMW - Binary instructions with inputs "[mem], imm8". -class BinOpMI8_RMW<string mnemonic, X86TypeInfo typeinfo, Format f> - : BinOpMI8<mnemonic, typeinfo, f, []>, Sched<[WriteALURMW]>; - -// BinOpMI8_RMW_FF - Binary instructions with inputs "[mem], imm8". -class BinOpMI8_RMW_FF<string mnemonic, X86TypeInfo typeinfo, Format f> - : BinOpMI8<mnemonic, typeinfo, f, []>, Sched<[WriteADCRMW]>; - -// BinOpMI8_F - Binary instructions with inputs "[mem], imm8" -class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo, Format f> - : BinOpMI8<mnemonic, typeinfo, f, []>, Sched<[WriteALU.Folded]>; - -// BinOpAI - Binary instructions with input imm, that implicitly use A reg and -// implicitly define Areg and EFLAGS. -class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - Register areg, string operands, X86FoldableSchedWrite sched = WriteALU> - : ITy<opcode, RawFrm, typeinfo, - (outs), (ins typeinfo.ImmOperand:$src), - mnemonic, operands, []>, - Sched<[sched]> { - let ImmT = typeinfo.ImmEncoding; +// BinOpMI8 - Instructions that read "[mem], imm8". +class BinOpMI8<string m, X86TypeInfo t, Format f> + : ITy<0x83, f, t, (outs), (ins t.MemOperand:$src1, t.Imm8Operand:$src2), m, + binop_args, []> { + let ImmT = Imm8; + let mayLoad = 1; +} +// BinOpMI8_F - Instructions that read "[mem], imm8" and write EFLAGS only. 
+class BinOpMI8_F<string m, X86TypeInfo t, Format f> + : BinOpMI8<m, t, f>, Sched<[WriteALU.Folded]>, DefEFLAGS; +// BinOpMI8_MF - Instructions that read "[mem], imm8" and write "[mem]", EFLAGS. +class BinOpMI8_MF<string m, X86TypeInfo t, Format f> + : BinOpMI8<m, t, f>, Sched<[WriteALURMW]>, DefEFLAGS { + let mayStore = 1; +} +// BinOpMI8F_MF - Instructions that read "[mem], imm8", write "[mem]" and +// read/write EFLAGS. +class BinOpMI8F_MF<string m, X86TypeInfo t, Format f> + : BinOpMI8<m, t, f>, Sched<[WriteADCRMW]>, DefEFLAGS, UseEFLAGS { + let mayStore = 1; +} + +// BinOpAI - Instructions that read "a-reg imm" (Accumulator register). +class BinOpAI<bits<8> o, string m, X86TypeInfo t, Register areg, string args> + : ITy<o, RawFrm, t, (outs), (ins t.ImmOperand:$src), m, args, []>, + Sched<[WriteALU]> { + let ImmT = t.ImmEncoding; let Uses = [areg]; - let Defs = [areg, EFLAGS]; - let hasSideEffects = 0; } +// BinOpAI_F - Instructions that read "a-reg imm" and write EFLAGS only. +class BinOpAI_F<bits<8> o, string m, X86TypeInfo t, Register areg, string args> + : BinOpAI<o, m, t, areg, args>, DefEFLAGS; -// BinOpAI_RFF - Binary instructions with input imm, that implicitly use and -// define Areg and EFLAGS. -class BinOpAI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - Register areg, string operands> - : BinOpAI<opcode, mnemonic, typeinfo, areg, operands, WriteADC> { +// BinOpAI_AF - Instructions that read "a-reg imm" and write a-reg/EFLAGS. +class BinOpAI_AF<bits<8> o, string m, X86TypeInfo t, Register areg, + string args> : BinOpAI<o, m, t, areg, args> { + let Defs = [areg, EFLAGS]; +} +// BinOpAIF_AF - Instructions that read "a-reg imm", write a-reg and read/write +// EFLAGS. 
+class BinOpAIF_AF<bits<8> o, string m, X86TypeInfo t, Register areg, + string args> : BinOpAI<o, m, t, areg, args> { let Uses = [areg, EFLAGS]; + let Defs = [areg, EFLAGS]; + let SchedRW = [WriteADC]; } -// BinOpAI_F - Binary instructions with input imm, that implicitly use A reg and -// implicitly define EFLAGS. -class BinOpAI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - Register areg, string operands> - : BinOpAI<opcode, mnemonic, typeinfo, areg, operands> { - let Defs = [EFLAGS]; +// UnaryOpR - Instructions that read "reg" and write "reg". +class UnaryOpR<bits<8> o, Format f, string m, X86TypeInfo t, list<dag> p> + : ITy<o, f, t, (outs t.RegClass:$dst), + (ins t.RegClass:$src1), m, "$dst", p>, Sched<[WriteALU]>; + +// UnaryOpM - Instructions that read "[mem]" and writes "[mem]". +class UnaryOpM<bits<8> o, Format f, string m, X86TypeInfo t, list<dag> p> + : ITy<o, f, t, (outs), (ins t.MemOperand:$dst), m, "$dst", p>, + Sched<[WriteALURMW]> { + let mayLoad = 1; + let mayStore = 1; } -// UnaryOpM - Unary instructions with a memory operand. -class UnaryOpM<bits<8> opcode, Format f, string mnemonic, X86TypeInfo info, - list<dag> pattern> - : ITy<opcode, f, info, (outs), (ins info.MemOperand:$dst), mnemonic, - "$dst", pattern>; - -// UnaryOpR - Unary instructions with a register. -class UnaryOpR<bits<8> opcode, Format f, string mnemonic, X86TypeInfo info, - list<dag> pattern> - : ITy<opcode, f, info, (outs info.RegClass:$dst), - (ins info.RegClass:$src1), mnemonic, "$dst", pattern>; - -// INCDECR - Instructions like "inc reg". -class INCDECR<Format f, string mnemonic, X86TypeInfo info, - SDPatternOperator node> - : UnaryOpR<0xFE, f, mnemonic, info, - [(set info.RegClass:$dst, EFLAGS, - (node info.RegClass:$src1, 1))]>; - -// INCDECM - Instructions like "inc [mem]". 
-class INCDECM<Format f, string mnemonic, X86TypeInfo info, int num> - : UnaryOpM<0xFE, f, mnemonic, info, - [(store (add (info.LoadNode addr:$dst), num), addr:$dst), - (implicit EFLAGS)]>; - -// INCDECR_ALT - Instructions like "inc reg" short forms. -class INCDECR_ALT<bits<8> opcode, string mnemonic, X86TypeInfo info> - : UnaryOpR<opcode, AddRegFrm, mnemonic, info, []>{ +// INCDECR - Instructions like "inc reg". +class INCDECR<Format f, string m, X86TypeInfo t, SDPatternOperator node> + : UnaryOpR<0xFF, f, m, t, + [(set t.RegClass:$dst, EFLAGS, (node t.RegClass:$src1, 1))]>, + DefEFLAGS { + let isConvertibleToThreeAddress = 1; // Can xform into LEA. +} + +// INCDECM - Instructions like "inc [mem]". +class INCDECM<Format f, string m, X86TypeInfo t, int num> + : UnaryOpM<0xFF, f, m, t, + [(store (add (t.LoadNode addr:$dst), num), addr:$dst), + (implicit EFLAGS)]>, DefEFLAGS; + +// INCDECR_ALT - Instructions like "inc reg" short forms. +class INCDECR_ALT<bits<8> o, string m, X86TypeInfo t> + : UnaryOpR<o, AddRegFrm, m, t, []>, DefEFLAGS { + // Short forms only valid in 32-bit mode. Selected during MCInst lowering. let Predicates = [Not64BitMode]; - let Opcode = opcode; } -// MulOpR - Instructions like "mul reg". -class MulOpR<bits<8> opcode, Format f, string mnemonic, X86TypeInfo info, - X86FoldableSchedWrite sched, list<dag> pattern> - : ITy<opcode, f, info, (outs), (ins info.RegClass:$src), mnemonic, - "$src", pattern>, - Sched<[sched]>; - -// MulOpM - Instructions like "mul [mem]". -class MulOpM<bits<8> opcode, Format f, string mnemonic, X86TypeInfo info, - X86FoldableSchedWrite sched, list<dag> pattern> - : ITy<opcode, f, info, (outs), (ins info.MemOperand:$src), mnemonic, - "$src", pattern>, SchedLoadReg<sched>; - -// NegOpR - Instructions like "neg reg", with implicit EFLAGS. 
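-class NegOpR<bits<8> opcode, string mnemonic, X86TypeInfo info> - : UnaryOpR<opcode, MRM3r, mnemonic, info, - [(set info.RegClass:$dst, (ineg info.RegClass:$src1)), - (implicit EFLAGS)]>; - -// NotOpR - Instructions like "not reg". -class NotOpR<bits<8> opcode, string mnemonic, X86TypeInfo info> - : UnaryOpR<opcode, MRM2r, mnemonic, info, - [(set info.RegClass:$dst, - (not info.RegClass:$src1))]>; - -// NegOpM - Instructions like "neg [mem]", with implicit EFLAGS. -class NegOpM<bits<8> opcode, string mnemonic, X86TypeInfo info> - : UnaryOpM<opcode, MRM3m, mnemonic, info, - [(store (ineg (info.LoadNode addr:$dst)), addr:$dst), - (implicit EFLAGS)]>; - -// NotOpM - Instructions like "neg [mem]". -class NotOpM<bits<8> opcode, string mnemonic, X86TypeInfo info> - : UnaryOpM<opcode, MRM2m, mnemonic, info, - [(store (not (info.LoadNode addr:$dst)), addr:$dst)]>; - -// BinOpRR_C - Binary instructions with inputs "reg, reg", which used mainly -// with Constraints = "$src1 = $dst". -class BinOpRR_C<bits<8> opcode, Format f, string mnemonic, X86TypeInfo info, - list<dag> pattern> - : ITy<opcode, f, info, (outs info.RegClass:$dst), - (ins info.RegClass:$src1, info.RegClass:$src2), - mnemonic, "{$src2, $dst|$dst, $src2}", pattern>; - -// BinOpRM_C - Binary instructions with inputs "reg, [mem]", which used mainly -// with Constraints = "$src1 = $dst". -class BinOpRM_C<bits<8> opcode, Format f, string mnemonic, X86TypeInfo info, - list<dag> pattern> - : ITy<opcode, f, info, (outs info.RegClass:$dst), - (ins info.RegClass:$src1, info.MemOperand:$src2), - mnemonic, "{$src2, $dst|$dst, $src2}", pattern>; +// MulOpR - Instructions like "mul reg". +class MulOpR<bits<8> o, Format f, string m, X86TypeInfo t, + X86FoldableSchedWrite sched, list<dag> p> + : ITy<o, f, t, (outs), (ins t.RegClass:$src), m, "$src", p>, Sched<[sched]>; + +// MulOpM - Instructions like "mul [mem]". +class MulOpM<bits<8> o, Format f, string m, X86TypeInfo t, + X86FoldableSchedWrite sched, list<dag> p> + : ITy<o, f, t, (outs), (ins t.MemOperand:$src), m, + "$src", p>, SchedLoadReg<sched> { + let mayLoad = 1; +} + +// NegOpR - Instructions like "neg reg". +class NegOpR<bits<8> o, string m, X86TypeInfo t> + : UnaryOpR<o, MRM3r, m, t, + [(set t.RegClass:$dst, (ineg t.RegClass:$src1)), + (implicit EFLAGS)]>, DefEFLAGS; + +// NegOpM - Instructions like "neg [mem]". +class NegOpM<bits<8> o, string m, X86TypeInfo t> + : UnaryOpM<o, MRM3m, m, t, + [(store (ineg (t.LoadNode addr:$dst)), addr:$dst), + (implicit EFLAGS)]>, DefEFLAGS; + +// NOTE: NOT does not set EFLAGS! +// NotOpR - Instructions like "not reg". +class NotOpR<bits<8> o, string m, X86TypeInfo t> + : UnaryOpR<o, MRM2r, m, t, [(set t.RegClass:$dst, (not t.RegClass:$src1))]>; + +// NotOpM - Instructions like "not [mem]". +class NotOpM<bits<8> o, string m, X86TypeInfo t> + : UnaryOpM<o, MRM2m, m, t, + [(store (not (t.LoadNode addr:$dst)), addr:$dst)]>; // IMulOpRR - Instructions like "imul reg, reg, i8". -class IMulOpRR<bits<8> opcode, string mnemonic, X86TypeInfo info, - X86FoldableSchedWrite sched> - : BinOpRR_C<opcode, MRMSrcReg, mnemonic, info, - [(set info.RegClass:$dst, EFLAGS, - (X86smul_flag info.RegClass:$src1, - info.RegClass:$src2))]>, - Sched<[sched]>, TB; +class IMulOpRR<bits<8> o, string m, X86TypeInfo t, X86FoldableSchedWrite sched> + : BinOpRR_RF<o, m, t, X86smul_flag>, TB { + let Form = MRMSrcReg; + let SchedRW = [sched]; + // X = IMUL Y, Z --> X = IMUL Z, Y + let isCommutable = 1; +} // IMulOpRM - Instructions like "imul reg, reg, [mem]".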
+class MulOpM<bits<8> o, Format f, string m, X86TypeInfo t, + X86FoldableSchedWrite sched, list<dag> p> + : ITy<o, f, t, (outs), (ins t.MemOperand:$src), m, + "$src", p>, SchedLoadReg<sched> { + let mayLoad = 1; +} + +// NegOpR - Instructions like "neg reg". +class NegOpR<bits<8> o, string m, X86TypeInfo t> + : UnaryOpR<o, MRM3r, m, t, + [(set t.RegClass:$dst, (ineg t.RegClass:$src1)), + (implicit EFLAGS)]>, DefEFLAGS; + +// NegOpM - Instructions like "neg [mem]". +class NegOpM<bits<8> o, string m, X86TypeInfo t> + : UnaryOpM<o, MRM3m, m, t, + [(store (ineg (t.LoadNode addr:$dst)), addr:$dst), + (implicit EFLAGS)]>, DefEFLAGS; + +// NOTE: NOT does not set EFLAGS! +// NotOpR - Instructions like "not reg". +class NotOpR<bits<8> o, string m, X86TypeInfo t> + : UnaryOpR<o, MRM2r, m, t, [(set t.RegClass:$dst, (not t.RegClass:$src1))]>; + +// NotOpM - Instructions like "neg [mem]". +class NotOpM<bits<8> o, string m, X86TypeInfo t> + : UnaryOpM<o, MRM2m, m, t, + [(store (not (t.LoadNode addr:$dst)), addr:$dst)]>; // IMulOpRR - Instructions like "imul reg, reg, i8". -class IMulOpRR<bits<8> opcode, string mnemonic, X86TypeInfo info, - X86FoldableSchedWrite sched> - : BinOpRR_C<opcode, MRMSrcReg, mnemonic, info, - [(set info.RegClass:$dst, EFLAGS, - (X86smul_flag info.RegClass:$src1, - info.RegClass:$src2))]>, - Sched<[sched]>, TB; +class IMulOpRR<bits<8> o, string m, X86TypeInfo t, X86FoldableSchedWrite sched> + : BinOpRR_RF<o, m, t, X86smul_flag>, TB { + let Form = MRMSrcReg; + let SchedRW = [sched]; + // X = IMUL Y, Z --> X = IMUL Z, Y + let isCommutable = 1; +} // IMulOpRM - Instructions like "imul reg, reg, [mem]". 
-class IMulOpRM<bits<8> opcode, string mnemonic, X86TypeInfo info, - X86FoldableSchedWrite sched> - : BinOpRM_C<opcode, MRMSrcMem, mnemonic, info, - [(set info.RegClass:$dst, EFLAGS, - (X86smul_flag info.RegClass:$src1, (info.LoadNode addr:$src2)))]>, - Sched<[sched.Folded, sched.ReadAfterFold]>, TB; +class IMulOpRM<bits<8> o, string m, X86TypeInfo t, X86FoldableSchedWrite sched> + : BinOpRM_RF<o, m, t, X86smul_flag>, TB { +let Form = MRMSrcMem; +let SchedRW = [sched.Folded, sched.ReadAfterFold]; +} // IMulOpRRI8 - Instructions like "imul reg, reg, i8". -class IMulOpRRI8<bits<8> opcode, string mnemonic, X86TypeInfo info, +class IMulOpRRI8<bits<8> o, string m, X86TypeInfo t, X86FoldableSchedWrite sched> - : ITy<opcode, MRMSrcReg, info, (outs info.RegClass:$dst), - (ins info.RegClass:$src1, info.Imm8Operand:$src2), mnemonic, - "{$src2, $src1, $dst|$dst, $src1, $src2}", []>, Sched<[sched]> { + : ITy<o, MRMSrcReg, t, (outs t.RegClass:$dst), + (ins t.RegClass:$src1, t.Imm8Operand:$src2), m, + "{$src2, $src1, $dst|$dst, $src1, $src2}", []>, Sched<[sched]>, DefEFLAGS { let ImmT = Imm8; } // IMulOpRRI - Instructions like "imul reg, reg, i16/i32/i64". -class IMulOpRRI<bits<8> opcode, string mnemonic, X86TypeInfo info, +class IMulOpRRI<bits<8> o, string m, X86TypeInfo t, X86FoldableSchedWrite sched> - : ITy<opcode, MRMSrcReg, info, (outs info.RegClass:$dst), - (ins info.RegClass:$src1, info.ImmOperand:$src2), mnemonic, + : ITy<o, MRMSrcReg, t, (outs t.RegClass:$dst), + (ins t.RegClass:$src1, t.ImmOperand:$src2), m, "{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set info.RegClass:$dst, EFLAGS, - (X86smul_flag info.RegClass:$src1, - info.ImmNoSuOperator:$src2))]>, - Sched<[sched]>{ - let ImmT = info.ImmEncoding; + [(set t.RegClass:$dst, EFLAGS, (X86smul_flag t.RegClass:$src1, + t.ImmNoSuOperator:$src2))]>, + Sched<[sched]>, DefEFLAGS { + let ImmT = t.ImmEncoding; } // IMulOpRMI8 - Instructions like "imul reg, [mem], i8". 
-class IMulOpRMI8<bits<8> opcode, string mnemonic, X86TypeInfo info, +class IMulOpRMI8<bits<8> o, string m, X86TypeInfo t, X86FoldableSchedWrite sched> - : ITy<opcode, MRMSrcMem, info, (outs info.RegClass:$dst), - (ins info.MemOperand:$src1, info.Imm8Operand:$src2), mnemonic, - "{$src2, $src1, $dst|$dst, $src1, $src2}", []>, Sched<[sched.Folded]> { + : ITy<o, MRMSrcMem, t, (outs t.RegClass:$dst), + (ins t.MemOperand:$src1, t.Imm8Operand:$src2), m, + "{$src2, $src1, $dst|$dst, $src1, $src2}", []>, Sched<[sched.Folded]>, + DefEFLAGS { let ImmT = Imm8; + let mayLoad = 1; } // IMulOpRMI - Instructions like "imul reg, [mem], i16/i32/i64". -class IMulOpRMI<bits<8> opcode, string mnemonic, X86TypeInfo info, +class IMulOpRMI<bits<8> o, string m, X86TypeInfo t, X86FoldableSchedWrite sched> - : ITy<opcode, MRMSrcMem, info, (outs info.RegClass:$dst), - (ins info.MemOperand:$src1, info.ImmOperand:$src2), mnemonic, + : ITy<o, MRMSrcMem, t, (outs t.RegClass:$dst), + (ins t.MemOperand:$src1, t.ImmOperand:$src2), m, "{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set info.RegClass:$dst, EFLAGS, - (X86smul_flag (info.LoadNode addr:$src1), - info.ImmNoSuOperator:$src2))]>, - Sched<[sched.Folded]>{ - let ImmT = info.ImmEncoding; + [(set t.RegClass:$dst, EFLAGS, + (X86smul_flag (t.LoadNode addr:$src1), t.ImmNoSuOperator:$src2))]>, + Sched<[sched.Folded]>, DefEFLAGS { + let ImmT = t.ImmEncoding; } -let Defs = [EFLAGS] in { -let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { -// Short forms only valid in 32-bit mode. Selected during MCInst lowering. -let hasSideEffects = 0 in { -def INC16r_alt : INCDECR_ALT<0x40, "inc", Xi16>; -def INC32r_alt : INCDECR_ALT<0x40, "inc", Xi32>; -} // hasSideEffects = 0 - -let isConvertibleToThreeAddress = 1 in { // Can xform into LEA. 
+let Constraints = "$src1 = $dst" in { +def INC16r_alt : INCDECR_ALT<0x40, "inc", Xi16>, OpSize16; +def INC32r_alt : INCDECR_ALT<0x40, "inc", Xi32>, OpSize32; def INC8r : INCDECR<MRM0r, "inc", Xi8, X86add_flag_nocf>; -def INC16r : INCDECR<MRM0r, "inc", Xi16, X86add_flag_nocf>; -def INC32r : INCDECR<MRM0r, "inc", Xi32, X86add_flag_nocf>; +def INC16r : INCDECR<MRM0r, "inc", Xi16, X86add_flag_nocf>, OpSize16; +def INC32r : INCDECR<MRM0r, "inc", Xi32, X86add_flag_nocf>, OpSize32; def INC64r : INCDECR<MRM0r, "inc", Xi64, X86add_flag_nocf>; -} // isConvertibleToThreeAddress = 1 -} // Constraints = "$src1 = $dst", SchedRW -let SchedRW = [WriteALURMW] in { -let Predicates = [UseIncDec] in { - def INC8m : INCDECM<MRM0m, "inc", Xi8, 1>; - def INC16m : INCDECM<MRM0m, "inc", Xi16, 1>; - def INC32m : INCDECM<MRM0m, "inc", Xi32, 1>; -} // Predicates -let Predicates = [UseIncDec, In64BitMode] in { - def INC64m : INCDECM<MRM0m, "inc", Xi64, 1>; -} // Predicates -} // SchedRW - -let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { -// Short forms only valid in 32-bit mode. Selected during MCInst lowering. -let hasSideEffects = 0 in { -def DEC16r_alt : INCDECR_ALT<0x48, "dec", Xi16>; -def DEC32r_alt : INCDECR_ALT<0x48, "dec", Xi32>; -} // hasSideEffects = 0 - -let isConvertibleToThreeAddress = 1 in { // Can xform into LEA. 
+def DEC16r_alt : INCDECR_ALT<0x48, "dec", Xi16>, OpSize16; +def DEC32r_alt : INCDECR_ALT<0x48, "dec", Xi32>, OpSize32; def DEC8r : INCDECR<MRM1r, "dec", Xi8, X86sub_flag_nocf>; -def DEC16r : INCDECR<MRM1r, "dec", Xi16, X86sub_flag_nocf>; -def DEC32r : INCDECR<MRM1r, "dec", Xi32, X86sub_flag_nocf>; +def DEC16r : INCDECR<MRM1r, "dec", Xi16, X86sub_flag_nocf>, OpSize16; +def DEC32r : INCDECR<MRM1r, "dec", Xi32, X86sub_flag_nocf>, OpSize32; def DEC64r : INCDECR<MRM1r, "dec", Xi64, X86sub_flag_nocf>; -} // isConvertibleToThreeAddress = 1 -} // Constraints = "$src1 = $dst", SchedRW +} -let SchedRW = [WriteALURMW] in { let Predicates = [UseIncDec] in { - def DEC8m : INCDECM<MRM1m, "dec", Xi8, -1>; - def DEC16m : INCDECM<MRM1m, "dec", Xi16, -1>; - def DEC32m : INCDECM<MRM1m, "dec", Xi32, -1>; -} // Predicates +def INC8m : INCDECM<MRM0m, "inc", Xi8, 1>; +def INC16m : INCDECM<MRM0m, "inc", Xi16, 1>, OpSize16; +def INC32m : INCDECM<MRM0m, "inc", Xi32, 1>, OpSize32; +def DEC8m : INCDECM<MRM1m, "dec", Xi8, -1>; +def DEC16m : INCDECM<MRM1m, "dec", Xi16, -1>, OpSize16; +def DEC32m : INCDECM<MRM1m, "dec", Xi32, -1>, OpSize32; +} let Predicates = [UseIncDec, In64BitMode] in { - def DEC64m : INCDECM<MRM1m, "dec", Xi64, -1>; -} // Predicates -} // SchedRW -} // Defs = [EFLAGS] +def INC64m : INCDECM<MRM0m, "inc", Xi64, 1>; +def DEC64m : INCDECM<MRM1m, "dec", Xi64, -1>; +} // Extra precision multiplication @@ -562,14 +447,14 @@ def MUL8r : MulOpR<0xF6, MRM4r, "mul", Xi8, WriteIMul8, // syntax can be accepted. 
[(set AL, (mul AL, GR8:$src)), (implicit EFLAGS)]>; // AX,DX = AX*GR16 -let Defs = [AX,DX,EFLAGS], Uses = [AX], hasSideEffects = 0 in -def MUL16r : MulOpR<0xF7, MRM4r, "mul", Xi16, WriteIMul16, []>; +let Defs = [AX,DX,EFLAGS], Uses = [AX] in +def MUL16r : MulOpR<0xF7, MRM4r, "mul", Xi16, WriteIMul16, []>, OpSize16; // EAX,EDX = EAX*GR32 -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], hasSideEffects = 0 in +let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def MUL32r : MulOpR<0xF7, MRM4r, "mul", Xi32, WriteIMul32, - [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>; + [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>, OpSize32; // RAX,RDX = RAX*GR64 -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], hasSideEffects = 0 in +let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def MUL64r : MulOpR<0xF7, MRM4r, "mul", Xi64, WriteIMul64, [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>; // AL,AH = AL*[mem8] @@ -581,79 +466,68 @@ def MUL8m : MulOpM<0xF6, MRM4m, "mul", Xi8, WriteIMul8, [(set AL, (mul AL, (loadi8 addr:$src))), (implicit EFLAGS)]>; // AX,DX = AX*[mem16] -let mayLoad = 1, hasSideEffects = 0 in { let Defs = [AX,DX,EFLAGS], Uses = [AX] in -def MUL16m : MulOpM<0xF7, MRM4m, "mul", Xi16, WriteIMul16, []>; +def MUL16m : MulOpM<0xF7, MRM4m, "mul", Xi16, WriteIMul16, []>, OpSize16; // EAX,EDX = EAX*[mem32] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in -def MUL32m : MulOpM<0xF7, MRM4m, "mul", Xi32, WriteIMul32, []>; +def MUL32m : MulOpM<0xF7, MRM4m, "mul", Xi32, WriteIMul32, []>, OpSize32; // RAX,RDX = RAX*[mem64] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def MUL64m : MulOpM<0xF7, MRM4m, "mul", Xi64, WriteIMul64, []>, - Requires<[In64BitMode]>; -} + Requires<[In64BitMode]>; -let hasSideEffects = 0 in { // AL,AH = AL*GR8 let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8r : MulOpR<0xF6, MRM5r, "imul", Xi8, WriteIMul8, []>; // AX,DX = AX*GR16 let Defs = [AX,DX,EFLAGS], Uses = [AX] in -def IMUL16r : MulOpR<0xF7, MRM5r, "imul", Xi16, WriteIMul16, []>; +def 
IMUL16r : MulOpR<0xF7, MRM5r, "imul", Xi16, WriteIMul16, []>, OpSize16; // EAX,EDX = EAX*GR32 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in -def IMUL32r : MulOpR<0xF7, MRM5r, "imul", Xi32, WriteIMul32, []>; +def IMUL32r : MulOpR<0xF7, MRM5r, "imul", Xi32, WriteIMul32, []>, OpSize32; // RAX,RDX = RAX*GR64 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64r : MulOpR<0xF7, MRM5r, "imul", Xi64, WriteIMul64, []>; -let mayLoad = 1 in { // AL,AH = AL*[mem8] let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8m : MulOpM<0xF6, MRM5m, "imul", Xi8, WriteIMul8, []>; // AX,DX = AX*[mem16] let Defs = [AX,DX,EFLAGS], Uses = [AX] in -def IMUL16m : MulOpM<0xF7, MRM5m, "imul", Xi16, WriteIMul16, []>; +def IMUL16m : MulOpM<0xF7, MRM5m, "imul", Xi16, WriteIMul16, []>, OpSize16; // EAX,EDX = EAX*[mem32] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in -def IMUL32m : MulOpM<0xF7, MRM5m, "imul", Xi32, WriteIMul32, []>; +def IMUL32m : MulOpM<0xF7, MRM5m, "imul", Xi32, WriteIMul32, []>, OpSize32; // RAX,RDX = RAX*[mem64] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64m : MulOpM<0xF7, MRM5m, "imul", Xi64, WriteIMul64, []>, - Requires<[In64BitMode]>; -} + Requires<[In64BitMode]>; -let Defs = [EFLAGS] in { let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in { -// X = IMUL Y, Z --> X = IMUL Z, Y // Register-Register Signed Integer Multiply -def IMUL16rr : IMulOpRR<0xAF, "imul", Xi16, WriteIMul16Reg>; -def IMUL32rr : IMulOpRR<0xAF, "imul", Xi32, WriteIMul32Reg>; +def IMUL16rr : IMulOpRR<0xAF, "imul", Xi16, WriteIMul16Reg>, OpSize16; +def IMUL32rr : IMulOpRR<0xAF, "imul", Xi32, WriteIMul32Reg>, OpSize32; def IMUL64rr : IMulOpRR<0xAF, "imul", Xi64, WriteIMul64Reg>; -} // isCommutable // Register-Memory Signed Integer Multiply -def IMUL16rm : IMulOpRM<0xAF, "imul", Xi16, WriteIMul16Reg>; -def IMUL32rm : IMulOpRM<0xAF, "imul", Xi32, WriteIMul32Reg>; +def IMUL16rm : IMulOpRM<0xAF, "imul", Xi16, WriteIMul16Reg>, OpSize16; +def IMUL32rm : IMulOpRM<0xAF, "imul", Xi32, WriteIMul32Reg>, 
OpSize32; def IMUL64rm : IMulOpRM<0xAF, "imul", Xi64, WriteIMul64Reg>; -} // Constraints = "$src1 = $dst" -} // Defs = [EFLAGS] +} // Surprisingly enough, these are not two address instructions! -let Defs = [EFLAGS] in { // NOTE: These are order specific, we want the ri8 forms to be listed // first so that they are slightly preferred to the ri forms. // Register-Integer Signed Integer Multiply // GR16 = GR16*I8 -def IMUL16rri8 : IMulOpRRI8<0x6B, "imul", Xi16, WriteIMul16Imm>; +def IMUL16rri8 : IMulOpRRI8<0x6B, "imul", Xi16, WriteIMul16Imm>, OpSize16; // GR16 = GR16*I16 -def IMUL16rri : IMulOpRRI<0x69, "imul", Xi16, WriteIMul16Imm>; +def IMUL16rri : IMulOpRRI<0x69, "imul", Xi16, WriteIMul16Imm>, OpSize16; // GR32 = GR32*I8 -def IMUL32rri8 : IMulOpRRI8<0x6B, "imul", Xi32, WriteIMul32Imm>; +def IMUL32rri8 : IMulOpRRI8<0x6B, "imul", Xi32, WriteIMul32Imm>, OpSize32; // GR32 = GR32*I32 -def IMUL32rri : IMulOpRRI<0x69, "imul", Xi32, WriteIMul32Imm>; +def IMUL32rri : IMulOpRRI<0x69, "imul", Xi32, WriteIMul32Imm>, OpSize32; // GR64 = GR64*I8 def IMUL64rri8 : IMulOpRRI8<0x6B, "imul", Xi64, WriteIMul64Imm>; // GR64 = GR64*I32 @@ -661,21 +535,17 @@ def IMUL64rri32 : IMulOpRRI<0x69, "imul", Xi64, WriteIMul64Imm>; // Memory-Integer Signed Integer Multiply // GR16 = [mem16]*I8 -let mayLoad = 1 in { -def IMUL16rmi8 : IMulOpRMI8<0x6B, "imul", Xi16, WriteIMul16Imm>; +def IMUL16rmi8 : IMulOpRMI8<0x6B, "imul", Xi16, WriteIMul16Imm>, OpSize16; // GR16 = [mem16]*I16 -def IMUL16rmi : IMulOpRMI<0x69, "imul", Xi16, WriteIMul16Imm>; +def IMUL16rmi : IMulOpRMI<0x69, "imul", Xi16, WriteIMul16Imm>, OpSize16; // GR32 = [mem32]*I8 -def IMUL32rmi8 : IMulOpRMI8<0x6B, "imul", Xi32, WriteIMul32Imm>; +def IMUL32rmi8 : IMulOpRMI8<0x6B, "imul", Xi32, WriteIMul32Imm>, OpSize32; // GR32 = [mem32]*I32 -def IMUL32rmi : IMulOpRMI<0x69, "imul", Xi32, WriteIMul32Imm>; +def IMUL32rmi : IMulOpRMI<0x69, "imul", Xi32, WriteIMul32Imm>, OpSize32; // GR64 = [mem64]*I8 def IMUL64rmi8 : IMulOpRMI8<0x6B, "imul", Xi64, 
WriteIMul64Imm>; // GR64 = [mem64]*I32 def IMUL64rmi32 : IMulOpRMI<0x69, "imul", Xi64, WriteIMul64Imm>; -} // mayLoad -} // Defs = [EFLAGS] -} // hasSideEffects // unsigned division/remainder let hasSideEffects = 1 in { // so that we don't speculatively execute @@ -684,10 +554,10 @@ let Defs = [AL,AH,EFLAGS], Uses = [AX] in def DIV8r : MulOpR<0xF6, MRM6r, "div", Xi8, WriteDiv8, []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in // DX:AX/r16 = AX,DX -def DIV16r : MulOpR<0xF7, MRM6r, "div", Xi16, WriteDiv16, []>; +def DIV16r : MulOpR<0xF7, MRM6r, "div", Xi16, WriteDiv16, []>, OpSize16; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/r32 = EAX,EDX -def DIV32r : MulOpR<0xF7, MRM6r, "div", Xi32, WriteDiv32, []>; +def DIV32r : MulOpR<0xF7, MRM6r, "div", Xi32, WriteDiv32, []>, OpSize32; // RDX:RAX/r64 = RAX,RDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def DIV64r : MulOpR<0xF7, MRM6r, "div", Xi64, WriteDiv64, []>; @@ -698,9 +568,9 @@ let Defs = [AL,AH,EFLAGS], Uses = [AX] in def DIV8m : MulOpM<0xF6, MRM6m, "div", Xi8, WriteDiv8, []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in // DX:AX/[mem16] = AX,DX -def DIV16m : MulOpM<0xF7, MRM6m, "div", Xi16, WriteDiv16, []>; +def DIV16m : MulOpM<0xF7, MRM6m, "div", Xi16, WriteDiv16, []>, OpSize16; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX -def DIV32m : MulOpM<0xF7, MRM6m, "div", Xi32, WriteDiv32, []>; +def DIV32m : MulOpM<0xF7, MRM6m, "div", Xi32, WriteDiv32, []>, OpSize32; // RDX:RAX/[mem64] = RAX,RDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def DIV64m : MulOpM<0xF7, MRM6m, "div", Xi64, WriteDiv64, []>, @@ -713,69 +583,52 @@ let Defs = [AL,AH,EFLAGS], Uses = [AX] in def IDIV8r : MulOpR<0xF6, MRM7r, "idiv", Xi8, WriteIDiv8, []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in // DX:AX/r16 = AX,DX -def IDIV16r: MulOpR<0xF7, MRM7r, "idiv", Xi16, WriteIDiv16, []>; +def IDIV16r: MulOpR<0xF7, MRM7r, "idiv", Xi16, WriteIDiv16, []>, OpSize16; let Defs = [EAX,EDX,EFLAGS], Uses = 
[EAX,EDX] in // EDX:EAX/r32 = EAX,EDX -def IDIV32r: MulOpR<0xF7, MRM7r, "idiv", Xi32, WriteIDiv32, []>; +def IDIV32r: MulOpR<0xF7, MRM7r, "idiv", Xi32, WriteIDiv32, []>, OpSize32; // RDX:RAX/r64 = RAX,RDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def IDIV64r: MulOpR<0xF7, MRM7r, "idiv", Xi64, WriteIDiv64, []>; -let mayLoad = 1 in { let Defs = [AL,AH,EFLAGS], Uses = [AX] in // AX/[mem8] = AL,AH def IDIV8m : MulOpM<0xF6, MRM7m, "idiv", Xi8, WriteIDiv8, []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in // DX:AX/[mem16] = AX,DX -def IDIV16m: MulOpM<0xF7, MRM7m, "idiv", Xi16, WriteIDiv16, []>; +def IDIV16m: MulOpM<0xF7, MRM7m, "idiv", Xi16, WriteIDiv16, []>, OpSize16; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX -def IDIV32m: MulOpM<0xF7, MRM7m, "idiv", Xi32, WriteIDiv32, []>; +def IDIV32m: MulOpM<0xF7, MRM7m, "idiv", Xi32, WriteIDiv32, []>, OpSize32; let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX // RDX:RAX/[mem64] = RAX,RDX def IDIV64m: MulOpM<0xF7, MRM7m, "idiv", Xi64, WriteIDiv64, []>, Requires<[In64BitMode]>; -} } // hasSideEffects = 1 -//===----------------------------------------------------------------------===// -// Two address Instructions. -// - -// unary instructions -let Defs = [EFLAGS] in { -let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { +let Constraints = "$src1 = $dst" in { def NEG8r : NegOpR<0xF6, "neg", Xi8>; -def NEG16r : NegOpR<0xF7, "neg", Xi16>; -def NEG32r : NegOpR<0xF7, "neg", Xi32>; +def NEG16r : NegOpR<0xF7, "neg", Xi16>, OpSize16; +def NEG32r : NegOpR<0xF7, "neg", Xi32>, OpSize32; def NEG64r : NegOpR<0xF7, "neg", Xi64>; -} // Constraints = "$src1 = $dst", SchedRW +} -// Read-modify-write negate. 
-let SchedRW = [WriteALURMW] in { def NEG8m : NegOpM<0xF6, "neg", Xi8>; -def NEG16m : NegOpM<0xF7, "neg", Xi16>; -def NEG32m : NegOpM<0xF7, "neg", Xi32>; +def NEG16m : NegOpM<0xF7, "neg", Xi16>, OpSize16; +def NEG32m : NegOpM<0xF7, "neg", Xi32>, OpSize32; def NEG64m : NegOpM<0xF7, "neg", Xi64>, Requires<[In64BitMode]>; -} // SchedRW -} // Defs = [EFLAGS] - -// Note: NOT does not set EFLAGS! - -let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { +let Constraints = "$src1 = $dst" in { def NOT8r : NotOpR<0xF6, "not", Xi8>; -def NOT16r : NotOpR<0xF7, "not", Xi16>; -def NOT32r : NotOpR<0xF7, "not", Xi32>; +def NOT16r : NotOpR<0xF7, "not", Xi16>, OpSize16; +def NOT32r : NotOpR<0xF7, "not", Xi32>, OpSize32; def NOT64r : NotOpR<0xF7, "not", Xi64>; -} // Constraints = "$src1 = $dst", SchedRW +} -let SchedRW = [WriteALURMW] in { def NOT8m : NotOpM<0xF6, "not", Xi8>; -def NOT16m : NotOpM<0xF7, "not", Xi16>; -def NOT32m : NotOpM<0xF7, "not", Xi32>; +def NOT16m : NotOpM<0xF7, "not", Xi16>, OpSize16; +def NOT32m : NotOpM<0xF7, "not", Xi32>, OpSize32; def NOT64m : NotOpM<0xF7, "not", Xi64>, Requires<[In64BitMode]>; -} // SchedRW /// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is /// defined with "(set GPR:$dst, EFLAGS, (...". 
@@ -787,81 +640,73 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, SDNode opnodeflag, SDNode opnode, bit CommutableRR, bit ConvertibleToThreeAddress, bit ConvertibleToThreeAddressRR> { - let Defs = [EFLAGS] in { - let Constraints = "$src1 = $dst" in { - let isCommutable = CommutableRR in { - let isConvertibleToThreeAddress = ConvertibleToThreeAddressRR in { - def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>; - def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>; - def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>; - def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>; - } // isConvertibleToThreeAddress - } // isCommutable - - def NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>; - def NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>; - def NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>; - def NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>; - - def NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>; - def NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>; - def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>; - def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>; - - let isConvertibleToThreeAddress = ConvertibleToThreeAddress, hasSideEffects= 0 in { - def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>; - - // NOTE: These are order specific, we want the ri8 forms to be listed - // first so that they are slightly preferred to the ri forms. 
- def NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, RegMRM>; - def NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, RegMRM>; - def NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, RegMRM>; - - def NAME#16ri : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>; - def NAME#32ri : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>; - def NAME#64ri32: BinOpRI_RF<0x80, mnemonic, Xi64, opnodeflag, RegMRM>; - } - } // Constraints = "$src1 = $dst" - - let mayLoad = 1, mayStore = 1, hasSideEffects = 0 in { - def NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>; - def NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>; - def NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>; - - // NOTE: These are order specific, we want the mi8 forms to be listed - // first so that they are slightly preferred to the mi forms. - def NAME#16mi8 : BinOpMI8_RMW<mnemonic, Xi16, MemMRM>; - def NAME#32mi8 : BinOpMI8_RMW<mnemonic, Xi32, MemMRM>; - let Predicates = [In64BitMode] in - def NAME#64mi8 : BinOpMI8_RMW<mnemonic, Xi64, MemMRM>; - - def NAME#8mi : BinOpMI_RMW<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi : BinOpMI_RMW<0x80, mnemonic, Xi16, opnode, MemMRM>; - def NAME#32mi : BinOpMI_RMW<0x80, mnemonic, Xi32, opnode, MemMRM>; - let Predicates = [In64BitMode] in - def NAME#64mi32 : BinOpMI_RMW<0x80, mnemonic, Xi64, opnode, MemMRM>; + let Constraints = "$src1 = $dst" in { + let isCommutable = CommutableRR, + isConvertibleToThreeAddress = ConvertibleToThreeAddressRR in { + def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>; + def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>, OpSize16; + def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>, OpSize32; + def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>; } - // These are for the disassembler since 0x82 opcode behaves like 0x80, but - // not in 64-bit mode. 
- let Predicates = [Not64BitMode], isCodeGenOnly = 1, ForceDisassemble = 1, - hasSideEffects = 0 in { - let Constraints = "$src1 = $dst" in - def NAME#8ri8 : BinOpRI8_RF<0x82, mnemonic, Xi8, RegMRM>; - let mayLoad = 1, mayStore = 1 in - def NAME#8mi8 : BinOpMI8_RMW<mnemonic, Xi8, MemMRM>; + def NAME#8rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>; + def NAME#16rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; + def NAME#32rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; + def NAME#64rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>; + + def NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>; + def NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>, OpSize16; + def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>, OpSize32; + def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>; + + let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { + def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>; + // NOTE: These are order specific, we want the ri8 forms to be listed + // first so that they are slightly preferred to the ri forms. 
+ def NAME#16ri8 : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; + def NAME#32ri8 : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; + def NAME#64ri8 : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>; + + def NAME#16ri : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM>, OpSize16; + def NAME#32ri : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM>, OpSize32; + def NAME#64ri32: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM>; } - } // Defs = [EFLAGS] - - def NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL, - "{$src, %al|al, $src}">; - def NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX, - "{$src, %ax|ax, $src}">; - def NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX, - "{$src, %eax|eax, $src}">; - def NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX, - "{$src, %rax|rax, $src}">; + } // Constraints = "$src1 = $dst" + + def NAME#8mr : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , opnode>; + def NAME#16mr : BinOpMR_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def NAME#32mr : BinOpMR_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def NAME#64mr : BinOpMR_MF<BaseOpc, mnemonic, Xi64, opnode>; + + // NOTE: These are order specific, we want the mi8 forms to be listed + // first so that they are slightly preferred to the mi forms. + def NAME#16mi8 : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, OpSize16; + def NAME#32mi8 : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, OpSize32; + let Predicates = [In64BitMode] in + def NAME#64mi8 : BinOpMI8_MF<mnemonic, Xi64, MemMRM>; + + def NAME#8mi : BinOpMI_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def NAME#16mi : BinOpMI_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; + def NAME#32mi : BinOpMI_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; + let Predicates = [In64BitMode] in + def NAME#64mi32 : BinOpMI_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; + + // These are for the disassembler since 0x82 opcode behaves like 0x80, but + // not in 64-bit mode. 
+ let Predicates = [Not64BitMode] in { + let Constraints = "$src1 = $dst" in + def NAME#8ri8 : BinOpRI8_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; + def NAME#8mi8 : BinOpMI8_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly; + } + + def NAME#8i8 : BinOpAI_AF<BaseOpc4, mnemonic, Xi8 , AL, + "{$src, %al|al, $src}">; + def NAME#16i16 : BinOpAI_AF<BaseOpc4, mnemonic, Xi16, AX, + "{$src, %ax|ax, $src}">, OpSize16; + def NAME#32i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi32, EAX, + "{$src, %eax|eax, $src}">, OpSize32; + def NAME#64i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi64, RAX, + "{$src, %rax|rax, $src}">; } /// ArithBinOp_RFF - This is an arithmetic binary operator where the pattern is @@ -874,80 +719,73 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, string mnemonic, Format RegMRM, Format MemMRM, SDNode opnode, bit CommutableRR, bit ConvertibleToThreeAddress> { - let Uses = [EFLAGS], Defs = [EFLAGS] in { - let Constraints = "$src1 = $dst" in { - let isCommutable = CommutableRR in { - def NAME#8rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi8 , opnode>; - let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def NAME#16rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi16, opnode>; - def NAME#32rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi32, opnode>; - def NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>; - } // isConvertibleToThreeAddress - } // isCommutable - - def NAME#8rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi8>; - def NAME#16rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi16>; - def NAME#32rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi32>; - def NAME#64rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi64>; - - def NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>; - def NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>; - def NAME#32rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi32, opnode>; - def NAME#64rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi64, opnode>; - - def NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>; - - let 
isConvertibleToThreeAddress = ConvertibleToThreeAddress, hasSideEffects = 0 in { - // NOTE: These are order specific, we want the ri8 forms to be listed - // first so that they are slightly preferred to the ri forms. - def NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, RegMRM>; - def NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, RegMRM>; - def NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, RegMRM>; - - def NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>; - def NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>; - def NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>; - } - } // Constraints = "$src1 = $dst" - - def NAME#8mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi16, opnode>; - def NAME#32mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi32, opnode>; - def NAME#64mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi64, opnode>; - - // NOTE: These are order specific, we want the mi8 forms to be listed - // first so that they are slightly preferred to the mi forms. - let mayLoad = 1, mayStore = 1, hasSideEffects = 0 in { - def NAME#16mi8 : BinOpMI8_RMW_FF<mnemonic, Xi16, MemMRM>; - def NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, MemMRM>; - let Predicates = [In64BitMode] in - def NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, MemMRM>; - - def NAME#8mi : BinOpMI_RMW_FF<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi : BinOpMI_RMW_FF<0x80, mnemonic, Xi16, opnode, MemMRM>; - def NAME#32mi : BinOpMI_RMW_FF<0x80, mnemonic, Xi32, opnode, MemMRM>; - let Predicates = [In64BitMode] in - def NAME#64mi32 : BinOpMI_RMW_FF<0x80, mnemonic, Xi64, opnode, MemMRM>; - } - - // These are for the disassembler since 0x82 opcode behaves like 0x80, but - // not in 64-bit mode. 
- let Predicates = [Not64BitMode], isCodeGenOnly = 1, ForceDisassemble = 1, - hasSideEffects = 0 in { - let Constraints = "$src1 = $dst" in - def NAME#8ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi8, RegMRM>; - let mayLoad = 1, mayStore = 1 in - def NAME#8mi8 : BinOpMI8_RMW_FF<mnemonic, Xi8, MemMRM>; + let Constraints = "$src1 = $dst" in { + let isCommutable = CommutableRR in { + def NAME#8rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; + let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { + def NAME#16rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def NAME#32rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def NAME#64rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode>; + } // isConvertibleToThreeAddress + } // isCommutable + + def NAME#8rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>; + def NAME#16rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; + def NAME#32rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; + def NAME#64rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>; + + def NAME#8rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>; + def NAME#16rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16; + def NAME#32rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32; + def NAME#64rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>; + + def NAME#8ri : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>; + let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { + // NOTE: These are order specific, we want the ri8 forms to be listed + // first so that they are slightly preferred to the ri forms. 
+ def NAME#16ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; + def NAME#32ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; + def NAME#64ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>; + + def NAME#16ri : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16; + def NAME#32ri : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32; + def NAME#64ri32: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>; } - } // Uses = [EFLAGS], Defs = [EFLAGS] - - def NAME#8i8 : BinOpAI_RFF<BaseOpc4, mnemonic, Xi8 , AL, - "{$src, %al|al, $src}">; - def NAME#16i16 : BinOpAI_RFF<BaseOpc4, mnemonic, Xi16, AX, - "{$src, %ax|ax, $src}">; - def NAME#32i32 : BinOpAI_RFF<BaseOpc4, mnemonic, Xi32, EAX, - "{$src, %eax|eax, $src}">; - def NAME#64i32 : BinOpAI_RFF<BaseOpc4, mnemonic, Xi64, RAX, + } // Constraints = "$src1 = $dst" + + def NAME#8mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , opnode>; + def NAME#16mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def NAME#32mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def NAME#64mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, opnode>; + + // NOTE: These are order specific, we want the mi8 forms to be listed + // first so that they are slightly preferred to the mi forms. + def NAME#16mi8 : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, OpSize16; + def NAME#32mi8 : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, OpSize32; + let Predicates = [In64BitMode] in + def NAME#64mi8 : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>; + + def NAME#8mi : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def NAME#16mi : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; + def NAME#32mi : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; + let Predicates = [In64BitMode] in + def NAME#64mi32 : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; + + // These are for the disassembler since 0x82 opcode behaves like 0x80, but + // not in 64-bit mode. 
+ let Predicates = [Not64BitMode] in { + let Constraints = "$src1 = $dst" in + def NAME#8ri8 : BinOpRI8F_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; + def NAME#8mi8 : BinOpMI8F_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly; + } + + def NAME#8i8 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi8 , AL, + "{$src, %al|al, $src}">; + def NAME#16i16 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi16, AX, + "{$src, %ax|ax, $src}">, OpSize16; + def NAME#32i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi32, EAX, + "{$src, %eax|eax, $src}">, OpSize32; + def NAME#64i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi64, RAX, "{$src, %rax|rax, $src}">; } @@ -957,93 +795,88 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, /// multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, string mnemonic, Format RegMRM, Format MemMRM, - SDNode opnode, - bit CommutableRR, bit ConvertibleToThreeAddress> { - let Defs = [EFLAGS] in { - let isCommutable = CommutableRR in { - def NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>; - let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>; - def NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>; - def NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>; - } - } // isCommutable - - def NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>; - def NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>; - def NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>; - def NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>; - - def NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>; - def NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>; - def NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>; - def NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>; - - def NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>; - - let isConvertibleToThreeAddress = ConvertibleToThreeAddress, hasSideEffects = 0 in { - // NOTE: These are 
order specific, we want the ri8 forms to be listed - // first so that they are slightly preferred to the ri forms. - def NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, RegMRM>; - def NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, RegMRM>; - def NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, RegMRM>; - - def NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>; - def NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>; - def NAME#64ri32: BinOpRI_F<0x80, mnemonic, Xi64, opnode, RegMRM>; - } - - def NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>; - def NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>; - def NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>; - - // NOTE: These are order specific, we want the mi8 forms to be listed - // first so that they are slightly preferred to the mi forms. - let mayLoad = 1, hasSideEffects = 0 in { - def NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, MemMRM>; - def NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, MemMRM>; - let Predicates = [In64BitMode] in - def NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, MemMRM>; - - def NAME#8mi : BinOpMI_F<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi : BinOpMI_F<0x80, mnemonic, Xi16, opnode, MemMRM>; - def NAME#32mi : BinOpMI_F<0x80, mnemonic, Xi32, opnode, MemMRM>; - let Predicates = [In64BitMode] in - def NAME#64mi32 : BinOpMI_F<0x80, mnemonic, Xi64, opnode, MemMRM>; - } - - // These are for the disassembler since 0x82 opcode behaves like 0x80, but - // not in 64-bit mode. 
- let Predicates = [Not64BitMode], isCodeGenOnly = 1, ForceDisassemble = 1, - hasSideEffects = 0 in { - def NAME#8ri8 : BinOpRI8_F<0x82, mnemonic, Xi8, RegMRM>; - let mayLoad = 1 in - def NAME#8mi8 : BinOpMI8_F<mnemonic, Xi8, MemMRM>; - } - } // Defs = [EFLAGS] - - def NAME#8i8 : BinOpAI_F<BaseOpc4, mnemonic, Xi8 , AL, - "{$src, %al|al, $src}">; + SDNode opnode, bit CommutableRR, + bit ConvertibleToThreeAddress> { + let isCommutable = CommutableRR in { + def NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>; + let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { + def NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>; + } // isConvertibleToThreeAddress + } // isCommutable + + def NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>; + def NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; + def NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; + def NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>; + + def NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>; + def NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16; + def NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32; + def NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>; + + def NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>; + + let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { + // NOTE: These are order specific, we want the ri8 forms to be listed + // first so that they are slightly preferred to the ri forms. 
+ def NAME#16ri8 : BinOpRI8_F<0x83, mnemonic, Xi16, RegMRM>, OpSize16; + def NAME#32ri8 : BinOpRI8_F<0x83, mnemonic, Xi32, RegMRM>, OpSize32; + def NAME#64ri8 : BinOpRI8_F<0x83, mnemonic, Xi64, RegMRM>; + + def NAME#16ri : BinOpRI_F<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16; + def NAME#32ri : BinOpRI_F<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32; + def NAME#64ri32: BinOpRI_F<0x81, mnemonic, Xi64, opnode, RegMRM>; + } + + def NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>; + def NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>; + + // NOTE: These are order specific, we want the mi8 forms to be listed + // first so that they are slightly preferred to the mi forms. + def NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, MemMRM>, OpSize16; + def NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, MemMRM>, OpSize32; + let Predicates = [In64BitMode] in + def NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, MemMRM>; + + def NAME#8mi : BinOpMI_F<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def NAME#16mi : BinOpMI_F<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; + def NAME#32mi : BinOpMI_F<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; + let Predicates = [In64BitMode] in + def NAME#64mi32 : BinOpMI_F<0x81, mnemonic, Xi64, opnode, MemMRM>; + + // These are for the disassembler since 0x82 opcode behaves like 0x80, but + // not in 64-bit mode. 
+ let Predicates = [Not64BitMode] in { + def NAME#8ri8 : BinOpRI8_F<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; + let mayLoad = 1 in + def NAME#8mi8 : BinOpMI8_F<mnemonic, Xi8, MemMRM>; + } + + def NAME#8i8 : BinOpAI_F<BaseOpc4, mnemonic, Xi8 , AL, + "{$src, %al|al, $src}">; def NAME#16i16 : BinOpAI_F<BaseOpc4, mnemonic, Xi16, AX, - "{$src, %ax|ax, $src}">; + "{$src, %ax|ax, $src}">, OpSize16; def NAME#32i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi32, EAX, - "{$src, %eax|eax, $src}">; + "{$src, %eax|eax, $src}">, OpSize32; def NAME#64i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi64, RAX, - "{$src, %rax|rax, $src}">; + "{$src, %rax|rax, $src}">; } -defm AND : ArithBinOp_RF<0x20, 0x22, 0x24, "and", MRM4r, MRM4m, +defm AND : ArithBinOp_RF<0x21, 0x23, 0x25, "and", MRM4r, MRM4m, X86and_flag, and, 1, 0, 0>; -defm OR : ArithBinOp_RF<0x08, 0x0A, 0x0C, "or", MRM1r, MRM1m, +defm OR : ArithBinOp_RF<0x09, 0x0B, 0x0D, "or", MRM1r, MRM1m, X86or_flag, or, 1, 0, 0>; -defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m, +defm XOR : ArithBinOp_RF<0x31, 0x33, 0x35, "xor", MRM6r, MRM6m, X86xor_flag, xor, 1, 0, 0>; -defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m, +defm ADD : ArithBinOp_RF<0x01, 0x03, 0x05, "add", MRM0r, MRM0m, X86add_flag, add, 1, 1, 1>; let isCompare = 1 in { -defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m, +defm SUB : ArithBinOp_RF<0x29, 0x2B, 0x2D, "sub", MRM5r, MRM5m, X86sub_flag, sub, 0, 1, 0>; } @@ -1057,13 +890,13 @@ def XOR8rr_NOREX : I<0x30, MRMDestReg, (outs GR8_NOREX:$dst), Sched<[WriteALU]>; // Arithmetic. 
-defm ADC : ArithBinOp_RFF<0x10, 0x12, 0x14, "adc", MRM2r, MRM2m, X86adc_flag, +defm ADC : ArithBinOp_RFF<0x11, 0x13, 0x15, "adc", MRM2r, MRM2m, X86adc_flag, 1, 0>; -defm SBB : ArithBinOp_RFF<0x18, 0x1A, 0x1C, "sbb", MRM3r, MRM3m, X86sbb_flag, +defm SBB : ArithBinOp_RFF<0x19, 0x1B, 0x1D, "sbb", MRM3r, MRM3m, X86sbb_flag, 0, 0>; let isCompare = 1 in { -defm CMP : ArithBinOp_F<0x38, 0x3A, 0x3C, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>; +defm CMP : ArithBinOp_F<0x39, 0x3B, 0x3D, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>; } // Patterns to recognize loads on the LHS of an ADC. We can't make X86adc_flag @@ -1201,44 +1034,37 @@ def : Pat<(store (X86adc_flag i64relocImmSExt32_su:$src, (load addr:$dst), EFLAG // they don't have all the usual imm8 and REV forms, and are encoded into a // different space. let isCompare = 1 in { - let Defs = [EFLAGS] in { - let isCommutable = 1 in { - // Avoid selecting these and instead use a test+and. Post processing will - // combine them. This gives bunch of other patterns that start with - // and a chance to match. 
- def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , null_frag>; - def TEST16rr : BinOpRR_F<0x84, "test", Xi16, null_frag>; - def TEST32rr : BinOpRR_F<0x84, "test", Xi32, null_frag>; - def TEST64rr : BinOpRR_F<0x84, "test", Xi64, null_frag>; - } // isCommutable - - let hasSideEffects = 0, mayLoad = 1 in { - def TEST8mr : BinOpMR_F<0x84, "test", Xi8 , null_frag>; - def TEST16mr : BinOpMR_F<0x84, "test", Xi16, null_frag>; - def TEST32mr : BinOpMR_F<0x84, "test", Xi32, null_frag>; - def TEST64mr : BinOpMR_F<0x84, "test", Xi64, null_frag>; - } - - def TEST8ri : BinOpRI_F<0xF6, "test", Xi8 , X86testpat, MRM0r>; - def TEST16ri : BinOpRI_F<0xF6, "test", Xi16, X86testpat, MRM0r>; - def TEST32ri : BinOpRI_F<0xF6, "test", Xi32, X86testpat, MRM0r>; - def TEST64ri32 : BinOpRI_F<0xF6, "test", Xi64, X86testpat, MRM0r>; - - def TEST8mi : BinOpMI_F<0xF6, "test", Xi8 , X86testpat, MRM0m>; - def TEST16mi : BinOpMI_F<0xF6, "test", Xi16, X86testpat, MRM0m>; - def TEST32mi : BinOpMI_F<0xF6, "test", Xi32, X86testpat, MRM0m>; - let Predicates = [In64BitMode] in - def TEST64mi32 : BinOpMI_F<0xF6, "test", Xi64, X86testpat, MRM0m>; - } // Defs = [EFLAGS] - - def TEST8i8 : BinOpAI_F<0xA8, "test", Xi8 , AL, - "{$src, %al|al, $src}">; - def TEST16i16 : BinOpAI_F<0xA8, "test", Xi16, AX, - "{$src, %ax|ax, $src}">; - def TEST32i32 : BinOpAI_F<0xA8, "test", Xi32, EAX, - "{$src, %eax|eax, $src}">; - def TEST64i32 : BinOpAI_F<0xA8, "test", Xi64, RAX, - "{$src, %rax|rax, $src}">; + let isCommutable = 1 in { + // Avoid selecting these and instead use a test+and. Post processing will + // combine them. This gives bunch of other patterns that start with + // and a chance to match. 
+ def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , null_frag>; + def TEST16rr : BinOpRR_F<0x85, "test", Xi16, null_frag>, OpSize16; + def TEST32rr : BinOpRR_F<0x85, "test", Xi32, null_frag>, OpSize32; + def TEST64rr : BinOpRR_F<0x85, "test", Xi64, null_frag>; + } // isCommutable + +def TEST8mr : BinOpMR_F<0x84, "test", Xi8 , null_frag>; +def TEST16mr : BinOpMR_F<0x85, "test", Xi16, null_frag>, OpSize16; +def TEST32mr : BinOpMR_F<0x85, "test", Xi32, null_frag>, OpSize32; +def TEST64mr : BinOpMR_F<0x85, "test", Xi64, null_frag>; + +def TEST8ri : BinOpRI_F<0xF6, "test", Xi8 , X86testpat, MRM0r>; +def TEST16ri : BinOpRI_F<0xF7, "test", Xi16, X86testpat, MRM0r>, OpSize16; +def TEST32ri : BinOpRI_F<0xF7, "test", Xi32, X86testpat, MRM0r>, OpSize32; +def TEST64ri32 : BinOpRI_F<0xF7, "test", Xi64, X86testpat, MRM0r>; + +def TEST8mi : BinOpMI_F<0xF6, "test", Xi8 , X86testpat, MRM0m>; +def TEST16mi : BinOpMI_F<0xF7, "test", Xi16, X86testpat, MRM0m>, OpSize16; +def TEST32mi : BinOpMI_F<0xF7, "test", Xi32, X86testpat, MRM0m>, OpSize32; + + let Predicates = [In64BitMode] in + def TEST64mi32 : BinOpMI_F<0xF7, "test", Xi64, X86testpat, MRM0m>; + +def TEST8i8 : BinOpAI_F<0xA8, "test", Xi8 , AL, "{$src, %al|al, $src}">; +def TEST16i16 : BinOpAI_F<0xA9, "test", Xi16, AX, "{$src, %ax|ax, $src}">, OpSize16; +def TEST32i32 : BinOpAI_F<0xA9, "test", Xi32, EAX, "{$src, %eax|eax, $src}">, OpSize32; +def TEST64i32 : BinOpAI_F<0xA9, "test", Xi64, RAX, "{$src, %rax|rax, $src}">; } // isCompare // Patterns to match a relocImm into the immediate field. 
@@ -1269,30 +1095,30 @@ let Predicates = [HasBMI, NoEGPR] in { def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>, - VEX_4V, Sched<[sched]>; + VEX, VVVV, Sched<[sched]>; def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>, - VEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; + VEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; } let Predicates = [HasBMI, HasEGPR, In64BitMode] in { def rr_EVEX : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>, - EVEX_4V, Sched<[sched]>; + EVEX, VVVV, Sched<[sched]>; def rm_EVEX : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>, - EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; + EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; } } // Complexity is reduced to give and with immediate a chance to match first. 
let Defs = [EFLAGS], AddedComplexity = -6 in { - defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32, WriteALU>, T8PS; - defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64, WriteALU>, T8PS, REX_W; + defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32, WriteALU>, T8; + defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64, WriteALU>, T8, REX_W; } let Predicates = [HasBMI], AddedComplexity = -6 in { @@ -1315,12 +1141,12 @@ let hasSideEffects = 0 in { let Predicates = [HasBMI2, NoEGPR] in { def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src), !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - []>, T8XD, VEX_4V, Sched<[WriteIMulH, sched]>; + []>, T8, XD, VEX, VVVV, Sched<[WriteIMulH, sched]>; let mayLoad = 1 in def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src), !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - []>, T8XD, VEX_4V, + []>, T8, XD, VEX, VVVV, Sched<[WriteIMulHLd, sched.Folded, // Memory operand. ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, @@ -1339,11 +1165,11 @@ let Predicates = [HasBMI2, NoEGPR] in { let Predicates = [HasBMI2, HasEGPR, In64BitMode] in def rr#_EVEX : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src), !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - []>, T8XD, EVEX_4V, Sched<[WriteIMulH, sched]>; + []>, T8, XD, EVEX, VVVV, Sched<[WriteIMulH, sched]>; let Predicates = [HasBMI2, HasEGPR, In64BitMode], mayLoad = 1 in def rm#_EVEX : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src), !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - []>, T8XD, EVEX_4V, + []>, T8, XD, EVEX, VVVV, Sched<[WriteIMulHLd, sched.Folded, // Memory operand. ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, @@ -1362,38 +1188,25 @@ let Uses = [RDX] in // // We don't have patterns for these as there is no advantage over ADC for // most code. 
-class ADCOXOpRR <bits<8> opcode, string mnemonic, X86TypeInfo info> - : BinOpRR_C<opcode, MRMSrcReg, mnemonic, info, []>{ - let Opcode = opcode; - let OpSize = OpSizeFixed; +class ADCOXOpRR <string m, X86TypeInfo t> + : BinOpRRF_RF<0xF6, m, t, null_frag> { + let Form = MRMSrcReg; + let isCommutable = 1; } -class ADCOXOpRM <bits<8> opcode, string mnemonic, X86TypeInfo info> - : BinOpRM_C<opcode, MRMSrcMem, mnemonic, info, []>{ - let Opcode = opcode; - let OpSize = OpSizeFixed; +class ADCOXOpRM <string m, X86TypeInfo t> + : BinOpRMF_RF<0xF6, m, t, null_frag> { + let Form = MRMSrcMem; } -let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS], - Constraints = "$src1 = $dst", hasSideEffects = 0 in { - let SchedRW = [WriteADC], isCommutable = 1 in { - def ADCX32rr : ADCOXOpRR<0xF6, "adcx", Xi32>, T8PD; - def ADCX64rr : ADCOXOpRR<0xF6, "adcx", Xi64>, T8PD; - - def ADOX32rr : ADCOXOpRR<0xF6, "adox", Xi32>, T8XS; - def ADOX64rr : ADCOXOpRR<0xF6, "adox", Xi64>, T8XS; - } // SchedRW - - let mayLoad = 1, - SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold, - // Memory operand. 
- ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, - // Implicit read of EFLAGS - WriteADC.ReadAfterFold] in { - def ADCX32rm : ADCOXOpRM<0xF6, "adcx", Xi32>, T8PD; - def ADCX64rm : ADCOXOpRM<0xF6, "adcx", Xi64>, T8PD; - - def ADOX32rm : ADCOXOpRM<0xF6, "adox", Xi32>, T8XS; - def ADOX64rm : ADCOXOpRM<0xF6, "adox", Xi64>, T8XS; - } // mayLoad, SchedRW +let OpSize = OpSizeFixed, Constraints = "$src1 = $dst", + Predicates = [HasADX] in { +def ADCX32rr : ADCOXOpRR<"adcx", Xi32>, T8, PD; +def ADCX64rr : ADCOXOpRR<"adcx", Xi64>, T8, PD; +def ADOX32rr : ADCOXOpRR<"adox", Xi32>, T8, XS; +def ADOX64rr : ADCOXOpRR<"adox", Xi64>, T8, XS; +def ADCX32rm : ADCOXOpRM<"adcx", Xi32>, T8, PD; +def ADCX64rm : ADCOXOpRM<"adcx", Xi64>, T8, PD; +def ADOX32rm : ADCOXOpRM<"adox", Xi32>, T8, XS; +def ADOX64rm : ADCOXOpRM<"adox", Xi64>, T8, XS; } diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAsmAlias.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAsmAlias.td index 9d0735c9cbba..2590be8651d5 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAsmAlias.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAsmAlias.td @@ -55,6 +55,11 @@ multiclass CMPCCXADD_Aliases<string Cond, int CC> { (CMPCCXADDmr32 GR32:$dst, i32mem:$dstsrc2, GR32:$src3, CC), 0>; def : InstAlias<"cmp"#Cond#"xadd"#"\t{$src3, $dst, $dstsrc2|$dstsrc2, $dst, $src3}", (CMPCCXADDmr64 GR64:$dst, i64mem:$dstsrc2, GR64:$src3, CC), 0>; + + def : InstAlias<"cmp"#Cond#"xadd"#"\t{$src3, $dst, $dstsrc2|$dstsrc2, $dst, $src3}", + (CMPCCXADDmr32_EVEX GR32:$dst, i32mem:$dstsrc2, GR32:$src3, CC), 0>; + def : InstAlias<"cmp"#Cond#"xadd"#"\t{$src3, $dst, $dstsrc2|$dstsrc2, $dst, $src3}", + (CMPCCXADDmr64_EVEX GR64:$dst, i64mem:$dstsrc2, GR64:$src3, CC), 0>; } //===----------------------------------------------------------------------===// @@ -686,3 +691,11 @@ def : InstAlias<"vmsave\t{%rax|rax}", (VMSAVE64), 0>, Requires<[In64BitMode]>; def : InstAlias<"invlpga\t{%eax, %ecx|eax, ecx}", 
(INVLPGA32), 0>, Requires<[Not64BitMode]>; def : InstAlias<"invlpga\t{%rax, %ecx|rax, ecx}", (INVLPGA64), 0>, Requires<[In64BitMode]>; +// Aliases with explicit %xmm0 +def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}", + (SHA256RNDS2rr VR128:$dst, VR128:$src2), 0>; +def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}", + (SHA256RNDS2rm VR128:$dst, i128mem:$src2), 0>; + +def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}", + (SHA256RNDS2rm_EVEX VR128:$dst, i128mem:$src2), 0>; diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td index 457833f8cc33..c77c77ee4a3e 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1515,6 +1515,23 @@ def : Pat<(X86add_flag_nocf GR32:$src1, 128), def : Pat<(X86add_flag_nocf GR64:$src1, 128), (SUB64ri32 GR64:$src1, -128)>; +// Depositing value to 8/16 bit subreg: +def : Pat<(or (and GR64:$dst, -256), + (i64 (zextloadi8 addr:$src))), + (INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>; + +def : Pat<(or (and GR32:$dst, -256), + (i32 (zextloadi8 addr:$src))), + (INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>; + +def : Pat<(or (and GR64:$dst, -65536), + (i64 (zextloadi16 addr:$src))), + (INSERT_SUBREG (i64 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>; + +def : Pat<(or (and GR32:$dst, -65536), + (i32 (zextloadi16 addr:$src))), + (INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>; + // The same trick applies for 32-bit immediate fields in 64-bit // instructions. 
def : Pat<(add GR64:$src1, 0x0000000080000000), diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td index 09655d939121..6a9a74ce15f2 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td @@ -666,20 +666,20 @@ def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", []>; let Uses = [FPSW, FPCW] in { def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaquemem:$dst), - "fxsave\t$dst", [(int_x86_fxsave addr:$dst)]>, PS, + "fxsave\t$dst", [(int_x86_fxsave addr:$dst)]>, TB, Requires<[HasFXSR]>; def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaquemem:$dst), "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)]>, - PS, Requires<[HasFXSR, In64BitMode]>; + TB, Requires<[HasFXSR, In64BitMode]>; } // Uses = [FPSW, FPCW] let Defs = [FPSW, FPCW] in { def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaquemem:$src), "fxrstor\t$src", [(int_x86_fxrstor addr:$src)]>, - PS, Requires<[HasFXSR]>; + TB, Requires<[HasFXSR]>; def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaquemem:$src), "fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)]>, - PS, Requires<[HasFXSR, In64BitMode]>; + TB, Requires<[HasFXSR, In64BitMode]>; } // Defs = [FPSW, FPCW] } // SchedRW diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td index df05a5788a50..07e5576960d6 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td @@ -180,7 +180,7 @@ class OperandSize<bits<2> val> { bits<2> Value = val; } def OpSizeFixed : OperandSize<0>; // Never needs a 0x66 prefix. -def OpSize16 : OperandSize<1>; // Needs 0x66 prefix in 32-bit mode. +def OpSize16 : OperandSize<1>; // Needs 0x66 prefix in 32/64-bit mode. def OpSize32 : OperandSize<2>; // Needs 0x66 prefix in 16-bit mode. 
// Address size for encodings that change based on mode. @@ -234,7 +234,9 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, // based on address size of the mode? bits<2> AdSizeBits = AdSize.Value; - Prefix OpPrefix = NoPrfx; // Which prefix byte does this inst have? + Encoding OpEnc = EncNormal; // Encoding used by this instruction + // Which prefix byte does this inst have? + Prefix OpPrefix = !if(!eq(OpEnc, EncNormal), NoPrfx, PS); bits<3> OpPrefixBits = OpPrefix.Value; Map OpMap = OB; // Which opcode map does this inst have? bits<4> OpMapBits = OpMap.Value; @@ -243,7 +245,6 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix? Domain ExeDomain = d; bit hasREPPrefix = 0; // Does this inst have a REP prefix? - Encoding OpEnc = EncNormal; // Encoding used by this instruction bits<2> OpEncBits = OpEnc.Value; bit IgnoresW = 0; // Does this inst ignore REX_W field? bit EVEX_W1_VEX_W0 = 0; // This EVEX inst with VEX.W==1 can become a VEX diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp index bc2d5ed1e17d..bddda6891356 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -8263,8 +8263,8 @@ bool X86InstrInfo::unfoldMemoryOperand( DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), Reg); - for (unsigned i = 0, e = AddrOps.size(); i != e; ++i) - MIB.add(AddrOps[i]); + for (const MachineOperand &AddrOp : AddrOps) + MIB.add(AddrOp); MIB.setMemRefs(MMOs); NewMIs.push_back(MIB); @@ -8341,8 +8341,8 @@ bool X86InstrInfo::unfoldMemoryOperand( unsigned Opc = getStoreRegOpcode(Reg, DstRC, isAligned, Subtarget); DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); - for (unsigned i = 0, e = AddrOps.size(); i != e; ++i) - MIB.add(AddrOps[i]); + for (const MachineOperand &AddrOp : AddrOps) + 
MIB.add(AddrOp); MIB.addReg(Reg, RegState::Kill); MIB.setMemRefs(MMOs); NewMIs.push_back(MIB); diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrKL.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrKL.td index a3392b691c0a..4586fc541627 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrKL.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrKL.td @@ -19,17 +19,17 @@ let SchedRW = [WriteSystem], Predicates = [HasKL] in { let Uses = [XMM0, EAX], Defs = [EFLAGS] in { def LOADIWKEY : I<0xDC, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "loadiwkey\t{$src2, $src1|$src1, $src2}", - [(int_x86_loadiwkey XMM0, VR128:$src1, VR128:$src2, EAX)]>, T8XS; + [(int_x86_loadiwkey XMM0, VR128:$src1, VR128:$src2, EAX)]>, T8, XS; } let Uses = [XMM0], Defs = [XMM0, XMM1, XMM2, XMM4, XMM5, XMM6, EFLAGS] in { def ENCODEKEY128 : I<0xFA, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "encodekey128\t{$src, $dst|$dst, $src}", []>, T8XS; + "encodekey128\t{$src, $dst|$dst, $src}", []>, T8, XS; } let Uses = [XMM0, XMM1], Defs = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, EFLAGS] in { def ENCODEKEY256 : I<0xFB, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "encodekey256\t{$src, $dst|$dst, $src}", []>, T8XS; + "encodekey256\t{$src, $dst|$dst, $src}", []>, T8, XS; } let Constraints = "$src1 = $dst", @@ -37,22 +37,22 @@ let SchedRW = [WriteSystem], Predicates = [HasKL] in { def AESENC128KL : I<0xDC, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2), "aesenc128kl\t{$src2, $src1|$src1, $src2}", [(set VR128:$dst, EFLAGS, - (X86aesenc128kl VR128:$src1, addr:$src2))]>, T8XS; + (X86aesenc128kl VR128:$src1, addr:$src2))]>, T8, XS; def AESDEC128KL : I<0xDD, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2), "aesdec128kl\t{$src2, $src1|$src1, $src2}", [(set VR128:$dst, EFLAGS, - (X86aesdec128kl VR128:$src1, addr:$src2))]>, T8XS; + (X86aesdec128kl VR128:$src1, addr:$src2))]>, T8, XS; def AESENC256KL : I<0xDE, MRMSrcMem, (outs VR128:$dst), 
(ins VR128:$src1, opaquemem:$src2), "aesenc256kl\t{$src2, $src1|$src1, $src2}", [(set VR128:$dst, EFLAGS, - (X86aesenc256kl VR128:$src1, addr:$src2))]>, T8XS; + (X86aesenc256kl VR128:$src1, addr:$src2))]>, T8, XS; def AESDEC256KL : I<0xDF, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2), "aesdec256kl\t{$src2, $src1|$src1, $src2}", [(set VR128:$dst, EFLAGS, - (X86aesdec256kl VR128:$src1, addr:$src2))]>, T8XS; + (X86aesdec256kl VR128:$src1, addr:$src2))]>, T8, XS; } } // SchedRW, Predicates @@ -62,13 +62,13 @@ let SchedRW = [WriteSystem], Predicates = [HasWIDEKL] in { Defs = [EFLAGS, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7], mayLoad = 1 in { def AESENCWIDE128KL : I<0xD8, MRM0m, (outs), (ins opaquemem:$src), - "aesencwide128kl\t$src", []>, T8XS; + "aesencwide128kl\t$src", []>, T8, XS; def AESDECWIDE128KL : I<0xD8, MRM1m, (outs), (ins opaquemem:$src), - "aesdecwide128kl\t$src", []>, T8XS; + "aesdecwide128kl\t$src", []>, T8, XS; def AESENCWIDE256KL : I<0xD8, MRM2m, (outs), (ins opaquemem:$src), - "aesencwide256kl\t$src", []>, T8XS; + "aesencwide256kl\t$src", []>, T8, XS; def AESDECWIDE256KL : I<0xD8, MRM3m, (outs), (ins opaquemem:$src), - "aesdecwide256kl\t$src", []>, T8XS; + "aesdecwide256kl\t$src", []>, T8, XS; } } // SchedRW, Predicates diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td index acf7605b3f53..8d6bc8d0ee2c 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td @@ -487,24 +487,24 @@ def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem, // -- Conversion Instructions defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi, f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}", - WriteCvtPS2I, SSEPackedSingle>, PS, SIMD_EXC; + WriteCvtPS2I, SSEPackedSingle>, TB, SIMD_EXC; defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi, f128mem, memop, "cvtpd2pi\t{$src, 
$dst|$dst, $src}", - WriteCvtPD2I, SSEPackedDouble>, PD, SIMD_EXC; + WriteCvtPD2I, SSEPackedDouble>, TB, PD, SIMD_EXC; defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi, f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}", - WriteCvtPS2I, SSEPackedSingle>, PS, SIMD_EXC; + WriteCvtPS2I, SSEPackedSingle>, TB, SIMD_EXC; defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi, f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}", - WriteCvtPD2I, SSEPackedDouble>, PD, SIMD_EXC; + WriteCvtPD2I, SSEPackedDouble>, TB, PD, SIMD_EXC; defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd, i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}", - WriteCvtI2PD, SSEPackedDouble>, PD; + WriteCvtI2PD, SSEPackedDouble>, TB, PD; let Constraints = "$src1 = $dst" in { defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128, int_x86_sse_cvtpi2ps, i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}", - SSEPackedSingle>, PS, SIMD_EXC; + SSEPackedSingle>, TB, SIMD_EXC; } // Extract / Insert @@ -548,13 +548,13 @@ def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), // Misc. 
let SchedRW = [SchedWriteShuffle.MMX] in { let Uses = [EDI], Predicates = [HasMMX, HasSSE1,Not64BitMode] in -def MMX_MASKMOVQ : MMXI32<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), - "maskmovq\t{$mask, $src|$src, $mask}", - [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)]>; +def MMX_MASKMOVQ : MMXI<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), + "maskmovq\t{$mask, $src|$src, $mask}", + [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)]>; let Uses = [RDI], Predicates = [HasMMX, HasSSE1,In64BitMode] in -def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), - "maskmovq\t{$mask, $src|$src, $mask}", - [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>; +def MMX_MASKMOVQ64: MMXI<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), + "maskmovq\t{$mask, $src|$src, $mask}", + [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>; } // 64-bit bit convert. diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td index 2ea10e317e12..305bd74f7bd7 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td @@ -165,10 +165,10 @@ def POPP64r : I<0x58, AddRegFrm, (outs GR64:$reg), (ins), "popp\t$reg", []>, REX_W, ExplicitREX2Prefix, Requires<[In64BitMode]>; def POP2: I<0x8F, MRM0r, (outs GR64:$reg1, GR64:$reg2), (ins), "pop2\t{$reg2, $reg1|$reg1, $reg2}", - []>, EVEX_4V, EVEX_B, T_MAP4PS; + []>, EVEX, VVVV, EVEX_B, T_MAP4; def POP2P: I<0x8F, MRM0r, (outs GR64:$reg1, GR64:$reg2), (ins), "pop2p\t{$reg2, $reg1|$reg1, $reg2}", - []>, EVEX_4V, EVEX_B, T_MAP4PS, REX_W; + []>, EVEX, VVVV, EVEX_B, T_MAP4, REX_W; } // mayLoad, SchedRW let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in @@ -186,10 +186,10 @@ def PUSHP64r : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "pushp\t$reg", []>, REX_W, ExplicitREX2Prefix, Requires<[In64BitMode]>; def PUSH2: I<0xFF, MRM6r, (outs), (ins GR64:$reg1, 
GR64:$reg2), "push2\t{$reg2, $reg1|$reg1, $reg2}", - []>, EVEX_4V, EVEX_B, T_MAP4PS; + []>, EVEX, VVVV, EVEX_B, T_MAP4; def PUSH2P: I<0xFF, MRM6r, (outs), (ins GR64:$reg1, GR64:$reg2), "push2p\t{$reg2, $reg1|$reg1, $reg2}", - []>, EVEX_4V, EVEX_B, T_MAP4PS, REX_W; + []>, EVEX, VVVV, EVEX_B, T_MAP4, REX_W; } // mayStore, SchedRW let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in { def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>, @@ -251,52 +251,52 @@ let Defs = [EFLAGS] in { def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsf{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))]>, - PS, OpSize16, Sched<[WriteBSF]>; + TB, OpSize16, Sched<[WriteBSF]>; def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bsf{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))]>, - PS, OpSize16, Sched<[WriteBSFLd]>; + TB, OpSize16, Sched<[WriteBSFLd]>; def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "bsf{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))]>, - PS, OpSize32, Sched<[WriteBSF]>; + TB, OpSize32, Sched<[WriteBSF]>; def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "bsf{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))]>, - PS, OpSize32, Sched<[WriteBSFLd]>; + TB, OpSize32, Sched<[WriteBSFLd]>; def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "bsf{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, - PS, Sched<[WriteBSF]>; + TB, Sched<[WriteBSF]>; def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "bsf{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, - PS, Sched<[WriteBSFLd]>; + TB, Sched<[WriteBSFLd]>; def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsr{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, EFLAGS, (X86bsr 
GR16:$src))]>, - PS, OpSize16, Sched<[WriteBSR]>; + TB, OpSize16, Sched<[WriteBSR]>; def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bsr{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))]>, - PS, OpSize16, Sched<[WriteBSRLd]>; + TB, OpSize16, Sched<[WriteBSRLd]>; def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "bsr{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))]>, - PS, OpSize32, Sched<[WriteBSR]>; + TB, OpSize32, Sched<[WriteBSR]>; def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "bsr{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))]>, - PS, OpSize32, Sched<[WriteBSRLd]>; + TB, OpSize32, Sched<[WriteBSRLd]>; def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "bsr{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, - PS, Sched<[WriteBSR]>; + TB, Sched<[WriteBSR]>; def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "bsr{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, - PS, Sched<[WriteBSRLd]>; + TB, Sched<[WriteBSRLd]>; } // Defs = [EFLAGS] let SchedRW = [WriteMicrocoded] in { @@ -1095,29 +1095,29 @@ let Predicates = [HasMOVBE] in { def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "movbe{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, (bswap (loadi16 addr:$src)))]>, - OpSize16, T8PS; + OpSize16, T8; def MOVBE32rm : I<0xF0, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "movbe{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bswap (loadi32 addr:$src)))]>, - OpSize32, T8PS; + OpSize32, T8; def MOVBE64rm : RI<0xF0, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "movbe{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bswap (loadi64 addr:$src)))]>, - T8PS; + T8; } let SchedRW = [WriteStore] in { def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "movbe{w}\t{$src, 
$dst|$dst, $src}", [(store (bswap GR16:$src), addr:$dst)]>, - OpSize16, T8PS; + OpSize16, T8; def MOVBE32mr : I<0xF1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "movbe{l}\t{$src, $dst|$dst, $src}", [(store (bswap GR32:$src), addr:$dst)]>, - OpSize32, T8PS; + OpSize32, T8; def MOVBE64mr : RI<0xF1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "movbe{q}\t{$src, $dst|$dst, $src}", [(store (bswap GR64:$src), addr:$dst)]>, - T8PS; + T8; } } @@ -1127,13 +1127,13 @@ let Predicates = [HasMOVBE] in { let Predicates = [HasRDRAND], Defs = [EFLAGS], SchedRW = [WriteSystem] in { def RDRAND16r : I<0xC7, MRM6r, (outs GR16:$dst), (ins), "rdrand{w}\t$dst", [(set GR16:$dst, EFLAGS, (X86rdrand))]>, - OpSize16, PS; + OpSize16, TB; def RDRAND32r : I<0xC7, MRM6r, (outs GR32:$dst), (ins), "rdrand{l}\t$dst", [(set GR32:$dst, EFLAGS, (X86rdrand))]>, - OpSize32, PS; + OpSize32, TB; def RDRAND64r : RI<0xC7, MRM6r, (outs GR64:$dst), (ins), "rdrand{q}\t$dst", [(set GR64:$dst, EFLAGS, (X86rdrand))]>, - PS; + TB; } //===----------------------------------------------------------------------===// @@ -1141,11 +1141,11 @@ let Predicates = [HasRDRAND], Defs = [EFLAGS], SchedRW = [WriteSystem] in { // let Predicates = [HasRDSEED], Defs = [EFLAGS], SchedRW = [WriteSystem] in { def RDSEED16r : I<0xC7, MRM7r, (outs GR16:$dst), (ins), "rdseed{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86rdseed))]>, OpSize16, PS; + [(set GR16:$dst, EFLAGS, (X86rdseed))]>, OpSize16, TB; def RDSEED32r : I<0xC7, MRM7r, (outs GR32:$dst), (ins), "rdseed{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86rdseed))]>, OpSize32, PS; + [(set GR32:$dst, EFLAGS, (X86rdseed))]>, OpSize32, TB; def RDSEED64r : RI<0xC7, MRM7r, (outs GR64:$dst), (ins), "rdseed{q}\t$dst", - [(set GR64:$dst, EFLAGS, (X86rdseed))]>, PS; + [(set GR64:$dst, EFLAGS, (X86rdseed))]>, TB; } //===----------------------------------------------------------------------===// @@ -1155,29 +1155,29 @@ let Predicates = [HasLZCNT], Defs = [EFLAGS] in { def LZCNT16rr : I<0xBD, 
MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "lzcnt{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, (ctlz GR16:$src)), (implicit EFLAGS)]>, - XS, OpSize16, Sched<[WriteLZCNT]>; + TB, XS, OpSize16, Sched<[WriteLZCNT]>; def LZCNT16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "lzcnt{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, (ctlz (loadi16 addr:$src))), - (implicit EFLAGS)]>, XS, OpSize16, Sched<[WriteLZCNTLd]>; + (implicit EFLAGS)]>, TB, XS, OpSize16, Sched<[WriteLZCNTLd]>; def LZCNT32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "lzcnt{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (ctlz GR32:$src)), (implicit EFLAGS)]>, - XS, OpSize32, Sched<[WriteLZCNT]>; + TB, XS, OpSize32, Sched<[WriteLZCNT]>; def LZCNT32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "lzcnt{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (ctlz (loadi32 addr:$src))), - (implicit EFLAGS)]>, XS, OpSize32, Sched<[WriteLZCNTLd]>; + (implicit EFLAGS)]>, TB, XS, OpSize32, Sched<[WriteLZCNTLd]>; def LZCNT64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "lzcnt{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (ctlz GR64:$src)), (implicit EFLAGS)]>, - XS, Sched<[WriteLZCNT]>; + TB, XS, Sched<[WriteLZCNT]>; def LZCNT64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "lzcnt{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (ctlz (loadi64 addr:$src))), - (implicit EFLAGS)]>, XS, Sched<[WriteLZCNTLd]>; + (implicit EFLAGS)]>, TB, XS, Sched<[WriteLZCNTLd]>; } //===----------------------------------------------------------------------===// @@ -1187,29 +1187,29 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in { def TZCNT16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "tzcnt{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, (cttz GR16:$src)), (implicit EFLAGS)]>, - XS, OpSize16, Sched<[WriteTZCNT]>; + TB, XS, OpSize16, Sched<[WriteTZCNT]>; def TZCNT16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "tzcnt{w}\t{$src, $dst|$dst, 
$src}", [(set GR16:$dst, (cttz (loadi16 addr:$src))), - (implicit EFLAGS)]>, XS, OpSize16, Sched<[WriteTZCNTLd]>; + (implicit EFLAGS)]>, TB, XS, OpSize16, Sched<[WriteTZCNTLd]>; def TZCNT32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "tzcnt{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (cttz GR32:$src)), (implicit EFLAGS)]>, - XS, OpSize32, Sched<[WriteTZCNT]>; + TB, XS, OpSize32, Sched<[WriteTZCNT]>; def TZCNT32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "tzcnt{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (cttz (loadi32 addr:$src))), - (implicit EFLAGS)]>, XS, OpSize32, Sched<[WriteTZCNTLd]>; + (implicit EFLAGS)]>, TB, XS, OpSize32, Sched<[WriteTZCNTLd]>; def TZCNT64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "tzcnt{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (cttz GR64:$src)), (implicit EFLAGS)]>, - XS, Sched<[WriteTZCNT]>; + TB, XS, Sched<[WriteTZCNT]>; def TZCNT64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "tzcnt{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (cttz (loadi64 addr:$src))), - (implicit EFLAGS)]>, XS, Sched<[WriteTZCNTLd]>; + (implicit EFLAGS)]>, TB, XS, Sched<[WriteTZCNTLd]>; } multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM, @@ -1218,11 +1218,11 @@ multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM, let hasSideEffects = 0 in { def rr#Suffix : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src), !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>, - T8PS, VEX_4V, Sched<[sched]>; + T8, VEX, VVVV, Sched<[sched]>; let mayLoad = 1 in def rm#Suffix : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src), !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>, - T8PS, VEX_4V, Sched<[sched.Folded]>; + T8, VEX, VVVV, Sched<[sched.Folded]>; } } @@ -1288,12 +1288,12 @@ multiclass bmi4VOp3_base<bits<8> opc, string mnemonic, RegisterClass RC, def rr#Suffix : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(mnemonic, "\t{$src2, $src1, 
$dst|$dst, $src1, $src2}"), [(set RC:$dst, (OpNode RC:$src1, RC:$src2)), (implicit EFLAGS)]>, - T8PS, VEX, Sched<[Sched]>; + T8, VEX, Sched<[Sched]>; let mayLoad = 1 in def rm#Suffix : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (OpNode (ld_frag addr:$src1), RC:$src2)), - (implicit EFLAGS)]>, T8PS, VEX, + (implicit EFLAGS)]>, T8, VEX, Sched<[Sched.Folded, // x86memop:$src1 ReadDefault, ReadDefault, ReadDefault, ReadDefault, @@ -1371,33 +1371,33 @@ multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC, def rr#Suffix : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>, - VEX_4V, Sched<[WriteALU]>; + VEX, VVVV, Sched<[WriteALU]>; def rm#Suffix : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (OpNode RC:$src1, (ld_frag addr:$src2)))]>, - VEX_4V, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>; + VEX, VVVV, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>; } let Predicates = [HasBMI2, NoEGPR] in { defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem, - X86pdep, loadi32>, T8XD; + X86pdep, loadi32>, T8, XD; defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem, - X86pdep, loadi64>, T8XD, REX_W; + X86pdep, loadi64>, T8, XD, REX_W; defm PEXT32 : bmi_pdep_pext<"pext{l}", GR32, i32mem, - X86pext, loadi32>, T8XS; + X86pext, loadi32>, T8, XS; defm PEXT64 : bmi_pdep_pext<"pext{q}", GR64, i64mem, - X86pext, loadi64>, T8XS, REX_W; + X86pext, loadi64>, T8, XS, REX_W; } let Predicates = [HasBMI2, HasEGPR] in { defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem, - X86pdep, loadi32, "_EVEX">, T8XD, EVEX; + X86pdep, loadi32, "_EVEX">, T8, XD, EVEX; defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem, - X86pdep, loadi64, "_EVEX">, T8XD, REX_W, EVEX; + 
X86pdep, loadi64, "_EVEX">, T8, XD, REX_W, EVEX; defm PEXT32 : bmi_pdep_pext<"pext{l}", GR32, i32mem, - X86pext, loadi32, "_EVEX">, T8XS, EVEX; + X86pext, loadi32, "_EVEX">, T8, XS, EVEX; defm PEXT64 : bmi_pdep_pext<"pext{q}", GR64, i64mem, - X86pext, loadi64, "_EVEX">, T8XS, REX_W, EVEX; + X86pext, loadi64, "_EVEX">, T8, XS, REX_W, EVEX; } //===----------------------------------------------------------------------===// @@ -1419,12 +1419,12 @@ multiclass lwpins_intr<RegisterClass RC> { def rri : Ii32<0x12, MRM0r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl), "lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", [(set EFLAGS, (X86lwpins RC:$src0, GR32:$src1, timm:$cntl))]>, - XOP_4V, XOPA; + XOP, VVVV, XOPA; let mayLoad = 1 in def rmi : Ii32<0x12, MRM0m, (outs), (ins RC:$src0, i32mem:$src1, i32imm:$cntl), "lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", [(set EFLAGS, (X86lwpins RC:$src0, (loadi32 addr:$src1), timm:$cntl))]>, - XOP_4V, XOPA; + XOP, VVVV, XOPA; } let Defs = [EFLAGS] in { @@ -1435,12 +1435,12 @@ let Defs = [EFLAGS] in { multiclass lwpval_intr<RegisterClass RC, Intrinsic Int> { def rri : Ii32<0x12, MRM1r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl), "lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", - [(Int RC:$src0, GR32:$src1, timm:$cntl)]>, XOP_4V, XOPA; + [(Int RC:$src0, GR32:$src1, timm:$cntl)]>, XOP, VVVV, XOPA; let mayLoad = 1 in def rmi : Ii32<0x12, MRM1m, (outs), (ins RC:$src0, i32mem:$src1, i32imm:$cntl), "lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", [(Int RC:$src0, (loadi32 addr:$src1), timm:$cntl)]>, - XOP_4V, XOPA; + XOP, VVVV, XOPA; } defm LWPVAL32 : lwpval_intr<GR32, int_x86_lwpval32>; @@ -1471,22 +1471,22 @@ let SchedRW = [ WriteSystem ] in { let SchedRW = [WriteSystem] in { def UMONITOR16 : I<0xAE, MRM6r, (outs), (ins GR16:$src), "umonitor\t$src", [(int_x86_umonitor GR16:$src)]>, - XS, AdSize16, Requires<[HasWAITPKG, Not64BitMode]>; + TB, XS, AdSize16, Requires<[HasWAITPKG, Not64BitMode]>; def UMONITOR32 : I<0xAE, 
MRM6r, (outs), (ins GR32:$src), "umonitor\t$src", [(int_x86_umonitor GR32:$src)]>, - XS, AdSize32, Requires<[HasWAITPKG]>; + TB, XS, AdSize32, Requires<[HasWAITPKG]>; def UMONITOR64 : I<0xAE, MRM6r, (outs), (ins GR64:$src), "umonitor\t$src", [(int_x86_umonitor GR64:$src)]>, - XS, AdSize64, Requires<[HasWAITPKG, In64BitMode]>; + TB, XS, AdSize64, Requires<[HasWAITPKG, In64BitMode]>; let Uses = [EAX, EDX], Defs = [EFLAGS] in { def UMWAIT : I<0xAE, MRM6r, (outs), (ins GR32orGR64:$src), "umwait\t$src", [(set EFLAGS, (X86umwait GR32orGR64:$src, EDX, EAX))]>, - XD, Requires<[HasWAITPKG]>; + TB, XD, Requires<[HasWAITPKG]>; def TPAUSE : I<0xAE, MRM6r, (outs), (ins GR32orGR64:$src), "tpause\t$src", [(set EFLAGS, (X86tpause GR32orGR64:$src, EDX, EAX))]>, - PD, Requires<[HasWAITPKG]>; + TB, PD, Requires<[HasWAITPKG]>; } } // SchedRW @@ -1497,19 +1497,19 @@ let SchedRW = [WriteStore] in { def MOVDIRI32 : I<0xF9, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "movdiri\t{$src, $dst|$dst, $src}", [(int_x86_directstore32 addr:$dst, GR32:$src)]>, - T8PS, Requires<[HasMOVDIRI, NoEGPR]>; + T8, Requires<[HasMOVDIRI, NoEGPR]>; def MOVDIRI64 : RI<0xF9, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "movdiri\t{$src, $dst|$dst, $src}", [(int_x86_directstore64 addr:$dst, GR64:$src)]>, - T8PS, Requires<[In64BitMode, HasMOVDIRI, NoEGPR]>; + T8, Requires<[In64BitMode, HasMOVDIRI, NoEGPR]>; def MOVDIRI32_EVEX : I<0xF9, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "movdiri\t{$src, $dst|$dst, $src}", [(int_x86_directstore32 addr:$dst, GR32:$src)]>, - EVEX_NoCD8, T_MAP4PS, Requires<[In64BitMode, HasMOVDIRI, HasEGPR]>; + EVEX, NoCD8, T_MAP4, Requires<[In64BitMode, HasMOVDIRI, HasEGPR]>; def MOVDIRI64_EVEX : RI<0xF9, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "movdiri\t{$src, $dst|$dst, $src}", [(int_x86_directstore64 addr:$dst, GR64:$src)]>, - EVEX_NoCD8, T_MAP4PS, Requires<[In64BitMode, HasMOVDIRI, HasEGPR]>; + EVEX, NoCD8, T_MAP4, Requires<[In64BitMode, HasMOVDIRI, HasEGPR]>; } 
// SchedRW //===----------------------------------------------------------------------===// @@ -1518,23 +1518,23 @@ def MOVDIRI64_EVEX : RI<0xF9, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), let SchedRW = [WriteStore] in { def MOVDIR64B16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem_GR16:$src), "movdir64b\t{$src, $dst|$dst, $src}", []>, - T8PD, AdSize16, Requires<[HasMOVDIR64B, Not64BitMode]>; + T8, PD, AdSize16, Requires<[HasMOVDIR64B, Not64BitMode]>; def MOVDIR64B32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem_GR32:$src), "movdir64b\t{$src, $dst|$dst, $src}", [(int_x86_movdir64b GR32:$dst, addr:$src)]>, - T8PD, AdSize32, Requires<[HasMOVDIR64B, NoEGPR]>; + T8, PD, AdSize32, Requires<[HasMOVDIR64B, NoEGPR]>; def MOVDIR64B64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem_GR64:$src), "movdir64b\t{$src, $dst|$dst, $src}", [(int_x86_movdir64b GR64:$dst, addr:$src)]>, - T8PD, AdSize64, Requires<[HasMOVDIR64B, NoEGPR, In64BitMode]>; + T8, PD, AdSize64, Requires<[HasMOVDIR64B, NoEGPR, In64BitMode]>; def MOVDIR64B32_EVEX : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem_GR32:$src), "movdir64b\t{$src, $dst|$dst, $src}", [(int_x86_movdir64b GR32:$dst, addr:$src)]>, - EVEX_NoCD8, T_MAP4PD, AdSize32, Requires<[HasMOVDIR64B, HasEGPR, In64BitMode]>; + EVEX, NoCD8, T_MAP4, PD, AdSize32, Requires<[HasMOVDIR64B, HasEGPR, In64BitMode]>; def MOVDIR64B64_EVEX : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem_GR64:$src), "movdir64b\t{$src, $dst|$dst, $src}", [(int_x86_movdir64b GR64:$dst, addr:$src)]>, - EVEX_NoCD8, T_MAP4PD, AdSize64, Requires<[HasMOVDIR64B, HasEGPR, In64BitMode]>; + EVEX, NoCD8, T_MAP4, PD, AdSize64, Requires<[HasMOVDIR64B, HasEGPR, In64BitMode]>; } // SchedRW //===----------------------------------------------------------------------===// @@ -1544,28 +1544,28 @@ let SchedRW = [WriteStore], Defs = [EFLAGS] in { def ENQCMD16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src), "enqcmd\t{$src, $dst|$dst, $src}", [(set EFLAGS, 
(X86enqcmd GR16:$dst, addr:$src))]>, - T8XD, AdSize16, Requires<[HasENQCMD, Not64BitMode]>; + T8, XD, AdSize16, Requires<[HasENQCMD, Not64BitMode]>; def ENQCMD32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src), "enqcmd\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmd GR32:$dst, addr:$src))]>, - T8XD, AdSize32, Requires<[HasENQCMD]>; + T8, XD, AdSize32, Requires<[HasENQCMD]>; def ENQCMD64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src), "enqcmd\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmd GR64:$dst, addr:$src))]>, - T8XD, AdSize64, Requires<[HasENQCMD, In64BitMode]>; + T8, XD, AdSize64, Requires<[HasENQCMD, In64BitMode]>; def ENQCMDS16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src), "enqcmds\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmds GR16:$dst, addr:$src))]>, - T8XS, AdSize16, Requires<[HasENQCMD, Not64BitMode]>; + T8, XS, AdSize16, Requires<[HasENQCMD, Not64BitMode]>; def ENQCMDS32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src), "enqcmds\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmds GR32:$dst, addr:$src))]>, - T8XS, AdSize32, Requires<[HasENQCMD]>; + T8, XS, AdSize32, Requires<[HasENQCMD]>; def ENQCMDS64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src), "enqcmds\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmds GR64:$dst, addr:$src))]>, - T8XS, AdSize64, Requires<[HasENQCMD, In64BitMode]>; + T8, XS, AdSize64, Requires<[HasENQCMD, In64BitMode]>; } //===----------------------------------------------------------------------===// @@ -1588,11 +1588,11 @@ let SchedRW = [WriteSystem] in { let Uses = [EAX, EDX] in def INVLPGB32 : I<0x01, MRM_FE, (outs), (ins), "invlpgb", []>, - PS, Requires<[Not64BitMode]>; + TB, Requires<[Not64BitMode]>; let Uses = [RAX, EDX] in def INVLPGB64 : I<0x01, MRM_FE, (outs), (ins), "invlpgb", []>, - PS, Requires<[In64BitMode]>; + TB, Requires<[In64BitMode]>; } // SchedRW //===----------------------------------------------------------------------===// @@ -1602,7 
+1602,7 @@ let SchedRW = [WriteSystem] in { let SchedRW = [WriteSystem] in { def TLBSYNC : I<0x01, MRM_FF, (outs), (ins), "tlbsync", []>, - PS, Requires<[]>; + TB, Requires<[]>; } // SchedRW //===----------------------------------------------------------------------===// @@ -1610,14 +1610,14 @@ let SchedRW = [WriteSystem] in { // let Uses = [EAX], SchedRW = [WriteSystem] in def HRESET : Ii8<0xF0, MRM_C0, (outs), (ins i32u8imm:$imm), "hreset\t$imm", []>, - Requires<[HasHRESET]>, TAXS; + Requires<[HasHRESET]>, TA, XS; //===----------------------------------------------------------------------===// // SERIALIZE Instruction // let SchedRW = [WriteSystem] in def SERIALIZE : I<0x01, MRM_E8, (outs), (ins), "serialize", - [(int_x86_serialize)]>, PS, + [(int_x86_serialize)]>, TB, Requires<[HasSERIALIZE]>; //===----------------------------------------------------------------------===// @@ -1625,9 +1625,9 @@ let SchedRW = [WriteSystem] in // let Predicates = [HasTSXLDTRK], SchedRW = [WriteSystem] in { def XSUSLDTRK : I<0x01, MRM_E8, (outs), (ins), "xsusldtrk", - [(int_x86_xsusldtrk)]>, XD; + [(int_x86_xsusldtrk)]>, TB, XD; def XRESLDTRK : I<0x01, MRM_E9, (outs), (ins), "xresldtrk", - [(int_x86_xresldtrk)]>, XD; + [(int_x86_xresldtrk)]>, TB, XD; } //===----------------------------------------------------------------------===// @@ -1635,18 +1635,18 @@ let Predicates = [HasTSXLDTRK], SchedRW = [WriteSystem] in { // let Predicates = [HasUINTR, In64BitMode], SchedRW = [WriteSystem] in { def UIRET : I<0x01, MRM_EC, (outs), (ins), "uiret", - []>, XS; + []>, TB, XS; def CLUI : I<0x01, MRM_EE, (outs), (ins), "clui", - [(int_x86_clui)]>, XS; + [(int_x86_clui)]>, TB, XS; def STUI : I<0x01, MRM_EF, (outs), (ins), "stui", - [(int_x86_stui)]>, XS; + [(int_x86_stui)]>, TB, XS; def SENDUIPI : I<0xC7, MRM6r, (outs), (ins GR64:$arg), "senduipi\t$arg", - [(int_x86_senduipi GR64:$arg)]>, XS; + [(int_x86_senduipi GR64:$arg)]>, TB, XS; let Defs = [EFLAGS] in def TESTUI : I<0x01, MRM_ED, (outs), 
(ins), "testui", - [(set EFLAGS, (X86testui))]>, XS; + [(set EFLAGS, (X86testui))]>, TB, XS; } //===----------------------------------------------------------------------===// @@ -1663,21 +1663,38 @@ let Predicates = [HasPREFETCHI, In64BitMode], SchedRW = [WriteLoad] in { // CMPCCXADD Instructions // let isCodeGenOnly = 1, ForceDisassemble = 1, mayLoad = 1, mayStore = 1, - Predicates = [HasCMPCCXADD, In64BitMode], Defs = [EFLAGS], - Constraints = "$dstsrc1 = $dst" in { + Defs = [EFLAGS], Constraints = "$dstsrc1 = $dst" in { +let Predicates = [HasCMPCCXADD, NoEGPR, In64BitMode] in { def CMPCCXADDmr32 : I<0xe0, MRMDestMem4VOp3CC, (outs GR32:$dst), (ins GR32:$dstsrc1, i32mem:$dstsrc2, GR32:$src3, ccode:$cond), "cmp${cond}xadd\t{$src3, $dst, $dstsrc2|$dstsrc2, $dst, $src3}", [(set GR32:$dst, (X86cmpccxadd addr:$dstsrc2, GR32:$dstsrc1, GR32:$src3, timm:$cond))]>, - VEX_4V, T8PD, Sched<[WriteXCHG]>; + VEX, VVVV, T8, PD, Sched<[WriteXCHG]>; def CMPCCXADDmr64 : I<0xe0, MRMDestMem4VOp3CC, (outs GR64:$dst), (ins GR64:$dstsrc1, i64mem:$dstsrc2, GR64:$src3, ccode:$cond), "cmp${cond}xadd\t{$src3, $dst, $dstsrc2|$dstsrc2, $dst, $src3}", [(set GR64:$dst, (X86cmpccxadd addr:$dstsrc2, GR64:$dstsrc1, GR64:$src3, timm:$cond))]>, - VEX_4V, REX_W, T8PD, Sched<[WriteXCHG]>; + VEX, VVVV, REX_W, T8, PD, Sched<[WriteXCHG]>; +} + +let Predicates = [HasCMPCCXADD, HasEGPR, In64BitMode] in { +def CMPCCXADDmr32_EVEX : I<0xe0, MRMDestMem4VOp3CC, (outs GR32:$dst), + (ins GR32:$dstsrc1, i32mem:$dstsrc2, GR32:$src3, ccode:$cond), + "cmp${cond}xadd\t{$src3, $dst, $dstsrc2|$dstsrc2, $dst, $src3}", + [(set GR32:$dst, (X86cmpccxadd addr:$dstsrc2, + GR32:$dstsrc1, GR32:$src3, timm:$cond))]>, + EVEX, VVVV, NoCD8, T8, PD, Sched<[WriteXCHG]>; + +def CMPCCXADDmr64_EVEX : I<0xe0, MRMDestMem4VOp3CC, (outs GR64:$dst), + (ins GR64:$dstsrc1, i64mem:$dstsrc2, GR64:$src3, ccode:$cond), + "cmp${cond}xadd\t{$src3, $dst, $dstsrc2|$dstsrc2, $dst, $src3}", + [(set GR64:$dst, (X86cmpccxadd addr:$dstsrc2, + 
GR64:$dstsrc1, GR64:$src3, timm:$cond))]>, + EVEX, VVVV, NoCD8, REX_W, T8, PD, Sched<[WriteXCHG]>; +} } //===----------------------------------------------------------------------===// @@ -1686,12 +1703,12 @@ def CMPCCXADDmr64 : I<0xe0, MRMDestMem4VOp3CC, (outs GR64:$dst), let Predicates = [HasCLFLUSHOPT], SchedRW = [WriteLoad] in def CLFLUSHOPT : I<0xAE, MRM7m, (outs), (ins i8mem:$src), - "clflushopt\t$src", [(int_x86_clflushopt addr:$src)]>, PD; + "clflushopt\t$src", [(int_x86_clflushopt addr:$src)]>, TB, PD; let Predicates = [HasCLWB], SchedRW = [WriteLoad] in def CLWB : I<0xAE, MRM6m, (outs), (ins i8mem:$src), "clwb\t$src", - [(int_x86_clwb addr:$src)]>, PD; + [(int_x86_clwb addr:$src)]>, TB, PD; let Predicates = [HasCLDEMOTE], SchedRW = [WriteLoad] in def CLDEMOTE : I<0x1C, MRM0m, (outs), (ins i8mem:$src), "cldemote\t$src", - [(int_x86_cldemote addr:$src)]>, PS; + [(int_x86_cldemote addr:$src)]>, TB; diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrRAOINT.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrRAOINT.td index dc0e267a83e3..bc17b00f3573 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrRAOINT.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrRAOINT.td @@ -39,7 +39,7 @@ multiclass RAOINT_BASE<string OpcodeStr> { Sched<[WriteALURMW]>, REX_W; } -defm AADD : RAOINT_BASE<"add">, T8PS; -defm AAND : RAOINT_BASE<"and">, T8PD; -defm AOR : RAOINT_BASE<"or" >, T8XD; -defm AXOR : RAOINT_BASE<"xor">, T8XS; +defm AADD : RAOINT_BASE<"add">, T8; +defm AAND : RAOINT_BASE<"and">, T8, PD; +defm AOR : RAOINT_BASE<"or" >, T8, XD; +defm AXOR : RAOINT_BASE<"xor">, T8, XS; diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSGX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSGX.td index 6439f717accb..747f5aa86653 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSGX.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSGX.td @@ -17,13 +17,13 @@ let SchedRW = [WriteSystem], Predicates = [HasSGX] in { // 
ENCLS - Execute an Enclave System Function of Specified Leaf Number def ENCLS : I<0x01, MRM_CF, (outs), (ins), - "encls", []>, PS; + "encls", []>, TB; // ENCLU - Execute an Enclave User Function of Specified Leaf Number def ENCLU : I<0x01, MRM_D7, (outs), (ins), - "enclu", []>, PS; + "enclu", []>, TB; // ENCLV - Execute an Enclave VMM Function of Specified Leaf Number def ENCLV : I<0x01, MRM_C0, (outs), (ins), - "enclv", []>, PS; + "enclv", []>, TB; } // SchedRW diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSNP.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSNP.td index ab13fa43c92d..05ed6585db6d 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSNP.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSNP.td @@ -17,31 +17,31 @@ let SchedRW = [WriteSystem] in { // F3 0F 01 FF let Uses = [RAX], Defs = [EAX, EFLAGS] in -def PSMASH: I<0x01, MRM_FF, (outs), (ins), "psmash", []>, XS, +def PSMASH: I<0x01, MRM_FF, (outs), (ins), "psmash", []>, TB, XS, Requires<[In64BitMode]>; // F2 0F 01 FF let Uses = [RAX, RCX, RDX], Defs = [EAX, EFLAGS] in def PVALIDATE64: I<0x01, MRM_FF, (outs), (ins), "pvalidate",[]>, - XD, Requires<[In64BitMode]>; + TB, XD, Requires<[In64BitMode]>; let Uses = [EAX, ECX, EDX], Defs = [EAX, EFLAGS] in def PVALIDATE32: I<0x01, MRM_FF, (outs), (ins), "pvalidate",[]>, - XD, Requires<[Not64BitMode]>; + TB, XD, Requires<[Not64BitMode]>; // F2 0F 01 FE let Uses = [RAX, RCX], Defs = [EAX, EFLAGS] in -def RMPUPDATE: I<0x01, MRM_FE, (outs), (ins), "rmpupdate", []>, XD, +def RMPUPDATE: I<0x01, MRM_FE, (outs), (ins), "rmpupdate", []>, TB, XD, Requires<[In64BitMode]>; // F3 0F 01 FE let Uses = [RAX, RCX, RDX], Defs = [EAX, EFLAGS] in -def RMPADJUST: I<0x01, MRM_FE, (outs), (ins), "rmpadjust", []>, XS, +def RMPADJUST: I<0x01, MRM_FE, (outs), (ins), "rmpadjust", []>, TB, XS, Requires<[In64BitMode]>; // F3 0F 01 FD let Uses = [RAX, RDX], Defs = [RAX, RCX, RDX, EFLAGS] in -def RMPQUERY: I<0x01, MRM_FD, (outs), (ins), "rmpquery", 
[]>, XS, +def RMPQUERY: I<0x01, MRM_FD, (outs), (ins), "rmpquery", []>, TB, XS, Requires<[In64BitMode]>; } // SchedRW diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td index cf57fe562ed5..df1f0b5b4ca7 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td @@ -215,7 +215,7 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt, let Predicates = [UseAVX, OptForSize] in defm V#NAME : sse12_move_rr<OpNode, vt, OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d>, - VEX_4V, VEX_LIG, WIG; + VEX, VVVV, VEX_LIG, WIG; def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), @@ -268,15 +268,15 @@ multiclass sse12_move_rm<RegisterClass RC, ValueType vt, X86MemOperand x86memop, } defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss", - SSEPackedSingle, UseSSE1>, XS; + SSEPackedSingle, UseSSE1>, TB, XS; defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd", - SSEPackedDouble, UseSSE2>, XD; + SSEPackedDouble, UseSSE2>, TB, XD; let canFoldAsLoad = 1, isReMaterializable = 1 in { defm MOVSS : sse12_move_rm<FR32, v4f32, f32mem, loadf32, X86vzload32, "movss", - SSEPackedSingle>, XS; + SSEPackedSingle>, TB, XS; defm MOVSD : sse12_move_rm<FR64, v2f64, f64mem, loadf64, X86vzload64, "movsd", - SSEPackedDouble>, XD; + SSEPackedDouble>, TB, XD; } // Patterns @@ -352,46 +352,46 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in let Predicates = [HasAVX, NoVLX] in { defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps", SSEPackedSingle, SchedWriteFMoveLS.XMM>, - PS, VEX, WIG; + TB, VEX, WIG; defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd", SSEPackedDouble, SchedWriteFMoveLS.XMM>, - PD, VEX, WIG; + TB, PD, VEX, WIG; defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, 
loadv4f32, "movups", SSEPackedSingle, SchedWriteFMoveLS.XMM>, - PS, VEX, WIG; + TB, VEX, WIG; defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", SSEPackedDouble, SchedWriteFMoveLS.XMM>, - PD, VEX, WIG; + TB, PD, VEX, WIG; defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, "movaps", SSEPackedSingle, SchedWriteFMoveLS.YMM>, - PS, VEX, VEX_L, WIG; + TB, VEX, VEX_L, WIG; defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd", SSEPackedDouble, SchedWriteFMoveLS.YMM>, - PD, VEX, VEX_L, WIG; + TB, PD, VEX, VEX_L, WIG; defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups", SSEPackedSingle, SchedWriteFMoveLS.YMM>, - PS, VEX, VEX_L, WIG; + TB, VEX, VEX_L, WIG; defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd", SSEPackedDouble, SchedWriteFMoveLS.YMM>, - PD, VEX, VEX_L, WIG; + TB, PD, VEX, VEX_L, WIG; } let Predicates = [UseSSE1] in { defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps", SSEPackedSingle, SchedWriteFMoveLS.XMM>, - PS; + TB; defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups", SSEPackedSingle, SchedWriteFMoveLS.XMM>, - PS; + TB; } let Predicates = [UseSSE2] in { defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd", SSEPackedDouble, SchedWriteFMoveLS.XMM>, - PD; + TB, PD; defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", SSEPackedDouble, SchedWriteFMoveLS.XMM>, - PD; + TB, PD; } let Predicates = [HasAVX, NoVLX] in { @@ -666,7 +666,7 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDPatternOperator pdnode, def PSrm : PI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), !strconcat(base_opc, "s", asm_opr), - [], SSEPackedSingle>, PS, + [], SSEPackedSingle>, TB, Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; def PDrm : PI<opc, MRMSrcMem, @@ -674,7 +674,7 @@ multiclass 
sse12_mov_hilo_packed_base<bits<8>opc, SDPatternOperator pdnode, !strconcat(base_opc, "d", asm_opr), [(set VR128:$dst, (v2f64 (pdnode VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))))], - SSEPackedDouble>, PD, + SSEPackedDouble>, TB, PD, Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; } @@ -683,7 +683,7 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, SDPatternOperator pdnode, let Predicates = [UseAVX] in defm V#NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc, "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, - VEX_4V, WIG; + VEX, VVVV, WIG; let Constraints = "$src1 = $dst" in defm NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc, @@ -823,14 +823,14 @@ let Predicates = [UseAVX] in { "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>, - VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, WIG; + VEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>, WIG; let isCommutable = 1 in def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>, - VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, WIG; + VEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>, WIG; } let Constraints = "$src1 = $dst" in { def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), @@ -903,36 +903,36 @@ let isCodeGenOnly = 1, Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPExceptio defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint, f32mem, loadf32, "cvttss2si", "cvttss2si", WriteCvtSS2I, SSEPackedSingle>, - XS, VEX, VEX_LIG; + TB, XS, VEX, VEX_LIG; defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint, f32mem, loadf32, "cvttss2si", "cvttss2si", WriteCvtSS2I, SSEPackedSingle>, - XS, VEX, REX_W, VEX_LIG; + TB, XS, VEX, REX_W, VEX_LIG; defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64, "cvttsd2si", "cvttsd2si", WriteCvtSD2I, 
SSEPackedDouble>, - XD, VEX, VEX_LIG; + TB, XD, VEX, VEX_LIG; defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64, "cvttsd2si", "cvttsd2si", WriteCvtSD2I, SSEPackedDouble>, - XD, VEX, REX_W, VEX_LIG; + TB, XD, VEX, REX_W, VEX_LIG; defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32, "cvtss2si", "cvtss2si", WriteCvtSS2I, SSEPackedSingle>, - XS, VEX, VEX_LIG; + TB, XS, VEX, VEX_LIG; defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32, "cvtss2si", "cvtss2si", WriteCvtSS2I, SSEPackedSingle>, - XS, VEX, REX_W, VEX_LIG; + TB, XS, VEX, REX_W, VEX_LIG; defm VCVTSD2SI : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64, "cvtsd2si", "cvtsd2si", WriteCvtSD2I, SSEPackedDouble>, - XD, VEX, VEX_LIG; + TB, XD, VEX, VEX_LIG; defm VCVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64, "cvtsd2si", "cvtsd2si", WriteCvtSD2I, SSEPackedDouble>, - XD, VEX, REX_W, VEX_LIG; + TB, XD, VEX, REX_W, VEX_LIG; } // The assembler can recognize rr 64-bit instructions by seeing a rxx @@ -941,16 +941,16 @@ defm VCVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64, // where appropriate to do so. 
let isCodeGenOnly = 1 in { defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss", "l", - WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V, + WriteCvtI2SS, SSEPackedSingle>, TB, XS, VEX, VVVV, VEX_LIG, SIMD_EXC; defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss", "q", - WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V, + WriteCvtI2SS, SSEPackedSingle>, TB, XS, VEX, VVVV, REX_W, VEX_LIG, SIMD_EXC; defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd", "l", - WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V, + WriteCvtI2SD, SSEPackedDouble>, TB, XD, VEX, VVVV, VEX_LIG; defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q", - WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V, + WriteCvtI2SD, SSEPackedDouble>, TB, XD, VEX, VVVV, REX_W, VEX_LIG, SIMD_EXC; } // isCodeGenOnly = 1 @@ -983,42 +983,42 @@ let Predicates = [UseAVX] in { let isCodeGenOnly = 1 in { defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint, f32mem, loadf32, "cvttss2si", "cvttss2si", - WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC; + WriteCvtSS2I, SSEPackedSingle>, TB, XS, SIMD_EXC; defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint, f32mem, loadf32, "cvttss2si", "cvttss2si", - WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC; + WriteCvtSS2I, SSEPackedSingle>, TB, XS, REX_W, SIMD_EXC; defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64, "cvttsd2si", "cvttsd2si", - WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC; + WriteCvtSD2I, SSEPackedDouble>, TB, XD, SIMD_EXC; defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64, "cvttsd2si", "cvttsd2si", - WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC; + WriteCvtSD2I, SSEPackedDouble>, TB, XD, REX_W, SIMD_EXC; defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32, "cvtss2si", "cvtss2si", - WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC; + WriteCvtSS2I, SSEPackedSingle>, TB, XS, SIMD_EXC; defm CVTSS2SI64 : sse12_cvt_s<0x2D, 
FR32, GR64, llrint, f32mem, loadf32, "cvtss2si", "cvtss2si", - WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC; + WriteCvtSS2I, SSEPackedSingle>, TB, XS, REX_W, SIMD_EXC; defm CVTSD2SI : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64, "cvtsd2si", "cvtsd2si", - WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC; + WriteCvtSD2I, SSEPackedDouble>, TB, XD, SIMD_EXC; defm CVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64, "cvtsd2si", "cvtsd2si", - WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC; + WriteCvtSD2I, SSEPackedDouble>, TB, XD, REX_W, SIMD_EXC; defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, any_sint_to_fp, i32mem, loadi32, "cvtsi2ss", "cvtsi2ss{l}", - WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, SIMD_EXC; + WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, TB, XS, SIMD_EXC; defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, any_sint_to_fp, i64mem, loadi64, "cvtsi2ss", "cvtsi2ss{q}", - WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, REX_W, SIMD_EXC; + WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, TB, XS, REX_W, SIMD_EXC; defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, any_sint_to_fp, i32mem, loadi32, "cvtsi2sd", "cvtsi2sd{l}", - WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD; + WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, TB, XD; defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, any_sint_to_fp, i64mem, loadi64, "cvtsi2sd", "cvtsi2sd{q}", - WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD, REX_W, SIMD_EXC; + WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, TB, XD, REX_W, SIMD_EXC; } // isCodeGenOnly = 1 let Predicates = [UseSSE1] in { @@ -1074,46 +1074,46 @@ let Uses = [MXCSR], mayRaiseFPException = 1 in { let Predicates = [UseAVX] in { defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si, sdmem, sse_load_f64, "cvtsd2si", - WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_LIG; + WriteCvtSD2I, SSEPackedDouble>, TB, XD, VEX, VEX_LIG; defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si, sdmem, sse_load_f64, 
"cvtsd2si", - WriteCvtSD2I, SSEPackedDouble>, XD, VEX, REX_W, VEX_LIG; + WriteCvtSD2I, SSEPackedDouble>, TB, XD, VEX, REX_W, VEX_LIG; } defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si, sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I, - SSEPackedDouble>, XD; + SSEPackedDouble>, TB, XD; defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si, sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I, - SSEPackedDouble>, XD, REX_W; + SSEPackedDouble>, TB, XD, REX_W; } let Predicates = [UseAVX] in { defm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle, 0>, - XS, VEX_4V, VEX_LIG, SIMD_EXC; + TB, XS, VEX, VVVV, VEX_LIG, SIMD_EXC; defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle, 0>, - XS, VEX_4V, VEX_LIG, REX_W, SIMD_EXC; + TB, XS, VEX, VVVV, VEX_LIG, REX_W, SIMD_EXC; defm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble, 0>, - XD, VEX_4V, VEX_LIG; + TB, XD, VEX, VVVV, VEX_LIG; defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble, 0>, - XD, VEX_4V, VEX_LIG, REX_W, SIMD_EXC; + TB, XD, VEX, VVVV, VEX_LIG, REX_W, SIMD_EXC; } let Constraints = "$src1 = $dst" in { defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle>, - XS, SIMD_EXC; + TB, XS, SIMD_EXC; defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle>, - XS, REX_W, SIMD_EXC; + TB, XS, REX_W, SIMD_EXC; defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble>, - XD; + TB, XD; defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble>, - XD, REX_W, SIMD_EXC; + TB, XD, REX_W, SIMD_EXC; } def : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, 
$dst|$dst, $src1, $src2}", @@ -1150,34 +1150,34 @@ def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}", let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in { defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int, ssmem, sse_load_f32, "cvttss2si", - WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG; + WriteCvtSS2I, SSEPackedSingle>, TB, XS, VEX, VEX_LIG; defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32, X86cvtts2Int, ssmem, sse_load_f32, "cvttss2si", WriteCvtSS2I, SSEPackedSingle>, - XS, VEX, VEX_LIG, REX_W; + TB, XS, VEX, VEX_LIG, REX_W; defm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int, sdmem, sse_load_f64, "cvttsd2si", - WriteCvtSS2I, SSEPackedDouble>, XD, VEX, VEX_LIG; + WriteCvtSS2I, SSEPackedDouble>, TB, XD, VEX, VEX_LIG; defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64, X86cvtts2Int, sdmem, sse_load_f64, "cvttsd2si", WriteCvtSS2I, SSEPackedDouble>, - XD, VEX, VEX_LIG, REX_W; + TB, XD, VEX, VEX_LIG, REX_W; } let Uses = [MXCSR], mayRaiseFPException = 1 in { defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int, ssmem, sse_load_f32, "cvttss2si", - WriteCvtSS2I, SSEPackedSingle>, XS; + WriteCvtSS2I, SSEPackedSingle>, TB, XS; defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32, X86cvtts2Int, ssmem, sse_load_f32, "cvttss2si", WriteCvtSS2I, SSEPackedSingle>, - XS, REX_W; + TB, XS, REX_W; defm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int, sdmem, sse_load_f64, "cvttsd2si", - WriteCvtSD2I, SSEPackedDouble>, XD; + WriteCvtSD2I, SSEPackedDouble>, TB, XD; defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64, X86cvtts2Int, sdmem, sse_load_f64, "cvttsd2si", WriteCvtSD2I, SSEPackedDouble>, - XD, REX_W; + TB, XD, REX_W; } def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}", @@ -1217,32 +1217,32 @@ def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", let Predicates = [UseAVX], Uses = [MXCSR], 
mayRaiseFPException = 1 in { defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si, ssmem, sse_load_f32, "cvtss2si", - WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG; + WriteCvtSS2I, SSEPackedSingle>, TB, XS, VEX, VEX_LIG; defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si, ssmem, sse_load_f32, "cvtss2si", - WriteCvtSS2I, SSEPackedSingle>, XS, VEX, REX_W, VEX_LIG; + WriteCvtSS2I, SSEPackedSingle>, TB, XS, VEX, REX_W, VEX_LIG; } let Uses = [MXCSR], mayRaiseFPException = 1 in { defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si, ssmem, sse_load_f32, "cvtss2si", - WriteCvtSS2I, SSEPackedSingle>, XS; + WriteCvtSS2I, SSEPackedSingle>, TB, XS; defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si, ssmem, sse_load_f32, "cvtss2si", - WriteCvtSS2I, SSEPackedSingle>, XS, REX_W; + WriteCvtSS2I, SSEPackedSingle>, TB, XS, REX_W; defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load, "vcvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle, WriteCvtI2PS>, - PS, VEX, Requires<[HasAVX, NoVLX]>, WIG; + TB, VEX, Requires<[HasAVX, NoVLX]>, WIG; defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, load, "vcvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle, WriteCvtI2PSY>, - PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, WIG; + TB, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, WIG; defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memop, "cvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle, WriteCvtI2PS>, - PS, Requires<[UseSSE2]>; + TB, Requires<[UseSSE2]>; } // AVX aliases @@ -1289,13 +1289,13 @@ let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX], def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - VEX_4V, VEX_LIG, WIG, + VEX, VVVV, VEX_LIG, WIG, Sched<[WriteCvtSD2SS]>, SIMD_EXC; let mayLoad = 1 in def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins 
FR32:$src1, f64mem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - XD, VEX_4V, VEX_LIG, WIG, + TB, XD, VEX, VVVV, VEX_LIG, WIG, Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC; } @@ -1311,7 +1311,7 @@ def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (any_fpround (loadf64 addr:$src)))]>, - XD, Requires<[UseSSE2, OptForSize]>, + TB, XD, Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC; } @@ -1321,14 +1321,14 @@ def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg, "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>, - XD, VEX_4V, VEX_LIG, WIG, Requires<[UseAVX]>, + TB, XD, VEX, VVVV, VEX_LIG, WIG, Requires<[UseAVX]>, Sched<[WriteCvtSD2SS]>; def VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>, - XD, VEX_4V, VEX_LIG, WIG, Requires<[UseAVX]>, + TB, XD, VEX, VVVV, VEX_LIG, WIG, Requires<[UseAVX]>, Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>; let Constraints = "$src1 = $dst" in { def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg, @@ -1336,13 +1336,13 @@ def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg, "cvtsd2ss\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>, - XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>; + TB, XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>; def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "cvtsd2ss\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>, - XD, Requires<[UseSSE2]>, + TB, XD, Requires<[UseSSE2]>, 
Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>; } } @@ -1353,13 +1353,13 @@ let isCodeGenOnly = 1, hasSideEffects = 0, ExeDomain = SSEPackedSingle in { def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - XS, VEX_4V, VEX_LIG, WIG, + TB, XS, VEX, VVVV, VEX_LIG, WIG, Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>, SIMD_EXC; let mayLoad = 1 in def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f32mem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - XS, VEX_4V, VEX_LIG, WIG, + TB, XS, VEX, VVVV, VEX_LIG, WIG, Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>, Requires<[UseAVX, OptForSize]>, SIMD_EXC; } // isCodeGenOnly = 1, hasSideEffects = 0 @@ -1373,11 +1373,11 @@ let isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in { def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (any_fpextend FR32:$src))]>, - XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>, SIMD_EXC; + TB, XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>, SIMD_EXC; def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (any_fpextend (loadf32 addr:$src)))]>, - XS, Requires<[UseSSE2, OptForSize]>, + TB, XS, Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>, SIMD_EXC; } // isCodeGenOnly = 1 @@ -1386,25 +1386,25 @@ let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1, def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, XS, VEX_4V, VEX_LIG, WIG, + []>, TB, XS, VEX, VVVV, VEX_LIG, WIG, Requires<[HasAVX]>, Sched<[WriteCvtSS2SD]>; let mayLoad = 1 in def VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, 
$src1, $src2}", - []>, XS, VEX_4V, VEX_LIG, WIG, Requires<[HasAVX]>, + []>, TB, XS, VEX, VVVV, VEX_LIG, WIG, Requires<[HasAVX]>, Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>; let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix def CVTSS2SDrr_Int: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", - []>, XS, Requires<[UseSSE2]>, + []>, TB, XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>; let mayLoad = 1 in def CVTSS2SDrm_Int: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", - []>, XS, Requires<[UseSSE2]>, + []>, TB, XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>; } } // hasSideEffects = 0 @@ -1699,30 +1699,30 @@ let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>, - PS, VEX, Sched<[WriteCvtPS2PD]>, WIG; + TB, VEX, Sched<[WriteCvtPS2PD]>, WIG; def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>, - PS, VEX, Sched<[WriteCvtPS2PD.Folded]>, WIG; + TB, VEX, Sched<[WriteCvtPS2PD.Folded]>, WIG; def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (v4f64 (any_fpextend (v4f32 VR128:$src))))]>, - PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, WIG; + TB, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, WIG; def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (v4f64 (extloadv4f32 addr:$src)))]>, - PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, WIG; + TB, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, WIG; } let Predicates = [UseSSE2], Uses = 
[MXCSR], mayRaiseFPException = 1 in { def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>, - PS, Sched<[WriteCvtPS2PD]>; + TB, Sched<[WriteCvtPS2PD]>; def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>, - PS, Sched<[WriteCvtPS2PD.Folded]>; + TB, Sched<[WriteCvtPS2PD.Folded]>; } // Convert Packed DW Integers to Packed Double FP @@ -1860,22 +1860,22 @@ let ExeDomain = SSEPackedSingle in defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32, "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, - XS, VEX_4V, VEX_LIG, WIG; + TB, XS, VEX, VVVV, VEX_LIG, WIG; let ExeDomain = SSEPackedDouble in defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64, "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, - XD, VEX_4V, VEX_LIG, WIG; + TB, XD, VEX, VVVV, VEX_LIG, WIG; let Constraints = "$src1 = $dst" in { let ExeDomain = SSEPackedSingle in defm CMPSS : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32, "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", - SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS; + SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, TB, XS; let ExeDomain = SSEPackedDouble in defm CMPSD : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64, "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}", - SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, XD; + SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, TB, XD; } // sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS @@ -1919,44 +1919,44 @@ let mayLoad = 1 in let Defs = [EFLAGS] in { defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32, - "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, WIG; + "ucomiss", 
SSEPackedSingle>, TB, VEX, VEX_LIG, WIG; defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64, - "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, WIG; + "ucomisd", SSEPackedDouble>, TB, PD, VEX, VEX_LIG, WIG; defm VCOMISS : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32, - "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, WIG; + "comiss", SSEPackedSingle>, TB, VEX, VEX_LIG, WIG; defm VCOMISD : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64, - "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, WIG; + "comisd", SSEPackedDouble>, TB, PD, VEX, VEX_LIG, WIG; let isCodeGenOnly = 1 in { defm VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, - sse_load_f32, "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, WIG; + sse_load_f32, "ucomiss", SSEPackedSingle>, TB, VEX, VEX_LIG, WIG; defm VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, - sse_load_f64, "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, WIG; + sse_load_f64, "ucomisd", SSEPackedDouble>, TB, PD, VEX, VEX_LIG, WIG; defm VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, - sse_load_f32, "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, WIG; + sse_load_f32, "comiss", SSEPackedSingle>, TB, VEX, VEX_LIG, WIG; defm VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, - sse_load_f64, "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, WIG; + sse_load_f64, "comisd", SSEPackedDouble>, TB, PD, VEX, VEX_LIG, WIG; } defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32, - "ucomiss", SSEPackedSingle>, PS; + "ucomiss", SSEPackedSingle>, TB; defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64, - "ucomisd", SSEPackedDouble>, PD; + "ucomisd", SSEPackedDouble>, TB, PD; defm COMISS : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32, - "comiss", SSEPackedSingle>, PS; + "comiss", SSEPackedSingle>, TB; defm COMISD : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64, - "comisd", 
SSEPackedDouble>, PD; + "comisd", SSEPackedDouble>, TB, PD; let isCodeGenOnly = 1 in { defm UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, - sse_load_f32, "ucomiss", SSEPackedSingle>, PS; + sse_load_f32, "ucomiss", SSEPackedSingle>, TB; defm UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, - sse_load_f64, "ucomisd", SSEPackedDouble>, PD; + sse_load_f64, "ucomisd", SSEPackedDouble>, TB, PD; defm COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, - sse_load_f32, "comiss", SSEPackedSingle>, PS; + sse_load_f32, "comiss", SSEPackedSingle>, TB; defm COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, - sse_load_f64, "comisd", SSEPackedDouble>, PD; + sse_load_f64, "comisd", SSEPackedDouble>, TB, PD; } } // Defs = [EFLAGS] @@ -1979,23 +1979,23 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, defm VCMPPS : sse12_cmp_packed<VR128, f128mem, v4f32, "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", - SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, loadv4f32>, PS, VEX_4V, WIG; + SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, loadv4f32>, TB, VEX, VVVV, WIG; defm VCMPPD : sse12_cmp_packed<VR128, f128mem, v2f64, "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", - SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, loadv2f64>, PD, VEX_4V, WIG; + SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, loadv2f64>, TB, PD, VEX, VVVV, WIG; defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, v8f32, "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", - SchedWriteFCmpSizes.PS.YMM, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L, WIG; + SchedWriteFCmpSizes.PS.YMM, SSEPackedSingle, loadv8f32>, TB, VEX, VVVV, VEX_L, WIG; defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, v4f64, "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", - SchedWriteFCmpSizes.PD.YMM, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L, WIG; + SchedWriteFCmpSizes.PD.YMM, SSEPackedDouble, loadv4f64>, TB, PD, VEX, VVVV, VEX_L, WIG; let 
Constraints = "$src1 = $dst" in { defm CMPPS : sse12_cmp_packed<VR128, f128mem, v4f32, "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}", - SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, memopv4f32>, PS; + SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, memopv4f32>, TB; defm CMPPD : sse12_cmp_packed<VR128, f128mem, v2f64, "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}", - SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD; + SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, TB, PD; } def CommutableCMPCC : PatLeaf<(timm), [{ @@ -2076,27 +2076,27 @@ let Predicates = [HasAVX, NoVLX] in { defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32, "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", loadv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, - PS, VEX_4V, WIG; + TB, VEX, VVVV, WIG; defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32, "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", loadv8f32, SchedWriteFShuffle.YMM, SSEPackedSingle>, - PS, VEX_4V, VEX_L, WIG; + TB, VEX, VVVV, VEX_L, WIG; defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64, "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", loadv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>, - PD, VEX_4V, WIG; + TB, PD, VEX, VVVV, WIG; defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64, "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", loadv4f64, SchedWriteFShuffle.YMM, SSEPackedDouble>, - PD, VEX_4V, VEX_L, WIG; + TB, PD, VEX, VVVV, VEX_L, WIG; } let Constraints = "$src1 = $dst" in { defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32, "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", - memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, PS; + memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, TB; defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64, "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", - memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD; + memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, TB, PD; } 
//===----------------------------------------------------------------------===// @@ -2126,44 +2126,44 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt, let Predicates = [HasAVX, NoVLX] in { defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, load, VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, WIG; + SchedWriteFShuffle.XMM, SSEPackedSingle>, TB, VEX, VVVV, WIG; defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, load, VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD, VEX_4V, WIG; + SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, TB, PD, VEX, VVVV, WIG; defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, load, VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, WIG; + SchedWriteFShuffle.XMM, SSEPackedSingle>, TB, VEX, VVVV, WIG; defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, load, VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SchedWriteFShuffle.XMM, SSEPackedDouble>, PD, VEX_4V, WIG; + SchedWriteFShuffle.XMM, SSEPackedDouble>, TB, PD, VEX, VVVV, WIG; defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, load, VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, WIG; + SchedWriteFShuffle.YMM, SSEPackedSingle>, TB, VEX, VVVV, VEX_L, WIG; defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, load, VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, WIG; + SchedWriteFShuffle.YMM, SSEPackedDouble>, TB, PD, VEX, VVVV, VEX_L, WIG; defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, load, VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - 
SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, WIG; + SchedWriteFShuffle.YMM, SSEPackedSingle>, TB, VEX, VVVV, VEX_L, WIG; defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, load, VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, WIG; + SchedWriteFShuffle.YMM, SSEPackedDouble>, TB, PD, VEX, VVVV, VEX_L, WIG; }// Predicates = [HasAVX, NoVLX] let Constraints = "$src1 = $dst" in { defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memop, VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}", - SchedWriteFShuffle.XMM, SSEPackedSingle>, PS; + SchedWriteFShuffle.XMM, SSEPackedSingle>, TB; defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memop, VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}", - SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD; + SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, TB, PD; defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memop, VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}", - SchedWriteFShuffle.XMM, SSEPackedSingle>, PS; + SchedWriteFShuffle.XMM, SSEPackedSingle>, TB; defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memop, VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}", - SchedWriteFShuffle.XMM, SSEPackedDouble>, PD; + SchedWriteFShuffle.XMM, SSEPackedDouble>, TB, PD; } // Constraints = "$src1 = $dst" let Predicates = [HasAVX1Only] in { @@ -2208,13 +2208,13 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, ValueType vt, let Predicates = [HasAVX] in { defm VMOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps", - SSEPackedSingle>, PS, VEX, WIG; + SSEPackedSingle>, TB, VEX, WIG; defm VMOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd", - SSEPackedDouble>, PD, VEX, WIG; + SSEPackedDouble>, TB, PD, VEX, WIG; defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, v8f32, "movmskps", - SSEPackedSingle>, PS, VEX, VEX_L, WIG; + SSEPackedSingle>, TB, VEX, VEX_L, WIG; defm 
VMOVMSKPDY : sse12_extr_sign_mask<VR256, v4f64, "movmskpd", - SSEPackedDouble>, PD, VEX, VEX_L, WIG; + SSEPackedDouble>, TB, PD, VEX, VEX_L, WIG; // Also support integer VTs to avoid a int->fp bitcast in the DAG. def : Pat<(X86movmsk (v4i32 VR128:$src)), @@ -2228,9 +2228,9 @@ let Predicates = [HasAVX] in { } defm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps", - SSEPackedSingle>, PS; + SSEPackedSingle>, TB; defm MOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd", - SSEPackedDouble>, PD; + SSEPackedDouble>, TB, PD; let Predicates = [UseSSE2] in { // Also support integer VTs to avoid a int->fp bitcast in the DAG. @@ -2276,7 +2276,7 @@ multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode, let Predicates = [HasAVX, prd] in defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128, VR128, load, i128mem, sched.XMM, - IsCommutable, 0>, VEX_4V, WIG; + IsCommutable, 0>, VEX, VVVV, WIG; let Constraints = "$src1 = $dst" in defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128, @@ -2285,7 +2285,7 @@ let Constraints = "$src1 = $dst" in let Predicates = [HasAVX2, prd] in defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT256, VR256, load, i256mem, sched.YMM, - IsCommutable, 0>, VEX_4V, VEX_L, WIG; + IsCommutable, 0>, VEX, VVVV, VEX_L, WIG; } // These are ordered here for pattern ordering requirements with the fp versions @@ -2312,29 +2312,29 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, let Predicates = [HasAVX, NoVLX] in { defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f256mem, sched.YMM, - [], [], 0>, PS, VEX_4V, VEX_L, WIG; + [], [], 0>, TB, VEX, VVVV, VEX_L, WIG; defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble, !strconcat(OpcodeStr, "pd"), f256mem, sched.YMM, - [], [], 0>, PD, VEX_4V, VEX_L, WIG; + [], [], 0>, TB, PD, VEX, VVVV, VEX_L, WIG; defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, 
VR128, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f128mem, sched.XMM, - [], [], 0>, PS, VEX_4V, WIG; + [], [], 0>, TB, VEX, VVVV, WIG; defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, !strconcat(OpcodeStr, "pd"), f128mem, sched.XMM, - [], [], 0>, PD, VEX_4V, WIG; + [], [], 0>, TB, PD, VEX, VVVV, WIG; } let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f128mem, sched.XMM, - [], []>, PS; + [], []>, TB; defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, !strconcat(OpcodeStr, "pd"), f128mem, sched.XMM, - [], []>, PD; + [], []>, TB, PD; } } @@ -2636,26 +2636,26 @@ let Uses = [MXCSR], mayRaiseFPException = 1 in { let Predicates = [HasAVX, NoVLX] in { defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128, v4f32, f128mem, loadv4f32, - SSEPackedSingle, sched.PS.XMM, 0>, PS, VEX_4V, WIG; + SSEPackedSingle, sched.PS.XMM, 0>, TB, VEX, VVVV, WIG; defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128, v2f64, f128mem, loadv2f64, - SSEPackedDouble, sched.PD.XMM, 0>, PD, VEX_4V, WIG; + SSEPackedDouble, sched.PD.XMM, 0>, TB, PD, VEX, VVVV, WIG; defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256, v8f32, f256mem, loadv8f32, - SSEPackedSingle, sched.PS.YMM, 0>, PS, VEX_4V, VEX_L, WIG; + SSEPackedSingle, sched.PS.YMM, 0>, TB, VEX, VVVV, VEX_L, WIG; defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256, v4f64, f256mem, loadv4f64, - SSEPackedDouble, sched.PD.YMM, 0>, PD, VEX_4V, VEX_L, WIG; + SSEPackedDouble, sched.PD.YMM, 0>, TB, PD, VEX, VVVV, VEX_L, WIG; } let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128, v4f32, f128mem, memopv4f32, SSEPackedSingle, - sched.PS.XMM>, PS; + sched.PS.XMM>, TB; defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128, v2f64, f128mem, 
memopv2f64, SSEPackedDouble, - sched.PD.XMM>, PD; + sched.PD.XMM>, TB, PD; } } } @@ -2665,18 +2665,18 @@ multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDPatternOperat let Uses = [MXCSR], mayRaiseFPException = 1 in { defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), OpNode, FR32, f32mem, SSEPackedSingle, sched.PS.Scl, 0>, - XS, VEX_4V, VEX_LIG, WIG; + TB, XS, VEX, VVVV, VEX_LIG, WIG; defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), OpNode, FR64, f64mem, SSEPackedDouble, sched.PD.Scl, 0>, - XD, VEX_4V, VEX_LIG, WIG; + TB, XD, VEX, VVVV, VEX_LIG, WIG; let Constraints = "$src1 = $dst" in { defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), OpNode, FR32, f32mem, SSEPackedSingle, - sched.PS.Scl>, XS; + sched.PS.Scl>, TB, XS; defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), OpNode, FR64, f64mem, SSEPackedDouble, - sched.PD.Scl>, XD; + sched.PD.Scl>, TB, XD; } } } @@ -2687,18 +2687,18 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, let Uses = [MXCSR], mayRaiseFPException = 1 in { defm V#NAME#SS : sse12_fp_scalar_int<opc, OpNode, VR128, v4f32, !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32, - SSEPackedSingle, sched.PS.Scl, 0>, XS, VEX_4V, VEX_LIG, WIG; + SSEPackedSingle, sched.PS.Scl, 0>, TB, XS, VEX, VVVV, VEX_LIG, WIG; defm V#NAME#SD : sse12_fp_scalar_int<opc, OpNode, VR128, v2f64, !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64, - SSEPackedDouble, sched.PD.Scl, 0>, XD, VEX_4V, VEX_LIG, WIG; + SSEPackedDouble, sched.PD.Scl, 0>, TB, XD, VEX, VVVV, VEX_LIG, WIG; let Constraints = "$src1 = $dst" in { defm SS : sse12_fp_scalar_int<opc, OpNode, VR128, v4f32, !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32, - SSEPackedSingle, sched.PS.Scl>, XS; + SSEPackedSingle, sched.PS.Scl>, TB, XS; defm SD : sse12_fp_scalar_int<opc, OpNode, VR128, v2f64, !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64, - SSEPackedDouble, sched.PD.Scl>, XD; + SSEPackedDouble, sched.PD.Scl>, TB, XD; } } } @@ 
-3016,29 +3016,29 @@ let Predicates = [HasAVX, NoVLX] in { multiclass sse1_fp_unop_s_intr<string OpcodeStr, Predicate AVXTarget> { defm SS : sse_fp_unop_s_intr<v4f32, sse_load_f32, !cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss), - UseSSE1>, XS; + UseSSE1>, TB, XS; defm V#NAME#SS : avx_fp_unop_s_intr<v4f32, sse_load_f32, !cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss), AVXTarget>, - XS, VEX_4V, VEX_LIG, WIG; + TB, XS, VEX, VVVV, VEX_LIG, WIG; } multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, X86SchedWriteWidths sched, Predicate AVXTarget> { defm SS : sse_fp_unop_s<opc, OpcodeStr#ss, FR32, f32mem, - ssmem, OpNode, SSEPackedSingle, sched.Scl, UseSSE1>, XS; + ssmem, OpNode, SSEPackedSingle, sched.Scl, UseSSE1>, TB, XS; defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr#ss, FR32, f32, f32mem, ssmem, OpNode, SSEPackedSingle, sched.Scl, AVXTarget>, - XS, VEX_4V, VEX_LIG, WIG; + TB, XS, VEX, VVVV, VEX_LIG, WIG; } multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, X86SchedWriteWidths sched, Predicate AVXTarget> { defm SD : sse_fp_unop_s<opc, OpcodeStr#sd, FR64, f64mem, - sdmem, OpNode, SSEPackedDouble, sched.Scl, UseSSE2>, XD; + sdmem, OpNode, SSEPackedDouble, sched.Scl, UseSSE2>, TB, XD; defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr#sd, FR64, f64, f64mem, sdmem, OpNode, SSEPackedDouble, sched.Scl, AVXTarget>, - XD, VEX_4V, VEX_LIG, WIG; + TB, XD, VEX, VVVV, VEX_LIG, WIG; } // Square root. 
@@ -3165,11 +3165,11 @@ let SchedRW = [WriteStoreNT] in { def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "movnti{l}\t{$src, $dst|$dst, $src}", [(nontemporalstore (i32 GR32:$src), addr:$dst)]>, - PS, Requires<[HasSSE2]>; + TB, Requires<[HasSSE2]>; def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "movnti{q}\t{$src, $dst|$dst, $src}", [(nontemporalstore (i64 GR64:$src), addr:$dst)]>, - PS, Requires<[HasSSE2]>; + TB, Requires<[HasSSE2]>; } // SchedRW = [WriteStoreNT] let Predicates = [HasAVX, NoVLX] in { @@ -3226,14 +3226,14 @@ let SchedRW = [WriteLoad] in { // Flush cache def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>, - PS, Requires<[HasCLFLUSH]>; + TB, Requires<[HasCLFLUSH]>; } let SchedRW = [WriteNop] in { // Pause. This "instruction" is encoded as "rep; nop", so even though it // was introduced with SSE2, it's backward compatible. def PAUSE : I<0x90, RawFrm, (outs), (ins), - "pause", [(int_x86_sse2_pause)]>, OBXS; + "pause", [(int_x86_sse2_pause)]>, XS; } let SchedRW = [WriteFence] in { @@ -3241,11 +3241,11 @@ let SchedRW = [WriteFence] in { // TODO: As with mfence, we may want to ease the availability of sfence/lfence // to include any 64-bit target. 
def SFENCE : I<0xAE, MRM7X, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>, - PS, Requires<[HasSSE1]>; + TB, Requires<[HasSSE1]>; def LFENCE : I<0xAE, MRM5X, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>, - PS, Requires<[HasSSE2]>; + TB, Requires<[HasSSE2]>; def MFENCE : I<0xAE, MRM6X, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>, - PS, Requires<[HasMFence]>; + TB, Requires<[HasMFence]>; } // SchedRW def : Pat<(X86MFence), (MFENCE)>; @@ -3266,11 +3266,11 @@ def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), let mayLoad=1, hasSideEffects=1, Defs=[MXCSR] in def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src), "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, - PS, Sched<[WriteLDMXCSR]>; + TB, Sched<[WriteLDMXCSR]>; let mayStore=1, hasSideEffects=1, Uses=[MXCSR] in def STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst), "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, - PS, Sched<[WriteSTMXCSR]>; + TB, Sched<[WriteSTMXCSR]>; //===---------------------------------------------------------------------===// // SSE2 - Move Aligned/Unaligned Packed Integer Instructions @@ -3327,11 +3327,11 @@ def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovdqu\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (loadv2i64 addr:$src))]>, Sched<[SchedWriteVecMoveLS.XMM.RM]>, - XS, VEX, WIG; + TB, XS, VEX, WIG; def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), "vmovdqu\t{$src, $dst|$dst, $src}", []>, Sched<[SchedWriteVecMoveLS.YMM.RM]>, - XS, VEX, VEX_L, WIG; + TB, XS, VEX, VEX_L, WIG; } let mayStore = 1, hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in { @@ -3347,10 +3347,10 @@ def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs), def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "vmovdqu\t{$src, $dst|$dst, $src}", [(store (v2i64 VR128:$src), addr:$dst)]>, - Sched<[SchedWriteVecMoveLS.XMM.MR]>, XS, VEX, WIG; + Sched<[SchedWriteVecMoveLS.XMM.MR]>, TB, XS, VEX, WIG; def VMOVDQUYmr : I<0x7F, 
MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), "vmovdqu\t{$src, $dst|$dst, $src}",[]>, - Sched<[SchedWriteVecMoveLS.YMM.MR]>, XS, VEX, VEX_L, WIG; + Sched<[SchedWriteVecMoveLS.YMM.MR]>, TB, XS, VEX, VEX_L, WIG; } let SchedRW = [SchedWriteVecMoveLS.XMM.RR] in { @@ -3360,7 +3360,7 @@ def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", []>, - XS, Requires<[UseSSE2]>; + TB, XS, Requires<[UseSSE2]>; } // For Disassembler @@ -3370,7 +3370,7 @@ def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", []>, - XS, Requires<[UseSSE2]>; + TB, XS, Requires<[UseSSE2]>; } } // SchedRW @@ -3382,7 +3382,7 @@ def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqu\t{$src, $dst|$dst, $src}", [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>, - XS, Requires<[UseSSE2]>; + TB, XS, Requires<[UseSSE2]>; } let mayStore = 1, hasSideEffects = 0, @@ -3393,7 +3393,7 @@ def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [/*(store (v2i64 VR128:$src), addr:$dst)*/]>, - XS, Requires<[UseSSE2]>; + TB, XS, Requires<[UseSSE2]>; } } // ExeDomain = SSEPackedInt @@ -3537,12 +3537,12 @@ defm PMULUDQ : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64, let Predicates = [HasAVX, NoVLX_Or_NoBWI] in defm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16, VR128, load, i128mem, SchedWriteVecIMul.XMM, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in defm VPMADDWDY : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v8i32, v16i16, VR256, load, i256mem, 
SchedWriteVecIMul.YMM, - 0>, VEX_4V, VEX_L, WIG; + 0>, VEX, VVVV, VEX_L, WIG; let Constraints = "$src1 = $dst" in defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128, memop, i128mem, SchedWriteVecIMul.XMM>; @@ -3550,11 +3550,11 @@ defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128, let Predicates = [HasAVX, NoVLX_Or_NoBWI] in defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8, VR128, load, i128mem, SchedWritePSADBW.XMM, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in defm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8, VR256, load, i256mem, SchedWritePSADBW.YMM, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; let Constraints = "$src1 = $dst" in defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128, memop, i128mem, SchedWritePSADBW.XMM>; @@ -3604,11 +3604,11 @@ multiclass PDI_binop_rmi_all<bits<8> opc, bits<8> opc2, Format ImmForm, let Predicates = [HasAVX, prd] in defm V#NAME : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr), OpNode, OpNode2, VR128, sched.XMM, schedImm.XMM, - DstVT128, SrcVT, load, 0>, VEX_4V, WIG; + DstVT128, SrcVT, load, 0>, VEX, VVVV, WIG; let Predicates = [HasAVX2, prd] in defm V#NAME#Y : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr), OpNode, OpNode2, VR256, sched.YMM, schedImm.YMM, - DstVT256, SrcVT, load, 0>, VEX_4V, VEX_L, + DstVT256, SrcVT, load, 0>, VEX, VVVV, VEX_L, WIG; let Constraints = "$src1 = $dst" in defm NAME : PDI_binop_rmi<opc, opc2, ImmForm, OpcodeStr, OpNode, OpNode2, @@ -3631,11 +3631,11 @@ multiclass PDI_binop_ri_all<bits<8> opc, Format ImmForm, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched> { let Predicates = [HasAVX, NoVLX_Or_NoBWI] in defm V#NAME : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode, - VR128, v16i8, sched.XMM, 0>, VEX_4V, WIG; + VR128, v16i8, sched.XMM, 0>, VEX, VVVV, WIG; let Predicates = [HasAVX2, 
NoVLX_Or_NoBWI] in defm V#NAME#Y : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode, VR256, v32i8, sched.YMM, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; let Constraints = "$src1 = $dst" in defm NAME : PDI_binop_ri<opc, ImmForm, OpcodeStr, OpNode, VR128, v16i8, sched.XMM>; @@ -3757,11 +3757,11 @@ let Predicates = [UseSSE2] in { } // ExeDomain = SSEPackedInt defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd, - SchedWriteShuffle, NoVLX>, PD; + SchedWriteShuffle, NoVLX>, TB, PD; defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw, - SchedWriteShuffle, NoVLX_Or_NoBWI>, XS; + SchedWriteShuffle, NoVLX_Or_NoBWI>, TB, XS; defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw, - SchedWriteShuffle, NoVLX_Or_NoBWI>, XD; + SchedWriteShuffle, NoVLX_Or_NoBWI>, TB, XD; //===---------------------------------------------------------------------===// // Packed Integer Pack Instructions (SSE & AVX) @@ -3821,33 +3821,33 @@ multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT, let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, VR128, i128mem, SchedWriteShuffle.XMM, load, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, VR128, i128mem, SchedWriteShuffle.XMM, load, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus, VR128, i128mem, SchedWriteShuffle.XMM, load, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128, i128mem, SchedWriteShuffle.XMM, load, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { defm VPACKSSWBY : sse2_pack<0x63, "vpacksswb", v32i8, v16i16, X86Packss, VR256, i256mem, SchedWriteShuffle.YMM, load, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; defm VPACKSSDWY : sse2_pack<0x6B, "vpackssdw", v16i16, v8i32, X86Packss, 
VR256, i256mem, SchedWriteShuffle.YMM, load, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; defm VPACKUSWBY : sse2_pack<0x67, "vpackuswb", v32i8, v16i16, X86Packus, VR256, i256mem, SchedWriteShuffle.YMM, load, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, VR256, i256mem, SchedWriteShuffle.YMM, load, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; } let Constraints = "$src1 = $dst" in { @@ -3892,61 +3892,61 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, VR128, i128mem, SchedWriteShuffle.XMM, load, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, VR128, i128mem, SchedWriteShuffle.XMM, load, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, VR128, i128mem, SchedWriteShuffle.XMM, load, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, VR128, i128mem, SchedWriteShuffle.XMM, load, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; } let Predicates = [HasAVX, NoVLX] in { defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, VR128, i128mem, SchedWriteShuffle.XMM, load, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, VR128, i128mem, SchedWriteShuffle.XMM, load, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, VR128, i128mem, SchedWriteShuffle.XMM, load, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, VR128, i128mem, SchedWriteShuffle.XMM, load, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { defm VPUNPCKLBWY : sse2_unpack<0x60, "vpunpcklbw", v32i8, X86Unpckl, VR256, i256mem, 
SchedWriteShuffle.YMM, load, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; defm VPUNPCKLWDY : sse2_unpack<0x61, "vpunpcklwd", v16i16, X86Unpckl, VR256, i256mem, SchedWriteShuffle.YMM, load, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; defm VPUNPCKHBWY : sse2_unpack<0x68, "vpunpckhbw", v32i8, X86Unpckh, VR256, i256mem, SchedWriteShuffle.YMM, load, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; defm VPUNPCKHWDY : sse2_unpack<0x69, "vpunpckhwd", v16i16, X86Unpckh, VR256, i256mem, SchedWriteShuffle.YMM, load, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; } let Predicates = [HasAVX2, NoVLX] in { defm VPUNPCKLDQY : sse2_unpack<0x62, "vpunpckldq", v8i32, X86Unpckl, VR256, i256mem, SchedWriteShuffle.YMM, load, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; defm VPUNPCKLQDQY : sse2_unpack<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, VR256, i256mem, SchedWriteShuffle.YMM, load, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; defm VPUNPCKHDQY : sse2_unpack<0x6A, "vpunpckhdq", v8i32, X86Unpckh, VR256, i256mem, SchedWriteShuffle.YMM, load, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; defm VPUNPCKHQDQY : sse2_unpack<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, VR256, i256mem, SchedWriteShuffle.YMM, load, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; } let Constraints = "$src1 = $dst" in { @@ -4004,7 +4004,7 @@ def VPEXTRWrr : Ii8<0xC5, MRMSrcReg, "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), timm:$src2))]>, - PD, VEX, WIG, Sched<[WriteVecExtract]>; + TB, PD, VEX, WIG, Sched<[WriteVecExtract]>; def PEXTRWrr : PDIi8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -4014,10 +4014,10 @@ def PEXTRWrr : PDIi8<0xC5, MRMSrcReg, // Insert let Predicates = [HasAVX, NoBWI] in -defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V, WIG; +defm VPINSRW : sse2_pinsrw<0>, TB, PD, VEX, VVVV, WIG; let Predicates = [UseSSE2], Constraints = "$src1 
= $dst" in -defm PINSRW : sse2_pinsrw, PD; +defm PINSRW : sse2_pinsrw, TB, PD; } // ExeDomain = SSEPackedInt @@ -4306,13 +4306,13 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLoad] in { def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS, + (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, TB, XS, VEX, Requires<[UseAVX]>, WIG; def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, - XS, Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix + TB, XS, Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix } // ExeDomain, SchedRW //===---------------------------------------------------------------------===// @@ -4369,11 +4369,11 @@ let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in { def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>, - XS, VEX, Requires<[UseAVX]>, WIG; + TB, XS, VEX, Requires<[UseAVX]>, WIG; def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>, - XS, Requires<[UseSSE2]>; + TB, XS, Requires<[UseSSE2]>; } // ExeDomain, SchedRW let Predicates = [UseAVX] in { @@ -4563,27 +4563,27 @@ let Predicates = [HasAVX] in { let ExeDomain = SSEPackedSingle in { defm VADDSUBPS : sse3_addsub<"vaddsubps", v4f32, VR128, f128mem, SchedWriteFAddSizes.PS.XMM, loadv4f32, 0>, - XD, VEX_4V, WIG; + TB, XD, VEX, VVVV, WIG; defm VADDSUBPSY : sse3_addsub<"vaddsubps", v8f32, VR256, f256mem, SchedWriteFAddSizes.PS.YMM, loadv8f32, 0>, - XD, VEX_4V, VEX_L, WIG; + TB, XD, VEX, VVVV, VEX_L, WIG; } let ExeDomain = SSEPackedDouble in { defm VADDSUBPD : 
sse3_addsub<"vaddsubpd", v2f64, VR128, f128mem, SchedWriteFAddSizes.PD.XMM, loadv2f64, 0>, - PD, VEX_4V, WIG; + TB, PD, VEX, VVVV, WIG; defm VADDSUBPDY : sse3_addsub<"vaddsubpd", v4f64, VR256, f256mem, SchedWriteFAddSizes.PD.YMM, loadv4f64, 0>, - PD, VEX_4V, VEX_L, WIG; + TB, PD, VEX, VVVV, VEX_L, WIG; } } let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in { let ExeDomain = SSEPackedSingle in defm ADDSUBPS : sse3_addsub<"addsubps", v4f32, VR128, f128mem, - SchedWriteFAddSizes.PS.XMM, memopv4f32>, XD; + SchedWriteFAddSizes.PS.XMM, memopv4f32>, TB, XD; let ExeDomain = SSEPackedDouble in defm ADDSUBPD : sse3_addsub<"addsubpd", v2f64, VR128, f128mem, - SchedWriteFAddSizes.PD.XMM, memopv2f64>, PD; + SchedWriteFAddSizes.PD.XMM, memopv2f64>, TB, PD; } //===---------------------------------------------------------------------===// @@ -4635,23 +4635,23 @@ let Uses = [MXCSR], mayRaiseFPException = 1 in { let Predicates = [HasAVX] in { let ExeDomain = SSEPackedSingle in { defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem, - X86fhadd, WriteFHAdd, loadv4f32, 0>, VEX_4V, WIG; + X86fhadd, WriteFHAdd, loadv4f32, 0>, VEX, VVVV, WIG; defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem, - X86fhsub, WriteFHAdd, loadv4f32, 0>, VEX_4V, WIG; + X86fhsub, WriteFHAdd, loadv4f32, 0>, VEX, VVVV, WIG; defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem, - X86fhadd, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, WIG; + X86fhadd, WriteFHAddY, loadv8f32, 0>, VEX, VVVV, VEX_L, WIG; defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem, - X86fhsub, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, WIG; + X86fhsub, WriteFHAddY, loadv8f32, 0>, VEX, VVVV, VEX_L, WIG; } let ExeDomain = SSEPackedDouble in { defm VHADDPD : S3_Int<0x7C, "vhaddpd", v2f64, VR128, f128mem, - X86fhadd, WriteFHAdd, loadv2f64, 0>, VEX_4V, WIG; + X86fhadd, WriteFHAdd, loadv2f64, 0>, VEX, VVVV, WIG; defm VHSUBPD : S3_Int<0x7D, "vhsubpd", v2f64, VR128, f128mem, - X86fhsub, WriteFHAdd, 
loadv2f64, 0>, VEX_4V, WIG; + X86fhsub, WriteFHAdd, loadv2f64, 0>, VEX, VVVV, WIG; defm VHADDPDY : S3_Int<0x7C, "vhaddpd", v4f64, VR256, f256mem, - X86fhadd, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, WIG; + X86fhadd, WriteFHAddY, loadv4f64, 0>, VEX, VVVV, VEX_L, WIG; defm VHSUBPDY : S3_Int<0x7D, "vhsubpd", v4f64, VR256, f256mem, - X86fhsub, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, WIG; + X86fhsub, WriteFHAddY, loadv4f64, 0>, VEX, VVVV, VEX_L, WIG; } } @@ -4806,45 +4806,45 @@ let ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in { let isCommutable = 0 in { defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8, VR128, load, i128mem, - SchedWriteVarShuffle.XMM, 0>, VEX_4V, WIG; + SchedWriteVarShuffle.XMM, 0>, VEX, VVVV, WIG; defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16, v16i8, VR128, load, i128mem, - SchedWriteVecIMul.XMM, 0>, VEX_4V, WIG; + SchedWriteVecIMul.XMM, 0>, VEX, VVVV, WIG; } defm VPMULHRSW : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, v8i16, VR128, load, i128mem, - SchedWriteVecIMul.XMM, 0>, VEX_4V, WIG; + SchedWriteVecIMul.XMM, 0>, VEX, VVVV, WIG; } let ImmT = NoImm, Predicates = [HasAVX] in { let isCommutable = 0 in { defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128, load, i128mem, - SchedWritePHAdd.XMM, 0>, VEX_4V, WIG; + SchedWritePHAdd.XMM, 0>, VEX, VVVV, WIG; defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128, load, i128mem, - SchedWritePHAdd.XMM, 0>, VEX_4V, WIG; + SchedWritePHAdd.XMM, 0>, VEX, VVVV, WIG; defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128, load, i128mem, - SchedWritePHAdd.XMM, 0>, VEX_4V, WIG; + SchedWritePHAdd.XMM, 0>, VEX, VVVV, WIG; defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128, load, i128mem, - SchedWritePHAdd.XMM, 0>, VEX_4V, WIG; + SchedWritePHAdd.XMM, 0>, VEX, VVVV, WIG; defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", int_x86_ssse3_psign_b_128, - 
SchedWriteVecALU.XMM, load, 0>, VEX_4V, WIG; + SchedWriteVecALU.XMM, load, 0>, VEX, VVVV, WIG; defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", int_x86_ssse3_psign_w_128, - SchedWriteVecALU.XMM, load, 0>, VEX_4V, WIG; + SchedWriteVecALU.XMM, load, 0>, VEX, VVVV, WIG; defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", int_x86_ssse3_psign_d_128, - SchedWriteVecALU.XMM, load, 0>, VEX_4V, WIG; + SchedWriteVecALU.XMM, load, 0>, VEX, VVVV, WIG; defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", int_x86_ssse3_phadd_sw_128, - SchedWritePHAdd.XMM, load, 0>, VEX_4V, WIG; + SchedWritePHAdd.XMM, load, 0>, VEX, VVVV, WIG; defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", int_x86_ssse3_phsub_sw_128, - SchedWritePHAdd.XMM, load, 0>, VEX_4V, WIG; + SchedWritePHAdd.XMM, load, 0>, VEX, VVVV, WIG; } } @@ -4852,42 +4852,42 @@ let ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { let isCommutable = 0 in { defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8, VR256, load, i256mem, - SchedWriteVarShuffle.YMM, 0>, VEX_4V, VEX_L, WIG; + SchedWriteVarShuffle.YMM, 0>, VEX, VVVV, VEX_L, WIG; defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16, v32i8, VR256, load, i256mem, - SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, WIG; + SchedWriteVecIMul.YMM, 0>, VEX, VVVV, VEX_L, WIG; } defm VPMULHRSWY : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16, VR256, load, i256mem, - SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, WIG; + SchedWriteVecIMul.YMM, 0>, VEX, VVVV, VEX_L, WIG; } let ImmT = NoImm, Predicates = [HasAVX2] in { let isCommutable = 0 in { defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16, VR256, load, i256mem, - SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, WIG; + SchedWritePHAdd.YMM, 0>, VEX, VVVV, VEX_L, WIG; defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256, load, i256mem, - SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, WIG; + SchedWritePHAdd.YMM, 0>, VEX, VVVV, VEX_L, WIG; defm VPHSUBWY : 
SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16, VR256, load, i256mem, - SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, WIG; + SchedWritePHAdd.YMM, 0>, VEX, VVVV, VEX_L, WIG; defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256, load, i256mem, - SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, WIG; + SchedWritePHAdd.YMM, 0>, VEX, VVVV, VEX_L, WIG; defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b, - SchedWriteVecALU.YMM>, VEX_4V, VEX_L, WIG; + SchedWriteVecALU.YMM>, VEX, VVVV, VEX_L, WIG; defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w, - SchedWriteVecALU.YMM>, VEX_4V, VEX_L, WIG; + SchedWriteVecALU.YMM>, VEX, VVVV, VEX_L, WIG; defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", int_x86_avx2_psign_d, - SchedWriteVecALU.YMM>, VEX_4V, VEX_L, WIG; + SchedWriteVecALU.YMM>, VEX, VVVV, VEX_L, WIG; defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw", int_x86_avx2_phadd_sw, - SchedWritePHAdd.YMM>, VEX_4V, VEX_L, WIG; + SchedWritePHAdd.YMM>, VEX, VVVV, VEX_L, WIG; defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", int_x86_avx2_phsub_sw, - SchedWritePHAdd.YMM>, VEX_4V, VEX_L, WIG; + SchedWritePHAdd.YMM>, VEX, VVVV, VEX_L, WIG; } } @@ -4956,10 +4956,10 @@ multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC, let Predicates = [HasAVX, NoVLX_Or_NoBWI] in defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, load, i128mem, - SchedWriteShuffle.XMM, 0>, VEX_4V, WIG; + SchedWriteShuffle.XMM, 0>, VEX, VVVV, WIG; let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, load, i256mem, - SchedWriteShuffle.YMM, 0>, VEX_4V, VEX_L, WIG; + SchedWriteShuffle.YMM, 0>, VEX, VVVV, VEX_L, WIG; let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memop, i128mem, SchedWriteShuffle.XMM>; @@ -5367,7 +5367,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> { } let Predicates = [HasAVX, 
NoBWI] in { - defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V, WIG; + defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX, VVVV, WIG; def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3), (VPINSRBrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit), timm:$src3)>; @@ -5398,7 +5398,7 @@ multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> { } let Predicates = [HasAVX, NoDQI] in - defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V; + defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX, VVVV; let Constraints = "$src1 = $dst" in defm PINSRD : SS41I_insert32<0x22, "pinsrd">; @@ -5424,7 +5424,7 @@ multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> { } let Predicates = [HasAVX, NoDQI] in - defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, REX_W; + defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX, VVVV, REX_W; let Constraints = "$src1 = $dst" in defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W; @@ -5459,7 +5459,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> { let ExeDomain = SSEPackedSingle in { let Predicates = [UseAVX] in defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; let Constraints = "$src1 = $dst" in defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1>; } @@ -5638,9 +5638,9 @@ let Predicates = [HasAVX, NoVLX] in { let Predicates = [UseAVX] in { defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl, v4f32, v2f64, X86RndScales, 0>, - VEX_4V, VEX_LIG, WIG, SIMD_EXC; + VEX, VVVV, VEX_LIG, WIG, SIMD_EXC; defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>, - VEX_4V, VEX_LIG, WIG, SIMD_EXC; + VEX, VVVV, VEX_LIG, WIG, SIMD_EXC; } let Predicates = [UseAVX] in { @@ -5760,33 +5760,33 @@ let Defs = [EFLAGS], Predicates = [HasPOPCNT] in { def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "popcnt{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, 
(ctpop GR16:$src)), (implicit EFLAGS)]>, - Sched<[WritePOPCNT]>, OpSize16, XS; + Sched<[WritePOPCNT]>, OpSize16, TB, XS; def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "popcnt{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, (ctpop (loadi16 addr:$src))), (implicit EFLAGS)]>, - Sched<[WritePOPCNT.Folded]>, OpSize16, XS; + Sched<[WritePOPCNT.Folded]>, OpSize16, TB, XS; def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "popcnt{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>, - Sched<[WritePOPCNT]>, OpSize32, XS; + Sched<[WritePOPCNT]>, OpSize32, TB, XS; def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "popcnt{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (ctpop (loadi32 addr:$src))), (implicit EFLAGS)]>, - Sched<[WritePOPCNT.Folded]>, OpSize32, XS; + Sched<[WritePOPCNT.Folded]>, OpSize32, TB, XS; def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "popcnt{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>, - Sched<[WritePOPCNT]>, XS; + Sched<[WritePOPCNT]>, TB, XS; def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "popcnt{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (ctpop (loadi64 addr:$src))), (implicit EFLAGS)]>, - Sched<[WritePOPCNT.Folded]>, XS; + Sched<[WritePOPCNT.Folded]>, TB, XS; } // SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16. 
@@ -5842,65 +5842,65 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, let Predicates = [HasAVX, NoVLX] in { defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128, load, i128mem, SchedWriteVecALU.XMM, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128, load, i128mem, SchedWriteVecALU.XMM, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128, load, i128mem, SchedWriteVecALU.XMM, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128, load, i128mem, SchedWriteVecALU.XMM, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; defm VPMULDQ : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v2i64, VR128, load, i128mem, SchedWriteVecIMul.XMM, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; } let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128, load, i128mem, SchedWriteVecALU.XMM, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128, load, i128mem, SchedWriteVecALU.XMM, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128, load, i128mem, SchedWriteVecALU.XMM, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128, load, i128mem, SchedWriteVecALU.XMM, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; } let Predicates = [HasAVX2, NoVLX] in { defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256, load, i256mem, SchedWriteVecALU.YMM, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256, load, i256mem, SchedWriteVecALU.YMM, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256, load, i256mem, SchedWriteVecALU.YMM, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; defm VPMAXUDY : 
SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256, load, i256mem, SchedWriteVecALU.YMM, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; defm VPMULDQY : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v4i64, VR256, load, i256mem, SchedWriteVecIMul.YMM, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256, load, i256mem, SchedWriteVecALU.YMM, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256, load, i256mem, SchedWriteVecALU.YMM, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256, load, i256mem, SchedWriteVecALU.YMM, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256, load, i256mem, SchedWriteVecALU.YMM, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; } let Constraints = "$src1 = $dst" in { @@ -5927,20 +5927,20 @@ let Constraints = "$src1 = $dst" in { let Predicates = [HasAVX, NoVLX] in defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128, load, i128mem, SchedWritePMULLD.XMM, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; let Predicates = [HasAVX] in defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128, load, i128mem, SchedWriteVecALU.XMM, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; let Predicates = [HasAVX2, NoVLX] in defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256, load, i256mem, SchedWritePMULLD.YMM, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; let Predicates = [HasAVX2] in defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256, load, i256mem, SchedWriteVecALU.YMM, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; let Constraints = "$src1 = $dst" in { defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128, @@ -6088,22 +6088,22 @@ let Predicates = [HasAVX] in { let isCommutable = 0 in 
{ defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw, VR128, load, i128mem, 0, - SchedWriteMPSAD.XMM>, VEX_4V, WIG; + SchedWriteMPSAD.XMM>, VEX, VVVV, WIG; } let Uses = [MXCSR], mayRaiseFPException = 1 in { let ExeDomain = SSEPackedSingle in defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, VR128, load, f128mem, 0, - SchedWriteDPPS.XMM>, VEX_4V, WIG; + SchedWriteDPPS.XMM>, VEX, VVVV, WIG; let ExeDomain = SSEPackedDouble in defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd, VR128, load, f128mem, 0, - SchedWriteDPPD.XMM>, VEX_4V, WIG; + SchedWriteDPPD.XMM>, VEX, VVVV, WIG; let ExeDomain = SSEPackedSingle in defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256, VR256, load, i256mem, 0, - SchedWriteDPPS.YMM>, VEX_4V, VEX_L, WIG; + SchedWriteDPPS.YMM>, VEX, VVVV, VEX_L, WIG; } } @@ -6111,7 +6111,7 @@ let Predicates = [HasAVX2] in { let isCommutable = 0 in { defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw, VR256, load, i256mem, 0, - SchedWriteMPSAD.YMM>, VEX_4V, VEX_L, WIG; + SchedWriteMPSAD.YMM>, VEX, VVVV, VEX_L, WIG; } } @@ -6170,30 +6170,30 @@ let Predicates = [HasAVX] in { defm VBLENDPS : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v4f32, VR128, load, f128mem, 0, SSEPackedSingle, SchedWriteFBlend.XMM, BlendCommuteImm4>, - VEX_4V, WIG; + VEX, VVVV, WIG; defm VBLENDPSY : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v8f32, VR256, load, f256mem, 0, SSEPackedSingle, SchedWriteFBlend.YMM, BlendCommuteImm8>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; defm VBLENDPD : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v2f64, VR128, load, f128mem, 0, SSEPackedDouble, SchedWriteFBlend.XMM, BlendCommuteImm2>, - VEX_4V, WIG; + VEX, VVVV, WIG; defm VBLENDPDY : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v4f64, VR256, load, f256mem, 0, SSEPackedDouble, SchedWriteFBlend.YMM, BlendCommuteImm4>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; defm VPBLENDW : SS41I_blend_rmi<0x0E, 
"vpblendw", X86Blendi, v8i16, VR128, load, i128mem, 0, SSEPackedInt, SchedWriteBlend.XMM, BlendCommuteImm8>, - VEX_4V, WIG; + VEX, VVVV, WIG; } let Predicates = [HasAVX2] in { defm VPBLENDWY : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v16i16, VR256, load, i256mem, 0, SSEPackedInt, SchedWriteBlend.YMM, BlendCommuteImm8>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; } // Emulate vXi32/vXi64 blends with vXf32/vXf64 or pblendw. @@ -6290,7 +6290,7 @@ multiclass SS41I_quaternary_avx<bits<8> opc, string OpcodeStr, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (VT (OpNode RC:$src3, RC:$src2, RC:$src1)))], - SSEPackedInt>, TAPD, VEX_4V, + SSEPackedInt>, TA, PD, VEX, VVVV, Sched<[sched]>; def rm : Ii8Reg<opc, MRMSrcMem, (outs RC:$dst), @@ -6299,7 +6299,7 @@ multiclass SS41I_quaternary_avx<bits<8> opc, string OpcodeStr, RegisterClass RC, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (OpNode RC:$src3, (mem_frag addr:$src2), - RC:$src1))], SSEPackedInt>, TAPD, VEX_4V, + RC:$src1))], SSEPackedInt>, TA, PD, VEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold, // x86memop:$src2 ReadDefault, ReadDefault, ReadDefault, ReadDefault, @@ -6564,12 +6564,12 @@ multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, let Predicates = [HasAVX] in defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128, load, i128mem, SchedWriteVecALU.XMM, 0>, - VEX_4V, WIG; + VEX, VVVV, WIG; let Predicates = [HasAVX2] in defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256, load, i256mem, SchedWriteVecALU.YMM, 0>, - VEX_4V, VEX_L, WIG; + VEX, VVVV, VEX_L, WIG; let Constraints = "$src1 = $dst" in defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128, @@ -6706,37 +6706,37 @@ let Constraints = "$src1 = $dst" in { // FIXME: Is there a better scheduler class for SHA than WriteVecIMul? 
multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId, - X86FoldableSchedWrite sched, bit UsesXMM0 = 0> { - def rr : I<Opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !if(UsesXMM0, - !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")), - [!if(UsesXMM0, - (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)), - (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>, - T8PS, Sched<[sched]>; - - def rm : I<Opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !if(UsesXMM0, - !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")), - [!if(UsesXMM0, - (set VR128:$dst, (IntId VR128:$src1, - (memop addr:$src2), XMM0)), - (set VR128:$dst, (IntId VR128:$src1, - (memop addr:$src2))))]>, T8PS, - Sched<[sched.Folded, sched.ReadAfterFold]>; + X86FoldableSchedWrite sched, string Suffix = "", bit UsesXMM0 = 0> { + def rr#Suffix : I<Opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !if(UsesXMM0, + !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"), + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")), + [!if(UsesXMM0, + (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)), + (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>, + T8, Sched<[sched]>; + + def rm#Suffix : I<Opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2), + !if(UsesXMM0, + !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"), + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")), + [!if(UsesXMM0, + (set VR128:$dst, (IntId VR128:$src1, + (memop addr:$src2), XMM0)), + (set VR128:$dst, (IntId VR128:$src1, + (memop addr:$src2))))]>, T8, + Sched<[sched.Folded, sched.ReadAfterFold]>; } -let Constraints = "$src1 = $dst", Predicates = [HasSHA] in { +let Constraints = "$src1 = $dst", Predicates = [HasSHA, NoEGPR] in { def SHA1RNDS4rri : Ii8<0xCC, 
MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, u8imm:$src3), "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, (int_x86_sha1rnds4 VR128:$src1, VR128:$src2, - (i8 timm:$src3)))]>, TAPS, + (i8 timm:$src3)))]>, TA, Sched<[SchedWriteVecIMul.XMM]>; def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, u8imm:$src3), @@ -6744,7 +6744,7 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in { [(set VR128:$dst, (int_x86_sha1rnds4 VR128:$src1, (memop addr:$src2), - (i8 timm:$src3)))]>, TAPS, + (i8 timm:$src3)))]>, TA, Sched<[SchedWriteVecIMul.XMM.Folded, SchedWriteVecIMul.XMM.ReadAfterFold]>; @@ -6757,7 +6757,7 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in { let Uses=[XMM0] in defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2, - SchedWriteVecIMul.XMM, 1>; + SchedWriteVecIMul.XMM, "", 1>; defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1, SchedWriteVecIMul.XMM>; @@ -6765,11 +6765,47 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in { SchedWriteVecIMul.XMM>; } -// Aliases with explicit %xmm0 -def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}", - (SHA256RNDS2rr VR128:$dst, VR128:$src2), 0>; -def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}", - (SHA256RNDS2rm VR128:$dst, i128mem:$src2), 0>; +let Constraints = "$src1 = $dst", Predicates = [HasSHA, HasEGPR, In64BitMode] in { + def SHA1RNDS4rri_EVEX: Ii8<0xD4, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, u8imm:$src3), + "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set VR128:$dst, + (int_x86_sha1rnds4 VR128:$src1, VR128:$src2, + (i8 timm:$src3)))]>, + EVEX, NoCD8, T_MAP4, Sched<[SchedWriteVecIMul.XMM]>; + def SHA1RNDS4rmi_EVEX: Ii8<0xD4, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, u8imm:$src3), + "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set VR128:$dst, + (int_x86_sha1rnds4 VR128:$src1, + (memop addr:$src2), 
+ (i8 timm:$src3)))]>, + EVEX, NoCD8, T_MAP4, + Sched<[SchedWriteVecIMul.XMM.Folded, + SchedWriteVecIMul.XMM.ReadAfterFold]>; + + defm SHA1NEXTE : SHAI_binop<0xD8, "sha1nexte", int_x86_sha1nexte, + SchedWriteVecIMul.XMM, "_EVEX">, + EVEX, NoCD8, T_MAP4; + defm SHA1MSG1 : SHAI_binop<0xD9, "sha1msg1", int_x86_sha1msg1, + SchedWriteVecIMul.XMM, "_EVEX">, + EVEX, NoCD8, T_MAP4; + defm SHA1MSG2 : SHAI_binop<0xDA, "sha1msg2", int_x86_sha1msg2, + SchedWriteVecIMul.XMM, "_EVEX">, + EVEX, NoCD8, T_MAP4; + + let Uses=[XMM0] in + defm SHA256RNDS2 : SHAI_binop<0xDB, "sha256rnds2", int_x86_sha256rnds2, + SchedWriteVecIMul.XMM, "_EVEX", 1>, + EVEX, NoCD8, T_MAP4; + + defm SHA256MSG1 : SHAI_binop<0xDC, "sha256msg1", int_x86_sha256msg1, + SchedWriteVecIMul.XMM, "_EVEX">, + EVEX, NoCD8, T_MAP4; + defm SHA256MSG2 : SHAI_binop<0xDD, "sha256msg2", int_x86_sha256msg2, + SchedWriteVecIMul.XMM, "_EVEX">, + EVEX, NoCD8, T_MAP4; +} //===----------------------------------------------------------------------===// // AES-NI Instructions @@ -6796,28 +6832,28 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr, // Perform One Round of an AES Encryption/Decryption Flow let Predicates = [HasAVX, NoVLX_Or_NoVAES, HasAES] in { defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc", - int_x86_aesni_aesenc, load>, VEX_4V, WIG; + int_x86_aesni_aesenc, load>, VEX, VVVV, WIG; defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast", - int_x86_aesni_aesenclast, load>, VEX_4V, WIG; + int_x86_aesni_aesenclast, load>, VEX, VVVV, WIG; defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec", - int_x86_aesni_aesdec, load>, VEX_4V, WIG; + int_x86_aesni_aesdec, load>, VEX, VVVV, WIG; defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast", - int_x86_aesni_aesdeclast, load>, VEX_4V, WIG; + int_x86_aesni_aesdeclast, load>, VEX, VVVV, WIG; } let Predicates = [NoVLX, HasVAES] in { defm VAESENCY : AESI_binop_rm_int<0xDC, "vaesenc", int_x86_aesni_aesenc_256, load, 0, VR256, - i256mem>, VEX_4V, VEX_L, WIG; + i256mem>, 
VEX, VVVV, VEX_L, WIG; defm VAESENCLASTY : AESI_binop_rm_int<0xDD, "vaesenclast", int_x86_aesni_aesenclast_256, load, 0, VR256, - i256mem>, VEX_4V, VEX_L, WIG; + i256mem>, VEX, VVVV, VEX_L, WIG; defm VAESDECY : AESI_binop_rm_int<0xDE, "vaesdec", int_x86_aesni_aesdec_256, load, 0, VR256, - i256mem>, VEX_4V, VEX_L, WIG; + i256mem>, VEX, VVVV, VEX_L, WIG; defm VAESDECLASTY : AESI_binop_rm_int<0xDF, "vaesdeclast", int_x86_aesni_aesdeclast_256, load, 0, VR256, - i256mem>, VEX_4V, VEX_L, WIG; + i256mem>, VEX, VVVV, VEX_L, WIG; } let Constraints = "$src1 = $dst" in { @@ -6958,11 +6994,11 @@ multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp, let Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in defm VPCLMULQDQ : vpclmulqdq<VR128, i128mem, load, - int_x86_pclmulqdq>, VEX_4V, WIG; + int_x86_pclmulqdq>, VEX, VVVV, WIG; let Predicates = [NoVLX, HasVPCLMULQDQ] in defm VPCLMULQDQY : vpclmulqdq<VR256, i256mem, load, - int_x86_pclmulqdq_256>, VEX_4V, VEX_L, WIG; + int_x86_pclmulqdq_256>, VEX, VVVV, VEX_L, WIG; multiclass vpclmulqdq_aliases_impl<string InstStr, RegisterClass RC, X86MemOperand MemOp, string Hi, string Lo> { @@ -6999,26 +7035,26 @@ def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst), "extrq\t{$idx, $len, $src|$src, $len, $idx}", [(set VR128:$dst, (X86extrqi VR128:$src, timm:$len, timm:$idx))]>, - PD, Sched<[SchedWriteVecALU.XMM]>; + TB, PD, Sched<[SchedWriteVecALU.XMM]>; def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$mask), "extrq\t{$mask, $src|$src, $mask}", [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src, VR128:$mask))]>, - PD, Sched<[SchedWriteVecALU.XMM]>; + TB, PD, Sched<[SchedWriteVecALU.XMM]>; def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx), "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}", [(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2, timm:$len, timm:$idx))]>, - XD, Sched<[SchedWriteVecALU.XMM]>; + TB, XD, 
Sched<[SchedWriteVecALU.XMM]>; def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$mask), "insertq\t{$mask, $src|$src, $mask}", [(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src, VR128:$mask))]>, - XD, Sched<[SchedWriteVecALU.XMM]>; + TB, XD, Sched<[SchedWriteVecALU.XMM]>; } } // ExeDomain = SSEPackedInt @@ -7026,10 +7062,10 @@ def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst), let AddedComplexity = 400 in { // Prefer non-temporal versions let hasSideEffects = 0, mayStore = 1, SchedRW = [SchedWriteFMoveLSNT.Scl.MR] in { def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src), - "movntss\t{$src, $dst|$dst, $src}", []>, XS; + "movntss\t{$src, $dst|$dst, $src}", []>, TB, XS; def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), - "movntsd\t{$src, $dst|$dst, $src}", []>, XD; + "movntsd\t{$src, $dst|$dst, $src}", []>, TB, XD; } // SchedRW def : Pat<(nontemporalstore FR32:$src, addr:$dst), @@ -7133,11 +7169,11 @@ let isCommutable = 1 in def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, u8imm:$src3), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, - VEX_4V, VEX_L, Sched<[WriteFShuffle256]>; + VEX, VVVV, VEX_L, Sched<[WriteFShuffle256]>; def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, u8imm:$src3), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, - VEX_4V, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>; + VEX, VVVV, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>; } // Immediate transform to help with commuting. 
@@ -7176,12 +7212,12 @@ let hasSideEffects = 0, ExeDomain = SSEPackedSingle in { def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR128:$src2, u8imm:$src3), "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, Sched<[WriteFShuffle256]>, VEX_4V, VEX_L; + []>, Sched<[WriteFShuffle256]>, VEX, VVVV, VEX_L; let mayLoad = 1 in def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f128mem:$src2, u8imm:$src3), "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>, VEX_4V, VEX_L; + []>, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>, VEX, VVVV, VEX_L; } // To create a 256-bit all ones value, we should produce VCMPTRUEPS @@ -7279,22 +7315,22 @@ multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr, (ins VR128:$src1, f128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>, - VEX_4V, Sched<[schedX.RM]>; + VEX, VVVV, Sched<[schedX.RM]>; def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, - VEX_4V, VEX_L, Sched<[schedY.RM]>; + VEX, VVVV, VEX_L, Sched<[schedY.RM]>; def mr : AVX8I<opc_mr, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, - VEX_4V, Sched<[schedX.MR]>; + VEX, VVVV, Sched<[schedX.MR]>; def Ymr : AVX8I<opc_mr, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src1, VR256:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, - VEX_4V, VEX_L, Sched<[schedY.MR]>; + VEX, VVVV, VEX_L, Sched<[schedY.MR]>; } let ExeDomain = 
SSEPackedSingle in @@ -7325,14 +7361,14 @@ multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR128:$dst, (v4i32 (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>, - VEX_4V, Sched<[SchedWriteVecIMul.XMM]>; + VEX, VVVV, Sched<[SchedWriteVecIMul.XMM]>; def rm : AVX8I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i128mem:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR128:$dst, (v4i32 (OpNode VR128:$src1, VR128:$src2, (loadv4i32 addr:$src3))))]>, - VEX_4V, Sched<[SchedWriteVecIMul.XMM.Folded, + VEX, VVVV, Sched<[SchedWriteVecIMul.XMM.Folded, SchedWriteVecIMul.XMM.ReadAfterFold, SchedWriteVecIMul.XMM.ReadAfterFold]>; @@ -7342,14 +7378,14 @@ multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR256:$dst, (v8i32 (OpNode VR256:$src1, VR256:$src2, VR256:$src3)))]>, - VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>; + VEX, VVVV, VEX_L, Sched<[SchedWriteVecIMul.YMM]>; def Yrm : AVX8I<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i256mem:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR256:$dst, (v8i32 (OpNode VR256:$src1, VR256:$src2, (loadv8i32 addr:$src3))))]>, - VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM.Folded, + VEX, VVVV, VEX_L, Sched<[SchedWriteVecIMul.YMM.Folded, SchedWriteVecIMul.YMM.ReadAfterFold, SchedWriteVecIMul.YMM.ReadAfterFold]>; } @@ -7388,13 +7424,13 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr, def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt RC:$src2))))]>, VEX_4V, + [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt RC:$src2))))]>, VEX, VVVV, Sched<[varsched]>; def rm : AVX8I<opc_rm, 
MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop_i:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, - (i_vt (load addr:$src2)))))]>, VEX_4V, + (i_vt (load addr:$src2)))))]>, VEX, VVVV, Sched<[varsched.Folded, sched.ReadAfterFold]>; def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst), @@ -7438,12 +7474,12 @@ let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in { // Zero All YMM registers def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall", - [(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L, + [(int_x86_avx_vzeroall)]>, TB, VEX, VEX_L, Requires<[HasAVX]>, WIG; // Zero Upper bits of YMM registers def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper", - [(int_x86_avx_vzeroupper)]>, PS, VEX, + [(int_x86_avx_vzeroupper)]>, TB, VEX, Requires<[HasAVX]>, WIG; } // Defs } // SchedRW @@ -7457,11 +7493,11 @@ multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src), "vcvtph2ps\t{$src, $dst|$dst, $src}", [(set RC:$dst, (X86any_cvtph2ps VR128:$src))]>, - T8PD, VEX, Sched<[sched]>; + T8, PD, VEX, Sched<[sched]>; let hasSideEffects = 0, mayLoad = 1 in def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), "vcvtph2ps\t{$src, $dst|$dst, $src}", - []>, T8PD, VEX, Sched<[sched.Folded]>; + []>, T8, PD, VEX, Sched<[sched.Folded]>; } multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, @@ -7470,12 +7506,12 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, (ins RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (X86any_cvtps2ph RC:$src1, timm:$src2))]>, - TAPD, VEX, Sched<[RR]>; + TA, PD, VEX, Sched<[RR]>; let hasSideEffects = 0, mayStore = 1 in def mr : Ii8<0x1D, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - TAPD, 
VEX, Sched<[MR]>; + TA, PD, VEX, Sched<[MR]>; } let Predicates = [HasF16C, NoVLX] in { @@ -7522,14 +7558,14 @@ multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>, - Sched<[sched]>, VEX_4V; + Sched<[sched]>, VEX, VVVV; def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (OpVT (OpNode RC:$src1, (load addr:$src2), timm:$src3)))]>, - Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V; + Sched<[sched.Folded, sched.ReadAfterFold]>, VEX, VVVV; // Pattern to commute if load is in first source. def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, timm:$src3)), @@ -7779,7 +7815,7 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>, - Sched<[Sched]>, VEX_4V, VEX_L; + Sched<[Sched]>, VEX, VVVV, VEX_L; def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, memOp:$src2), !strconcat(OpcodeStr, @@ -7787,7 +7823,7 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, [(set VR256:$dst, (OpVT (X86VPermv VR256:$src1, (load addr:$src2))))]>, - Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX_4V, VEX_L; + Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VVVV, VEX_L; } } @@ -7830,11 +7866,11 @@ let isCommutable = 1 in def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, u8imm:$src3), "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, - Sched<[WriteShuffle256]>, VEX_4V, VEX_L; + Sched<[WriteShuffle256]>, VEX, VVVV, VEX_L; def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, u8imm:$src3), "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, 
$src3}", []>, - Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L; + Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX, VVVV, VEX_L; let Predicates = [HasAVX2] in { defm : vperm2x128_lowering<"VPERM2I128", v4i64, loadv4i64>; @@ -7852,12 +7888,12 @@ let hasSideEffects = 0 in { def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR128:$src2, u8imm:$src3), "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, Sched<[WriteShuffle256]>, VEX_4V, VEX_L; + []>, Sched<[WriteShuffle256]>, VEX, VVVV, VEX_L; let mayLoad = 1 in def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i128mem:$src2, u8imm:$src3), "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L; + []>, Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX, VVVV, VEX_L; } let Predicates = [HasAVX2, NoVLX] in { @@ -7903,22 +7939,22 @@ multiclass avx2_pmovmask<string OpcodeStr, (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>, - VEX_4V, Sched<[schedX.RM]>; + VEX, VVVV, Sched<[schedX.RM]>; def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, - VEX_4V, VEX_L, Sched<[schedY.RM]>; + VEX, VVVV, VEX_L, Sched<[schedY.RM]>; def mr : AVX28I<0x8e, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>, - VEX_4V, Sched<[schedX.MR]>; + VEX, VVVV, Sched<[schedX.MR]>; def Ymr : AVX28I<0x8e, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src1, VR256:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, 
$dst|$dst, $src1, $src2}"), [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, - VEX_4V, VEX_L, Sched<[schedY.MR]>; + VEX, VVVV, VEX_L, Sched<[schedY.MR]>; } defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd", @@ -7976,28 +8012,28 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>, - VEX_4V, Sched<[SchedWriteVarVecShift.XMM]>; + VEX, VVVV, Sched<[SchedWriteVarVecShift.XMM]>; def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (vt128 (load addr:$src2)))))]>, - VEX_4V, Sched<[SchedWriteVarVecShift.XMM.Folded, + VEX, VVVV, Sched<[SchedWriteVarVecShift.XMM.Folded, SchedWriteVarVecShift.XMM.ReadAfterFold]>; def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>, - VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM]>; + VEX, VVVV, VEX_L, Sched<[SchedWriteVarVecShift.YMM]>; def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode VR256:$src1, (vt256 (load addr:$src2)))))]>, - VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded, + VEX, VVVV, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded, SchedWriteVarVecShift.YMM.ReadAfterFold]>; } @@ -8073,12 +8109,12 @@ multiclass GF2P8MULB_rm<string OpcodeStr, ValueType OpVT, let isCommutable = 1 in def rr : PDI<0xCF, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), "", [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, RC:$src2)))]>, - Sched<[sched]>, T8PD; + Sched<[sched]>, T8; def rm : PDI<0xCF, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, 
X86MemOp:$src2), "", [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, (MemOpFrag addr:$src2))))]>, - Sched<[sched.Folded, sched.ReadAfterFold]>, T8PD; + Sched<[sched.Folded, sched.ReadAfterFold]>, T8; } } @@ -8110,10 +8146,10 @@ multiclass GF2P8AFFINE_common<bits<8> Op, string OpStr, SDNode OpNode> { let Predicates = [HasGFNI, HasAVX, NoVLX] in { defm V#NAME : GF2P8AFFINE_rmi<Op, "v"#OpStr, v16i8, OpNode, VR128, load, i128mem, SchedWriteVecIMul.XMM>, - VEX_4V, REX_W; + VEX, VVVV, REX_W; defm V#NAME#Y : GF2P8AFFINE_rmi<Op, "v"#OpStr, v32i8, OpNode, VR256, load, i256mem, SchedWriteVecIMul.YMM>, - VEX_4V, VEX_L, REX_W; + VEX, VVVV, VEX_L, REX_W; } } @@ -8124,16 +8160,16 @@ defm GF2P8MULB : GF2P8MULB_rm<"gf2p8mulb", v16i8, VR128, memop, i128mem, SchedWriteVecALU.XMM, 1>; let Predicates = [HasGFNI, HasAVX, NoVLX] in { defm VGF2P8MULB : GF2P8MULB_rm<"vgf2p8mulb", v16i8, VR128, load, - i128mem, SchedWriteVecALU.XMM>, VEX_4V; + i128mem, SchedWriteVecALU.XMM>, VEX, VVVV; defm VGF2P8MULBY : GF2P8MULB_rm<"vgf2p8mulb", v32i8, VR256, load, - i256mem, SchedWriteVecALU.YMM>, VEX_4V, VEX_L; + i256mem, SchedWriteVecALU.YMM>, VEX, VVVV, VEX_L; } // GF2P8AFFINEINVQB, GF2P8AFFINEQB let isCommutable = 0 in { defm GF2P8AFFINEINVQB : GF2P8AFFINE_common<0xCF, "gf2p8affineinvqb", - X86GF2P8affineinvqb>, TAPD; + X86GF2P8affineinvqb>, TA, PD; defm GF2P8AFFINEQB : GF2P8AFFINE_common<0xCE, "gf2p8affineqb", - X86GF2P8affineqb>, TAPD; + X86GF2P8affineqb>, TA, PD; } // AVX-IFMA @@ -8147,28 +8183,28 @@ multiclass avx_ifma_rm<bits<8> opc, string OpcodeStr, SDNode OpNode> { !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR128:$dst, (v2i64 (OpNode VR128:$src2, VR128:$src3, VR128:$src1)))]>, - VEX_4V, Sched<[SchedWriteVecIMul.XMM]>; + VEX, VVVV, Sched<[SchedWriteVecIMul.XMM]>; } def rm : AVX8I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i128mem:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR128:$dst, (v2i64 (OpNode 
VR128:$src2, (loadv2i64 addr:$src3), VR128:$src1)))]>, - VEX_4V, Sched<[SchedWriteVecIMul.XMM]>; + VEX, VVVV, Sched<[SchedWriteVecIMul.XMM]>; let isCommutable = 1 in { def Yrr : AVX8I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, VR256:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR256:$dst, (v4i64 (OpNode VR256:$src2, VR256:$src3, VR256:$src1)))]>, - VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>; + VEX, VVVV, VEX_L, Sched<[SchedWriteVecIMul.YMM]>; } def Yrm : AVX8I<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i256mem:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR256:$dst, (v4i64 (OpNode VR256:$src2, (loadv4i64 addr:$src3), VR256:$src1)))]>, - VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>; + VEX, VVVV, VEX_L, Sched<[SchedWriteVecIMul.YMM]>; } defm VPMADD52HUQ : avx_ifma_rm<0xb5, "vpmadd52huq", x86vpmadd52h>, REX_W, ExplicitVEXPrefix; @@ -8186,52 +8222,52 @@ multiclass avx_dotprod_rm<bits<8> Opc, string OpcodeStr, ValueType OpVT, (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, - VEX_4V, Sched<[Sched]>; + VEX, VVVV, Sched<[Sched]>; def rm : I<Opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, RC:$src2, X86memop:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, (MemOpFrag addr:$src3))))]>, - VEX_4V, Sched<[Sched.Folded, Sched.ReadAfterFold]>; + VEX, VVVV, Sched<[Sched.Folded, Sched.ReadAfterFold]>; } let Predicates = [HasAVXVNNIINT8] in { defm VPDPBSSD : avx_dotprod_rm<0x50,"vpdpbssd", v4i32, VR128, loadv4i32, i128mem, X86vpdpbssd, SchedWriteVecIMul.XMM, - 1>, T8XD; + 1>, T8, XD; defm VPDPBSSDY : avx_dotprod_rm<0x50,"vpdpbssd", v8i32, VR256, loadv8i32, i256mem, X86vpdpbssd, SchedWriteVecIMul.YMM, - 1>, VEX_L, T8XD; + 1>, VEX_L, T8, XD; defm VPDPBUUD : 
avx_dotprod_rm<0x50,"vpdpbuud", v4i32, VR128, loadv4i32, i128mem, X86vpdpbuud, SchedWriteVecIMul.XMM, - 1>, T8PS; + 1>, T8; defm VPDPBUUDY : avx_dotprod_rm<0x50,"vpdpbuud", v8i32, VR256, loadv8i32, i256mem, X86vpdpbuud, SchedWriteVecIMul.YMM, - 1>, VEX_L, T8PS; + 1>, VEX_L, T8; defm VPDPBSSDS : avx_dotprod_rm<0x51,"vpdpbssds", v4i32, VR128, loadv4i32, i128mem, X86vpdpbssds, SchedWriteVecIMul.XMM, - 1>, T8XD; + 1>, T8, XD; defm VPDPBSSDSY : avx_dotprod_rm<0x51,"vpdpbssds", v8i32, VR256, loadv8i32, i256mem, X86vpdpbssds, SchedWriteVecIMul.YMM, - 1>, VEX_L, T8XD; + 1>, VEX_L, T8, XD; defm VPDPBUUDS : avx_dotprod_rm<0x51,"vpdpbuuds", v4i32, VR128, loadv4i32, i128mem, X86vpdpbuuds, SchedWriteVecIMul.XMM, - 1>, T8PS; + 1>, T8; defm VPDPBUUDSY : avx_dotprod_rm<0x51,"vpdpbuuds", v8i32, VR256, loadv8i32, i256mem, X86vpdpbuuds, SchedWriteVecIMul.YMM, - 1>, VEX_L, T8PS; + 1>, VEX_L, T8; defm VPDPBSUD : avx_dotprod_rm<0x50,"vpdpbsud", v4i32, VR128, loadv4i32, i128mem, X86vpdpbsud, SchedWriteVecIMul.XMM, - 0>, T8XS; + 0>, T8, XS; defm VPDPBSUDY : avx_dotprod_rm<0x50,"vpdpbsud", v8i32, VR256, loadv8i32, i256mem, X86vpdpbsud, SchedWriteVecIMul.YMM, - 0>, VEX_L, T8XS; + 0>, VEX_L, T8, XS; defm VPDPBSUDS : avx_dotprod_rm<0x51,"vpdpbsuds", v4i32, VR128, loadv4i32, i128mem, X86vpdpbsuds, SchedWriteVecIMul.XMM, - 0>, T8XS; + 0>, T8, XS; defm VPDPBSUDSY : avx_dotprod_rm<0x51,"vpdpbsuds", v8i32, VR256, loadv8i32, i256mem, X86vpdpbsuds, SchedWriteVecIMul.YMM, - 0>, VEX_L, T8XS; + 0>, VEX_L, T8, XS; } // AVX-NE-CONVERT @@ -8270,18 +8306,18 @@ multiclass VCVTNEPS2BF16_BASE { let Predicates = [HasAVXNECONVERT] in { defm VBCSTNEBF162PS : AVX_NE_CONVERT_BASE<0xb1, "vbcstnebf162ps", f16mem, - f16mem>, T8XS; + f16mem>, T8, XS; defm VBCSTNESH2PS : AVX_NE_CONVERT_BASE<0xb1, "vbcstnesh2ps", f16mem, f16mem>, - T8PD; + T8, PD; defm VCVTNEEBF162PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneebf162ps", f128mem, - f256mem>, T8XS; + f256mem>, T8, XS; defm VCVTNEEPH2PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneeph2ps", 
f128mem, - f256mem>, T8PD; + f256mem>, T8, PD; defm VCVTNEOBF162PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneobf162ps", f128mem, - f256mem>, T8XD; + f256mem>, T8, XD; defm VCVTNEOPH2PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneoph2ps", f128mem, - f256mem>, T8PS; - defm VCVTNEPS2BF16 : VCVTNEPS2BF16_BASE, VEX, T8XS, ExplicitVEXPrefix; + f256mem>, T8; + defm VCVTNEPS2BF16 : VCVTNEPS2BF16_BASE, VEX, T8, XS, ExplicitVEXPrefix; def : Pat<(v8bf16 (X86vfpround (v8f32 VR256:$src))), (VCVTNEPS2BF16Yrr VR256:$src)>; @@ -8301,19 +8337,19 @@ def VSHA512MSG1rr : I<0xcc, MRMSrcReg, (outs VR256:$dst), "vsha512msg1\t{$src2, $dst|$dst, $src2}", [(set VR256:$dst, (int_x86_vsha512msg1 VR256:$src1, VR128:$src2))]>, VEX_L, - VEX, T8XD, Sched<[WriteVecIMul]>; + VEX, T8, XD, Sched<[WriteVecIMul]>; def VSHA512MSG2rr : I<0xcd, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), "vsha512msg2\t{$src2, $dst|$dst, $src2}", [(set VR256:$dst, (int_x86_vsha512msg2 VR256:$src1, VR256:$src2))]>, VEX_L, - VEX, T8XD, Sched<[WriteVecIMul]>; + VEX, T8, XD, Sched<[WriteVecIMul]>; def VSHA512RNDS2rr : I<0xcb, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, VR128:$src3), "vsha512rnds2\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR256:$dst, (int_x86_vsha512rnds2 VR256:$src1, VR256:$src2, VR128:$src3))]>, - VEX_L, VEX_4V, T8XD, Sched<[WriteVecIMul]>; + VEX_L, VEX, VVVV, T8, XD, Sched<[WriteVecIMul]>; } // FIXME: Is there a better scheduler class for SM3 than WriteVecIMul? 
@@ -8325,14 +8361,14 @@ let Predicates = [HasSM3], Constraints = "$src1 = $dst" in { [(set VR128:$dst, (!cast<Intrinsic>("int_x86_"#OpStr) VR128:$src1, VR128:$src2, VR128:$src3))]>, - Sched<[WriteVecIMul]>, VEX_4V; + Sched<[WriteVecIMul]>, VEX, VVVV; def rm : I<0xda, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i128mem:$src3), !strconcat(OpStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR128:$dst, (!cast<Intrinsic>("int_x86_"#OpStr) VR128:$src1, VR128:$src2, (loadv4i32 addr:$src3)))]>, - Sched<[WriteVecIMul]>, VEX_4V; + Sched<[WriteVecIMul]>, VEX, VVVV; } multiclass VSM3RNDS2_Base { @@ -8353,9 +8389,9 @@ let Predicates = [HasSM3], Constraints = "$src1 = $dst" in { } } -defm VSM3MSG1 : SM3_Base<"vsm3msg1">, T8PS; -defm VSM3MSG2 : SM3_Base<"vsm3msg2">, T8PD; -defm VSM3RNDS2 : VSM3RNDS2_Base, VEX_4V, TAPD; +defm VSM3MSG1 : SM3_Base<"vsm3msg1">, T8; +defm VSM3MSG2 : SM3_Base<"vsm3msg2">, T8, PD; +defm VSM3RNDS2 : VSM3RNDS2_Base, VEX, VVVV, TA, PD; // FIXME: Is there a better scheduler class for SM4 than WriteVecIMul? 
let Predicates = [HasSM4] in { @@ -8376,10 +8412,10 @@ let Predicates = [HasSM4] in { } } -defm VSM4KEY4 : SM4_Base<"vsm4key4", VR128, "128", loadv4i32, i128mem>, T8XS, VEX_4V; -defm VSM4KEY4Y : SM4_Base<"vsm4key4", VR256, "256", loadv8i32, i256mem>, T8XS, VEX_L, VEX_4V; -defm VSM4RNDS4 : SM4_Base<"vsm4rnds4", VR128, "128", loadv4i32, i128mem>, T8XD, VEX_4V; -defm VSM4RNDS4Y : SM4_Base<"vsm4rnds4", VR256, "256", loadv8i32, i256mem>, T8XD, VEX_L, VEX_4V; +defm VSM4KEY4 : SM4_Base<"vsm4key4", VR128, "128", loadv4i32, i128mem>, T8, XS, VEX, VVVV; +defm VSM4KEY4Y : SM4_Base<"vsm4key4", VR256, "256", loadv8i32, i256mem>, T8, XS, VEX_L, VEX, VVVV; +defm VSM4RNDS4 : SM4_Base<"vsm4rnds4", VR128, "128", loadv4i32, i128mem>, T8, XD, VEX, VVVV; +defm VSM4RNDS4Y : SM4_Base<"vsm4rnds4", VR256, "256", loadv8i32, i256mem>, T8, XD, VEX_L, VEX, VVVV; let Predicates = [HasAVXVNNIINT16], Constraints = "$src1 = $dst" in multiclass avx_vnni_int16<bits<8> opc, string OpcodeStr, bit IsCommutable> { @@ -8390,7 +8426,7 @@ multiclass avx_vnni_int16<bits<8> opc, string OpcodeStr, bit IsCommutable> { [(set VR128:$dst, (v4i32 (!cast<Intrinsic>("int_x86_avx2_"#OpcodeStr#"_128") VR128:$src1, VR128:$src2, VR128:$src3)))]>, - VEX_4V, Sched<[SchedWriteVecIMul.XMM]>; + VEX, VVVV, Sched<[SchedWriteVecIMul.XMM]>; def rm : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i128mem:$src3), @@ -8398,7 +8434,7 @@ multiclass avx_vnni_int16<bits<8> opc, string OpcodeStr, bit IsCommutable> { [(set VR128:$dst, (v4i32 (!cast<Intrinsic>("int_x86_avx2_"#OpcodeStr#"_128") VR128:$src1, VR128:$src2, (loadv4i32 addr:$src3))))]>, - VEX_4V, Sched<[SchedWriteVecIMul.XMM]>; + VEX, VVVV, Sched<[SchedWriteVecIMul.XMM]>; let isCommutable = IsCommutable in def Yrr : I<opc, MRMSrcReg, (outs VR256:$dst), @@ -8407,7 +8443,7 @@ multiclass avx_vnni_int16<bits<8> opc, string OpcodeStr, bit IsCommutable> { [(set VR256:$dst, (v8i32 (!cast<Intrinsic>("int_x86_avx2_"#OpcodeStr#"_256") VR256:$src1, VR256:$src2, 
VR256:$src3)))]>, - VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>; + VEX, VVVV, VEX_L, Sched<[SchedWriteVecIMul.YMM]>; def Yrm : I<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i256mem:$src3), @@ -8415,12 +8451,12 @@ multiclass avx_vnni_int16<bits<8> opc, string OpcodeStr, bit IsCommutable> { [(set VR256:$dst, (v8i32 (!cast<Intrinsic>("int_x86_avx2_"#OpcodeStr#"_256") VR256:$src1, VR256:$src2, (loadv8i32 addr:$src3))))]>, - VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>; + VEX, VVVV, VEX_L, Sched<[SchedWriteVecIMul.YMM]>; } -defm VPDPWSUD : avx_vnni_int16<0xd2, "vpdpwsud", 0>, T8XS; -defm VPDPWSUDS : avx_vnni_int16<0xd3, "vpdpwsuds", 0>, T8XS; -defm VPDPWUSD : avx_vnni_int16<0xd2, "vpdpwusd", 0>, T8PD; -defm VPDPWUSDS : avx_vnni_int16<0xd3, "vpdpwusds", 0>, T8PD; -defm VPDPWUUD : avx_vnni_int16<0xd2, "vpdpwuud", 1>, T8PS; -defm VPDPWUUDS : avx_vnni_int16<0xd3, "vpdpwuuds", 1>, T8PS; +defm VPDPWSUD : avx_vnni_int16<0xd2, "vpdpwsud", 0>, T8, XS; +defm VPDPWSUDS : avx_vnni_int16<0xd3, "vpdpwsuds", 0>, T8, XS; +defm VPDPWUSD : avx_vnni_int16<0xd2, "vpdpwusd", 0>, T8, PD; +defm VPDPWUSDS : avx_vnni_int16<0xd3, "vpdpwusds", 0>, T8, PD; +defm VPDPWUUD : avx_vnni_int16<0xd2, "vpdpwuud", 1>, T8; +defm VPDPWUUDS : avx_vnni_int16<0xd3, "vpdpwuuds", 1>, T8; diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrShiftRotate.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrShiftRotate.td index 48bf23f8cbf7..d13e3b7af69a 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrShiftRotate.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrShiftRotate.td @@ -829,12 +829,12 @@ multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop, let hasSideEffects = 0 in { def ri#Suffix : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, u8imm:$src2), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, - TAXD, VEX, Sched<[WriteShift]>; + TA, XD, VEX, Sched<[WriteShift]>; let mayLoad = 1 in def mi#Suffix : Ii8<0xF0, 
MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, u8imm:$src2), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, - TAXD, VEX, Sched<[WriteShiftLd]>; + TA, XD, VEX, Sched<[WriteShiftLd]>; } } @@ -860,23 +860,23 @@ let hasSideEffects = 0 in { let Predicates = [HasBMI2, NoEGPR] in { defm RORX32 : bmi_rotate<"rorx{l}", GR32, i32mem>; defm RORX64 : bmi_rotate<"rorx{q}", GR64, i64mem>, REX_W; - defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem>, T8XS; - defm SARX64 : bmi_shift<"sarx{q}", GR64, i64mem>, T8XS, REX_W; - defm SHRX32 : bmi_shift<"shrx{l}", GR32, i32mem>, T8XD; - defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8XD, REX_W; - defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8PD; - defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8PD, REX_W; + defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem>, T8, XS; + defm SARX64 : bmi_shift<"sarx{q}", GR64, i64mem>, T8, XS, REX_W; + defm SHRX32 : bmi_shift<"shrx{l}", GR32, i32mem>, T8, XD; + defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8, XD, REX_W; + defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8, PD; + defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8, PD, REX_W; } let Predicates = [HasBMI2, HasEGPR] in { defm RORX32 : bmi_rotate<"rorx{l}", GR32, i32mem, "_EVEX">, EVEX; defm RORX64 : bmi_rotate<"rorx{q}", GR64, i64mem, "_EVEX">, REX_W, EVEX; - defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem, "_EVEX">, T8XS, EVEX; - defm SARX64 : bmi_shift<"sarx{q}", GR64, i64mem, "_EVEX">, T8XS, REX_W, EVEX; - defm SHRX32 : bmi_shift<"shrx{l}", GR32, i32mem, "_EVEX">, T8XD, EVEX; - defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem, "_EVEX">, T8XD, REX_W, EVEX; - defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem, "_EVEX">, T8PD, EVEX; - defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem, "_EVEX">, T8PD, REX_W, EVEX; + defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem, "_EVEX">, T8, XS, EVEX; + defm SARX64 : bmi_shift<"sarx{q}", GR64, i64mem, "_EVEX">, T8, XS, REX_W, EVEX; + defm SHRX32 : bmi_shift<"shrx{l}", GR32, 
i32mem, "_EVEX">, T8, XD, EVEX; + defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem, "_EVEX">, T8, XD, REX_W, EVEX; + defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem, "_EVEX">, T8, PD, EVEX; + defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem, "_EVEX">, T8, PD, REX_W, EVEX; } let Predicates = [HasBMI2] in { diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td index cbb5d4ed5bbd..efb58c6102dd 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td @@ -426,31 +426,31 @@ let SchedRW = [WriteSystem] in { let Uses = [EAX, ECX, EDX] in def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB; let Uses = [EAX, ECX, EDX] in -def WRMSRNS : I<0x01, MRM_C6, (outs), (ins), "wrmsrns", []>, PS; +def WRMSRNS : I<0x01, MRM_C6, (outs), (ins), "wrmsrns", []>, TB; let Defs = [EAX, EDX], Uses = [ECX] in def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB; let Defs = [RAX, EFLAGS], Uses = [RBX, RCX], Predicates = [In64BitMode] in -def PBNDKB : I<0x01, MRM_C7, (outs), (ins), "pbndkb", []>, PS; +def PBNDKB : I<0x01, MRM_C7, (outs), (ins), "pbndkb", []>, TB; let Uses = [RSI, RDI, RCX], Predicates = [In64BitMode] in { -def WRMSRLIST : I<0x01, MRM_C6, (outs), (ins), "wrmsrlist", []>, XS; -def RDMSRLIST : I<0x01, MRM_C6, (outs), (ins), "rdmsrlist", []>, XD; +def WRMSRLIST : I<0x01, MRM_C6, (outs), (ins), "wrmsrlist", []>, TB, XS; +def RDMSRLIST : I<0x01, MRM_C6, (outs), (ins), "rdmsrlist", []>, TB, XD; } let Predicates = [HasUSERMSR], mayLoad = 1 in { def URDMSRrr : I<0xf8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "urdmsr\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (int_x86_urdmsr GR64:$src))]>, T8XD; + [(set GR64:$dst, (int_x86_urdmsr GR64:$src))]>, T8, XD; def URDMSRri : Ii32<0xf8, MRM0r, (outs GR64:$dst), (ins i64i32imm:$imm), "urdmsr\t{$imm, $dst|$dst, $imm}", - [(set GR64:$dst, (int_x86_urdmsr 
i64immSExt32_su:$imm))]>, T_MAP7XD, VEX; + [(set GR64:$dst, (int_x86_urdmsr i64immSExt32_su:$imm))]>, T_MAP7, XD, VEX; } let Predicates = [HasUSERMSR], mayStore = 1 in { def UWRMSRrr : I<0xf8, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), "uwrmsr\t{$src1, $src2|$src2, $src1}", - [(int_x86_uwrmsr GR64:$src1, GR64:$src2)]>, T8XS; + [(int_x86_uwrmsr GR64:$src1, GR64:$src2)]>, T8, XS; def UWRMSRir : Ii32<0xf8, MRM0r, (outs), (ins GR64:$src, i64i32imm:$imm), "uwrmsr\t{$src, $imm|$imm, $src}", - [(int_x86_uwrmsr GR64:$src, i64immSExt32_su:$imm)]>, T_MAP7XS, VEX; + [(int_x86_uwrmsr GR64:$src, i64immSExt32_su:$imm)]>, T_MAP7, XS, VEX; } let Defs = [RAX, RDX], Uses = [ECX] in def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB; @@ -481,12 +481,12 @@ let Defs = [EAX, EBX, ECX, EDX], Uses = [EAX, ECX] in // Cache instructions let SchedRW = [WriteSystem] in { def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB; -def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [(int_x86_wbinvd)]>, PS; +def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [(int_x86_wbinvd)]>, TB, PS; // wbnoinvd is like wbinvd, except without invalidation // encoding: like wbinvd + an 0xF3 prefix def WBNOINVD : I<0x09, RawFrm, (outs), (ins), "wbnoinvd", - [(int_x86_wbnoinvd)]>, XS, + [(int_x86_wbnoinvd)]>, TB, XS, Requires<[HasWBNOINVD]>; } // SchedRW @@ -497,57 +497,74 @@ let SchedRW = [WriteSystem] in { let Uses = [SSP] in { let Defs = [SSP] in { def INCSSPD : I<0xAE, MRM5r, (outs), (ins GR32:$src), "incsspd\t$src", - [(int_x86_incsspd GR32:$src)]>, XS; + [(int_x86_incsspd GR32:$src)]>, TB, XS; def INCSSPQ : RI<0xAE, MRM5r, (outs), (ins GR64:$src), "incsspq\t$src", - [(int_x86_incsspq GR64:$src)]>, XS; + [(int_x86_incsspq GR64:$src)]>, TB, XS; } // Defs SSP let Constraints = "$src = $dst" in { def RDSSPD : I<0x1E, MRM1r, (outs GR32:$dst), (ins GR32:$src), "rdsspd\t$dst", - [(set GR32:$dst, (int_x86_rdsspd GR32:$src))]>, XS; + [(set GR32:$dst, (int_x86_rdsspd GR32:$src))]>, TB, XS; def 
RDSSPQ : RI<0x1E, MRM1r, (outs GR64:$dst), (ins GR64:$src), "rdsspq\t$dst", - [(set GR64:$dst, (int_x86_rdsspq GR64:$src))]>, XS; + [(set GR64:$dst, (int_x86_rdsspq GR64:$src))]>, TB, XS; } let Defs = [SSP] in { def SAVEPREVSSP : I<0x01, MRM_EA, (outs), (ins), "saveprevssp", - [(int_x86_saveprevssp)]>, XS; + [(int_x86_saveprevssp)]>, TB, XS; def RSTORSSP : I<0x01, MRM5m, (outs), (ins i32mem:$src), "rstorssp\t$src", - [(int_x86_rstorssp addr:$src)]>, XS; + [(int_x86_rstorssp addr:$src)]>, TB, XS; } // Defs SSP } // Uses SSP +let Predicates = [NoEGPR] in { def WRSSD : I<0xF6, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "wrssd\t{$src, $dst|$dst, $src}", - [(int_x86_wrssd GR32:$src, addr:$dst)]>, T8PS; + [(int_x86_wrssd GR32:$src, addr:$dst)]>, T8; def WRSSQ : RI<0xF6, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "wrssq\t{$src, $dst|$dst, $src}", - [(int_x86_wrssq GR64:$src, addr:$dst)]>, T8PS; + [(int_x86_wrssq GR64:$src, addr:$dst)]>, T8; def WRUSSD : I<0xF5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "wrussd\t{$src, $dst|$dst, $src}", - [(int_x86_wrussd GR32:$src, addr:$dst)]>, T8PD; + [(int_x86_wrussd GR32:$src, addr:$dst)]>, T8, PD; def WRUSSQ : RI<0xF5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "wrussq\t{$src, $dst|$dst, $src}", - [(int_x86_wrussq GR64:$src, addr:$dst)]>, T8PD; + [(int_x86_wrussq GR64:$src, addr:$dst)]>, T8, PD; +} + +let Predicates = [HasEGPR, In64BitMode] in { + def WRSSD_EVEX : I<0x66, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), + "wrssd\t{$src, $dst|$dst, $src}", + [(int_x86_wrssd GR32:$src, addr:$dst)]>, EVEX, NoCD8, T_MAP4; + def WRSSQ_EVEX : RI<0x66, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "wrssq\t{$src, $dst|$dst, $src}", + [(int_x86_wrssq GR64:$src, addr:$dst)]>, EVEX, NoCD8, T_MAP4; + def WRUSSD_EVEX : I<0x65, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), + "wrussd\t{$src, $dst|$dst, $src}", + [(int_x86_wrussd GR32:$src, addr:$dst)]>, EVEX, NoCD8, T_MAP4, PD; + def WRUSSQ_EVEX : RI<0x65, 
MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "wrussq\t{$src, $dst|$dst, $src}", + [(int_x86_wrussq GR64:$src, addr:$dst)]>, EVEX, NoCD8, T_MAP4, PD; +} let Defs = [SSP] in { let Uses = [SSP] in { def SETSSBSY : I<0x01, MRM_E8, (outs), (ins), "setssbsy", - [(int_x86_setssbsy)]>, XS; + [(int_x86_setssbsy)]>, TB, XS; } // Uses SSP def CLRSSBSY : I<0xAE, MRM6m, (outs), (ins i32mem:$src), "clrssbsy\t$src", - [(int_x86_clrssbsy addr:$src)]>, XS; + [(int_x86_clrssbsy addr:$src)]>, TB, XS; } // Defs SSP } // SchedRW let SchedRW = [WriteSystem] in { - def ENDBR64 : I<0x1E, MRM_FA, (outs), (ins), "endbr64", []>, XS; - def ENDBR32 : I<0x1E, MRM_FB, (outs), (ins), "endbr32", []>, XS; + def ENDBR64 : I<0x1E, MRM_FA, (outs), (ins), "endbr64", []>, TB, XS; + def ENDBR32 : I<0x1E, MRM_FB, (outs), (ins), "endbr32", []>, TB, XS; } // SchedRW //===----------------------------------------------------------------------===// @@ -557,51 +574,51 @@ let SchedRW = [WriteSystem] in { // on Windows without needing to enable the xsave feature to be compatible with // MSVC. 
let Defs = [EDX, EAX], Uses = [ECX] in -def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, PS; +def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB; let Uses = [EDX, EAX, ECX] in def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", - [(int_x86_xsetbv ECX, EDX, EAX)]>, PS; + [(int_x86_xsetbv ECX, EDX, EAX)]>, TB; let Uses = [EDX, EAX] in { def XSAVE : I<0xAE, MRM4m, (outs), (ins opaquemem:$dst), "xsave\t$dst", - [(int_x86_xsave addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE]>; + [(int_x86_xsave addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVE]>; def XSAVE64 : RI<0xAE, MRM4m, (outs), (ins opaquemem:$dst), "xsave64\t$dst", - [(int_x86_xsave64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE, In64BitMode]>; + [(int_x86_xsave64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVE, In64BitMode]>; def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaquemem:$dst), "xrstor\t$dst", - [(int_x86_xrstor addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE]>; + [(int_x86_xrstor addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVE]>; def XRSTOR64 : RI<0xAE, MRM5m, (outs), (ins opaquemem:$dst), "xrstor64\t$dst", - [(int_x86_xrstor64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE, In64BitMode]>; + [(int_x86_xrstor64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVE, In64BitMode]>; def XSAVEOPT : I<0xAE, MRM6m, (outs), (ins opaquemem:$dst), "xsaveopt\t$dst", - [(int_x86_xsaveopt addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEOPT]>; + [(int_x86_xsaveopt addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVEOPT]>; def XSAVEOPT64 : RI<0xAE, MRM6m, (outs), (ins opaquemem:$dst), "xsaveopt64\t$dst", - [(int_x86_xsaveopt64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEOPT, In64BitMode]>; + [(int_x86_xsaveopt64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVEOPT, In64BitMode]>; def XSAVEC : I<0xC7, MRM4m, (outs), (ins opaquemem:$dst), "xsavec\t$dst", - [(int_x86_xsavec addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEC]>; + [(int_x86_xsavec addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVEC]>; def XSAVEC64 : RI<0xC7, 
MRM4m, (outs), (ins opaquemem:$dst), "xsavec64\t$dst", - [(int_x86_xsavec64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEC, In64BitMode]>; + [(int_x86_xsavec64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVEC, In64BitMode]>; def XSAVES : I<0xC7, MRM5m, (outs), (ins opaquemem:$dst), "xsaves\t$dst", - [(int_x86_xsaves addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVES]>; + [(int_x86_xsaves addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES]>; def XSAVES64 : RI<0xC7, MRM5m, (outs), (ins opaquemem:$dst), "xsaves64\t$dst", - [(int_x86_xsaves64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE, In64BitMode]>; + [(int_x86_xsaves64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVE, In64BitMode]>; def XRSTORS : I<0xC7, MRM3m, (outs), (ins opaquemem:$dst), "xrstors\t$dst", - [(int_x86_xrstors addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVES]>; + [(int_x86_xrstors addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES]>; def XRSTORS64 : RI<0xC7, MRM3m, (outs), (ins opaquemem:$dst), "xrstors64\t$dst", - [(int_x86_xrstors64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVES, In64BitMode]>; + [(int_x86_xrstors64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES, In64BitMode]>; } // Uses } // SchedRW @@ -634,10 +651,10 @@ let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in let SchedRW = [WriteSystem] in { let Defs = [EAX, EDX], Uses = [ECX] in def RDPKRUr : I<0x01, MRM_EE, (outs), (ins), "rdpkru", - [(set EAX, (X86rdpkru ECX)), (implicit EDX)]>, PS; + [(set EAX, (X86rdpkru ECX)), (implicit EDX)]>, TB; let Uses = [EAX, ECX, EDX] in def WRPKRUr : I<0x01, MRM_EF, (outs), (ins), "wrpkru", - [(X86wrpkru EAX, EDX, ECX)]>, PS; + [(X86wrpkru EAX, EDX, ECX)]>, TB; } // SchedRW //===----------------------------------------------------------------------===// @@ -645,28 +662,28 @@ let Uses = [EAX, ECX, EDX] in let Predicates = [HasFSGSBase, In64BitMode], SchedRW = [WriteSystem] in { def RDFSBASE : I<0xAE, MRM0r, (outs GR32:$dst), (ins), "rdfsbase{l}\t$dst", - [(set GR32:$dst, (int_x86_rdfsbase_32))]>, XS; + [(set 
GR32:$dst, (int_x86_rdfsbase_32))]>, TB, XS; def RDFSBASE64 : RI<0xAE, MRM0r, (outs GR64:$dst), (ins), "rdfsbase{q}\t$dst", - [(set GR64:$dst, (int_x86_rdfsbase_64))]>, XS; + [(set GR64:$dst, (int_x86_rdfsbase_64))]>, TB, XS; def RDGSBASE : I<0xAE, MRM1r, (outs GR32:$dst), (ins), "rdgsbase{l}\t$dst", - [(set GR32:$dst, (int_x86_rdgsbase_32))]>, XS; + [(set GR32:$dst, (int_x86_rdgsbase_32))]>, TB, XS; def RDGSBASE64 : RI<0xAE, MRM1r, (outs GR64:$dst), (ins), "rdgsbase{q}\t$dst", - [(set GR64:$dst, (int_x86_rdgsbase_64))]>, XS; + [(set GR64:$dst, (int_x86_rdgsbase_64))]>, TB, XS; def WRFSBASE : I<0xAE, MRM2r, (outs), (ins GR32:$src), "wrfsbase{l}\t$src", - [(int_x86_wrfsbase_32 GR32:$src)]>, XS; + [(int_x86_wrfsbase_32 GR32:$src)]>, TB, XS; def WRFSBASE64 : RI<0xAE, MRM2r, (outs), (ins GR64:$src), "wrfsbase{q}\t$src", - [(int_x86_wrfsbase_64 GR64:$src)]>, XS; + [(int_x86_wrfsbase_64 GR64:$src)]>, TB, XS; def WRGSBASE : I<0xAE, MRM3r, (outs), (ins GR32:$src), "wrgsbase{l}\t$src", - [(int_x86_wrgsbase_32 GR32:$src)]>, XS; + [(int_x86_wrgsbase_32 GR32:$src)]>, TB, XS; def WRGSBASE64 : RI<0xAE, MRM3r, (outs), (ins GR64:$src), "wrgsbase{q}\t$src", - [(int_x86_wrgsbase_64 GR64:$src)]>, XS; + [(int_x86_wrgsbase_64 GR64:$src)]>, TB, XS; } //===----------------------------------------------------------------------===// @@ -674,15 +691,15 @@ let Predicates = [HasFSGSBase, In64BitMode], SchedRW = [WriteSystem] in { let SchedRW = [WriteSystem] in { def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), "invpcid\t{$src2, $src1|$src1, $src2}", - [(int_x86_invpcid GR32:$src1, addr:$src2)]>, T8PD, + [(int_x86_invpcid GR32:$src1, addr:$src2)]>, T8, PD, Requires<[Not64BitMode, HasINVPCID]>; def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), - "invpcid\t{$src2, $src1|$src1, $src2}", []>, T8PD, + "invpcid\t{$src2, $src1|$src1, $src2}", []>, T8, PD, Requires<[In64BitMode, HasINVPCID]>; def INVPCID64_EVEX : I<0xF2, MRMSrcMem, (outs), (ins 
GR64:$src1, i128mem:$src2), "invpcid\t{$src2, $src1|$src1, $src2}", []>, - EVEX_NoCD8, T_MAP4XS, Requires<[In64BitMode, HasINVPCID]>; + EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasINVPCID]>; } // SchedRW let Predicates = [In64BitMode, HasINVPCID] in { @@ -701,15 +718,15 @@ let Predicates = [In64BitMode, HasINVPCID] in { //===----------------------------------------------------------------------===// // SMAP Instruction let Defs = [EFLAGS], SchedRW = [WriteSystem] in { - def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", []>, PS; - def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", []>, PS; + def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", []>, TB; + def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", []>, TB; } //===----------------------------------------------------------------------===// // SMX Instruction let SchedRW = [WriteSystem] in { let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX] in { - def GETSEC : I<0x37, RawFrm, (outs), (ins), "getsec", []>, PS; + def GETSEC : I<0x37, RawFrm, (outs), (ins), "getsec", []>, TB; } // Uses, Defs } // SchedRW @@ -730,9 +747,9 @@ def STI : I<0xFB, RawFrm, (outs), (ins), "sti", []>; // RDPID Instruction let SchedRW = [WriteSystem] in { def RDPID32 : I<0xC7, MRM7r, (outs GR32:$dst), (ins), - "rdpid\t$dst", [(set GR32:$dst, (int_x86_rdpid))]>, XS, + "rdpid\t$dst", [(set GR32:$dst, (int_x86_rdpid))]>, TB, XS, Requires<[Not64BitMode, HasRDPID]>; -def RDPID64 : I<0xC7, MRM7r, (outs GR64:$dst), (ins), "rdpid\t$dst", []>, XS, +def RDPID64 : I<0xC7, MRM7r, (outs GR64:$dst), (ins), "rdpid\t$dst", []>, TB, XS, Requires<[In64BitMode, HasRDPID]>; } // SchedRW @@ -748,17 +765,17 @@ let Predicates = [In64BitMode, HasRDPID] in { // PTWRITE Instruction - Write Data to a Processor Trace Packet let SchedRW = [WriteSystem] in { def PTWRITEm: I<0xAE, MRM4m, (outs), (ins i32mem:$dst), - "ptwrite{l}\t$dst", [(int_x86_ptwrite32 (loadi32 addr:$dst))]>, XS, + "ptwrite{l}\t$dst", [(int_x86_ptwrite32 (loadi32 addr:$dst))]>, TB, XS, 
Requires<[HasPTWRITE]>; def PTWRITE64m : RI<0xAE, MRM4m, (outs), (ins i64mem:$dst), - "ptwrite{q}\t$dst", [(int_x86_ptwrite64 (loadi64 addr:$dst))]>, XS, + "ptwrite{q}\t$dst", [(int_x86_ptwrite64 (loadi64 addr:$dst))]>, TB, XS, Requires<[In64BitMode, HasPTWRITE]>; def PTWRITEr : I<0xAE, MRM4r, (outs), (ins GR32:$dst), - "ptwrite{l}\t$dst", [(int_x86_ptwrite32 GR32:$dst)]>, XS, + "ptwrite{l}\t$dst", [(int_x86_ptwrite32 GR32:$dst)]>, TB, XS, Requires<[HasPTWRITE]>; def PTWRITE64r : RI<0xAE, MRM4r, (outs), (ins GR64:$dst), - "ptwrite{q}\t$dst", [(int_x86_ptwrite64 GR64:$dst)]>, XS, + "ptwrite{q}\t$dst", [(int_x86_ptwrite64 GR64:$dst)]>, TB, XS, Requires<[In64BitMode, HasPTWRITE]>; } // SchedRW @@ -767,7 +784,7 @@ def PTWRITE64r : RI<0xAE, MRM4r, (outs), (ins GR64:$dst), let SchedRW = [WriteSystem] in { let Uses = [ECX], Defs = [EAX, EDX] in - def RDPRU : I<0x01, MRM_FD, (outs), (ins), "rdpru", []>, PS, + def RDPRU : I<0x01, MRM_FD, (outs), (ins), "rdpru", []>, TB, Requires<[HasRDPRU]>; } @@ -786,6 +803,6 @@ let Uses = [ECX], Defs = [EAX, EDX] in let SchedRW = [WriteSystem] in { let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX, RDX, EFLAGS] in - def PCONFIG : I<0x01, MRM_C5, (outs), (ins), "pconfig", []>, PS, + def PCONFIG : I<0x01, MRM_C5, (outs), (ins), "pconfig", []>, TB, Requires<[HasPCONFIG]>; } // SchedRW diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTBM.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTBM.td index ed514038a12e..09200f0c1a9f 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTBM.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTBM.td @@ -46,11 +46,11 @@ multiclass tbm_binary_rm<bits<8> opc, Format FormReg, Format FormMem, let hasSideEffects = 0 in { def rr : I<opc, FormReg, (outs RC:$dst), (ins RC:$src), !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"), []>, - XOP_4V, XOP9, Sched<[Sched]>; + XOP, VVVV, XOP9, Sched<[Sched]>; let mayLoad = 1 in def rm : I<opc, FormMem, (outs RC:$dst), (ins 
x86memop:$src), !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"), []>, - XOP_4V, XOP9, Sched<[Sched.Folded]>; + XOP, VVVV, XOP9, Sched<[Sched.Folded]>; } } diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTDX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTDX.td index 8d7cd6082095..fe01677b2ea1 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTDX.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTDX.td @@ -17,23 +17,17 @@ // 64-bit only instructions let SchedRW = [WriteSystem], Predicates = [In64BitMode] in { // SEAMCALL - Call to SEAM VMX-root Operation Module -def SEAMCALL : I<0x01, MRM_CF, (outs), (ins), - "seamcall", []>, PD; +def SEAMCALL : I<0x01, MRM_CF, (outs), (ins), "seamcall", []>, TB, PD; // SEAMRET - Return to Legacy VMX-root Operation -def SEAMRET : I<0x01, MRM_CD, (outs), (ins), - "seamret", []>, PD; +def SEAMRET : I<0x01, MRM_CD, (outs), (ins), "seamret", []>, TB, PD; // SEAMOPS - SEAM Operations -def SEAMOPS : I<0x01, MRM_CE, (outs), (ins), - "seamops", []>, PD; - +def SEAMOPS : I<0x01, MRM_CE, (outs), (ins), "seamops", []>, TB, PD; } // SchedRW // common instructions let SchedRW = [WriteSystem] in { // TDCALL - Call SEAM Module Functions -def TDCALL : I<0x01, MRM_CC, (outs), (ins), - "tdcall", []>, PD; - +def TDCALL : I<0x01, MRM_CC, (outs), (ins), "tdcall", []>, TB, PD; } // SchedRW diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTSX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTSX.td index 7671eb4676ee..57604b682d54 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTSX.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTSX.td @@ -37,11 +37,11 @@ def XABORT_DEF : I<0, Pseudo, (outs), (ins), "# XABORT DEF", []>; } def XEND : I<0x01, MRM_D5, (outs), (ins), - "xend", [(int_x86_xend)]>, PS, Requires<[HasRTM]>; + "xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>; let Defs = [EFLAGS] in def XTEST : I<0x01, MRM_D6, (outs), (ins), - "xtest", [(set EFLAGS, (X86xtest))]>, 
PS, Requires<[HasRTM]>; + "xtest", [(set EFLAGS, (X86xtest))]>, TB, Requires<[HasRTM]>; def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm), "xabort\t$imm", diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrUtils.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrUtils.td index 2f056f2ead62..9499753143d9 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrUtils.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrUtils.td @@ -27,50 +27,26 @@ class REP { bit hasREPPrefix = 1; } class TB { Map OpMap = TB; } class T8 { Map OpMap = T8; } class TA { Map OpMap = TA; } -class XOP8 { Map OpMap = XOP8; Prefix OpPrefix = PS; } -class XOP9 { Map OpMap = XOP9; Prefix OpPrefix = PS; } -class XOPA { Map OpMap = XOPA; Prefix OpPrefix = PS; } +class T_MAP4 { Map OpMap = T_MAP4; } +class T_MAP5 { Map OpMap = T_MAP5; } +class T_MAP6 { Map OpMap = T_MAP6; } +class T_MAP7 { Map OpMap = T_MAP7; } +class XOP8 { Map OpMap = XOP8; } +class XOP9 { Map OpMap = XOP9; } +class XOPA { Map OpMap = XOPA; } class ThreeDNow { Map OpMap = ThreeDNow; } -class T_MAP4 { Map OpMap = T_MAP4; } -class T_MAP4PS : T_MAP4 { Prefix OpPrefix = PS; } // none -class T_MAP4PD : T_MAP4 { Prefix OpPrefix = PD; } // 0x66 -class T_MAP4XS : T_MAP4 { Prefix OpPrefix = XS; } // 0xF3 -class T_MAP4XD : T_MAP4 { Prefix OpPrefix = XD; } // 0xF2 -class T_MAP5 { Map OpMap = T_MAP5; } -class T_MAP5PS : T_MAP5 { Prefix OpPrefix = PS; } // none -class T_MAP5PD : T_MAP5 { Prefix OpPrefix = PD; } // 0x66 -class T_MAP5XS : T_MAP5 { Prefix OpPrefix = XS; } // 0xF3 -class T_MAP5XD : T_MAP5 { Prefix OpPrefix = XD; } // 0xF2 -class T_MAP6 { Map OpMap = T_MAP6; } -class T_MAP6PS : T_MAP6 { Prefix OpPrefix = PS; } -class T_MAP6PD : T_MAP6 { Prefix OpPrefix = PD; } -class T_MAP6XS : T_MAP6 { Prefix OpPrefix = XS; } -class T_MAP6XD : T_MAP6 { Prefix OpPrefix = XD; } -class T_MAP7 { Map OpMap = T_MAP7; } -class T_MAP7XS : T_MAP7 { Prefix OpPrefix = XS; } // 0xF3 -class T_MAP7XD : T_MAP7 { Prefix OpPrefix = 
XD; } // 0xF2 -class OBXS { Prefix OpPrefix = XS; } -class PS : TB { Prefix OpPrefix = PS; } -class PD : TB { Prefix OpPrefix = PD; } -class XD : TB { Prefix OpPrefix = XD; } -class XS : TB { Prefix OpPrefix = XS; } -class T8PS : T8 { Prefix OpPrefix = PS; } -class T8PD : T8 { Prefix OpPrefix = PD; } -class T8XD : T8 { Prefix OpPrefix = XD; } -class T8XS : T8 { Prefix OpPrefix = XS; } -class TAPS : TA { Prefix OpPrefix = PS; } -class TAPD : TA { Prefix OpPrefix = PD; } -class TAXD : TA { Prefix OpPrefix = XD; } -class TAXS : TA { Prefix OpPrefix = XS; } +class PS { Prefix OpPrefix = PS; } +class PD { Prefix OpPrefix = PD; } +class XD { Prefix OpPrefix = XD; } +class XS { Prefix OpPrefix = XS; } class VEX { Encoding OpEnc = EncVEX; } class WIG { bit IgnoresW = 1; } // Special version of REX_W that can be changed to VEX.W==0 for EVEX2VEX. class VEX_W1X { bit hasREX_W = 1; bit EVEX_W1_VEX_W0 = 1; } -class VEX_4V : VEX { bit hasVEX_4V = 1; } class VEX_L { bit hasVEX_L = 1; } class VEX_LIG { bit ignoresVEX_L = 1; } +class VVVV { bit hasVEX_4V = 1; } class EVEX { Encoding OpEnc = EncEVEX; } -class EVEX_4V : EVEX { bit hasVEX_4V = 1; } class EVEX_K { bit hasEVEX_K = 1; } class EVEX_KZ : EVEX_K { bit hasEVEX_Z = 1; } class EVEX_B { bit hasEVEX_B = 1; } @@ -86,29 +62,28 @@ class EVEX_CD8<int esize, CD8VForm form> { int CD8_EltSize = !srl(esize, 3); bits<3> CD8_Form = form.Value; } -class EVEX_NoCD8 : EVEX { bits<7> CD8_Scale = 0; } +class NoCD8 { bits<7> CD8_Scale = 0; } class XOP { Encoding OpEnc = EncXOP; } -class XOP_4V : XOP { bit hasVEX_4V = 1; } class EVEX2VEXOverride<string VEXInstrName> { string EVEX2VEXOverride = VEXInstrName; } -class AVX512BIi8Base : PD { +class AVX512BIi8Base : TB, PD { Domain ExeDomain = SSEPackedInt; ImmType ImmT = Imm8; } -class AVX512XSIi8Base : XS { +class AVX512XSIi8Base : TB, XS { Domain ExeDomain = SSEPackedInt; ImmType ImmT = Imm8; } -class AVX512XDIi8Base : XD { +class AVX512XDIi8Base : TB, XD { Domain ExeDomain = SSEPackedInt; ImmType 
ImmT = Imm8; } -class AVX512PSIi8Base : PS { +class AVX512PSIi8Base : TB { Domain ExeDomain = SSEPackedSingle; ImmType ImmT = Imm8; } -class AVX512PDIi8Base : PD { +class AVX512PDIi8Base : TB, PD { Domain ExeDomain = SSEPackedDouble; ImmType ImmT = Imm8; } @@ -116,6 +91,14 @@ class NotEVEX2VEXConvertible { bit notEVEX2VEXConvertible = 1; } class ExplicitREX2Prefix { ExplicitOpPrefix explicitOpPrefix = ExplicitREX2; } class ExplicitVEXPrefix { ExplicitOpPrefix explicitOpPrefix = ExplicitVEX; } class ExplicitEVEXPrefix { ExplicitOpPrefix explicitOpPrefix = ExplicitEVEX; } +class DefEFLAGS { list<Register> Defs = [EFLAGS]; } +class UseEFLAGS { list<Register> Uses = [EFLAGS]; } +class DisassembleOnly { + // The disassembler should know about this, but not the asmparser. + bit isCodeGenOnly = 1; + bit ForceDisassemble = 1; +} + // SchedModel info for instruction that loads one value and gets the second // (and possibly third) value from a register. @@ -139,8 +122,7 @@ class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass, Operand immoperand, SDPatternOperator immoperator, SDPatternOperator immnosuoperator, Operand imm8operand, SDPatternOperator imm8operator, SDPatternOperator imm8nosuoperator, - bit hasOddOpcode, OperandSize opSize, - bit hasREX_W> { + bit hasEvenOpcode, bit hasREX_W> { /// VT - This is the value type itself. ValueType VT = vt; @@ -189,15 +171,10 @@ class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass, SDPatternOperator Imm8NoSuOperator = imm8nosuoperator; - /// HasOddOpcode - This bit is true if the instruction should have an odd (as - /// opposed to even) opcode. Operations on i8 are usually even, operations on - /// other datatypes are odd. - bit HasOddOpcode = hasOddOpcode; - - /// OpSize - Selects whether the instruction needs a 0x66 prefix based on - /// 16-bit vs 32-bit mode. i8/i64 set this to OpSizeFixed. i16 sets this - /// to Opsize16. i32 sets this to OpSize32. 
- OperandSize OpSize = opSize; + /// HasEvenOpcode - This bit is true if the instruction should have an even (as + /// opposed to odd) opcode. Operations on i8 are even, operations on + /// other datatypes are usually odd. + bit HasEvenOpcode = hasEvenOpcode; /// HasREX_W - This bit is set to true if the instruction should have /// the 0x40 REX prefix. This is set for i64 types. @@ -208,16 +185,16 @@ def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">; def Xi8 : X86TypeInfo<i8, "b", GR8, loadi8, i8mem, Imm8, i8imm, imm_su, imm, i8imm, invalid_node, invalid_node, - 0, OpSizeFixed, 0>; + 1, 0>; def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem, Imm16, i16imm, imm_su, imm, i16i8imm, i16immSExt8_su, i16immSExt8, - 1, OpSize16, 0>; + 0, 0>; def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem, Imm32, i32imm, imm_su, imm, i32i8imm, i32immSExt8_su, i32immSExt8, - 1, OpSize32, 0>; + 0, 0>; def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem, Imm32S, i64i32imm, - i64immSExt32_su, i64immSExt32, i64i8imm, i64immSExt8_su, - i64immSExt8, 1, OpSizeFixed, 1>; + i64immSExt32_su, i64immSExt32, i64i8imm, i64immSExt8_su, + i64immSExt8, 0, 1>; // Group template arguments that can be derived from the vector type (EltNum x // EltVT). These are things like the register class for the writemask, etc. 
@@ -585,26 +562,26 @@ class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm, class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE1]>; + : I<o, F, outs, ins, asm, pattern>, TB, XS, Requires<[UseSSE1]>; class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE1]>; + : Ii8<o, F, outs, ins, asm, pattern>, TB, XS, Requires<[UseSSE1]>; class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS, + : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB, Requires<[UseSSE1]>; class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS, + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB, Requires<[UseSSE1]>; class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS, + : I<o, F, outs, ins, !strconcat("v", asm), pattern>, TB, XS, Requires<[HasAVX]>; class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, PS, - Requires<[HasAVX]>; + : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, + TB, Requires<[HasAVX]>; // SSE2 Instruction Templates: // @@ -626,49 +603,49 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm, class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, XD, Requires<[UseSSE2]>; + : I<o, F, outs, ins, asm, pattern>, TB, XD, Requires<[UseSSE2]>; class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[UseSSE2]>; + : Ii8<o, F, outs, ins, asm, pattern>, TB, XD, 
Requires<[UseSSE2]>; class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>; + : I<o, F, outs, ins, asm, pattern>, TB, XS, Requires<[UseSSE2]>; class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>; + : Ii8<o, F, outs, ins, asm, pattern>, TB, XS, Requires<[UseSSE2]>; class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD, + : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, PD, Requires<[UseSSE2]>; class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD, + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, PD, Requires<[UseSSE2]>; class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD, + : I<o, F, outs, ins, !strconcat("v", asm), pattern>, TB, XD, Requires<[UseAVX]>; class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS, + : I<o, F, outs, ins, !strconcat("v", asm), pattern>, TB, XS, Requires<[HasAVX]>; class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>, - PD, Requires<[HasAVX]>; + TB, PD, Requires<[HasAVX]>; class VS2I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, !strconcat("v", asm), pattern>, PD, + : I<o, F, outs, ins, !strconcat("v", asm), pattern>, TB, PD, Requires<[UseAVX]>; class S2I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, PD, Requires<[UseSSE2]>; + : I<o, F, outs, ins, asm, pattern>, TB, PD, Requires<[UseSSE2]>; 
class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX, HasSSE2]>; + : Ii8<o, F, outs, ins, asm, pattern>, TB, XD, Requires<[HasMMX, HasSSE2]>; class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX, HasSSE2]>; + : Ii8<o, F, outs, ins, asm, pattern>, TB, XS, Requires<[HasMMX, HasSSE2]>; // SSE3 Instruction Templates: // @@ -678,15 +655,15 @@ class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS, + : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB, XS, Requires<[UseSSE3]>; class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD, + : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, XD, Requires<[UseSSE3]>; class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD, + : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, PD, Requires<[UseSSE3]>; @@ -703,19 +680,19 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD, + : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, PD, Requires<[UseSSSE3]>; class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD, + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, PD, Requires<[UseSSSE3]>; class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PS, + : I<o, F, outs, ins, asm, pattern, 
SSEPackedInt>, T8, Requires<[HasMMX, HasSSSE3]>; class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPS, + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, Requires<[HasMMX, HasSSSE3]>; // SSE4.1 Instruction Templates: @@ -725,11 +702,11 @@ class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, // class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD, + : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, PD, Requires<[UseSSE41]>; class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD, + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, PD, Requires<[UseSSE41]>; // SSE4.2 Instruction Templates: @@ -737,13 +714,13 @@ class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, // SS428I - SSE 4.2 instructions with T8 prefix. class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD, + : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, PD, Requires<[UseSSE42]>; // SS42AI = SSE 4.2 instructions with TA prefix class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD, + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, PD, Requires<[UseSSE42]>; // CRC32I - SSE 4.2 CRC32 instructions. @@ -751,42 +728,42 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm, // controlled by the SSE42 flag. 
class CRC32I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasCRC32]>; + : I<o, F, outs, ins, asm, pattern>, T8, XD, Requires<[HasCRC32]>; // AVX Instruction Templates: // Instructions introduced in AVX (no SSE equivalent forms) // -// AVX8I - AVX instructions with T8PD prefix. -// AVXAIi8 - AVX instructions with TAPD prefix and ImmT = Imm8. +// AVX8I - AVX instructions with T8, PD prefix. +// AVXAIi8 - AVX instructions with TA, PD prefix and ImmT = Imm8. class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD, + : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, PD, Requires<[HasAVX]>; class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD, + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, PD, Requires<[HasAVX]>; // AVX2 Instruction Templates: // Instructions introduced in AVX2 (no SSE equivalent forms) // -// AVX28I - AVX2 instructions with T8PD prefix. -// AVX2AIi8 - AVX2 instructions with TAPD prefix and ImmT = Imm8. +// AVX28I - AVX2 instructions with T8, PD prefix. +// AVX2AIi8 - AVX2 instructions with TA, PD prefix and ImmT = Imm8. class AVX28I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD, + : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, PD, Requires<[HasAVX2]>; class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD, + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, PD, Requires<[HasAVX2]>; // AVX-512 Instruction Templates: // Instructions introduced in AVX-512 (no SSE equivalent forms) // -// AVX5128I - AVX-512 instructions with T8PD prefix. -// AVX512AIi8 - AVX-512 instructions with TAPD prefix and ImmT = Imm8. 
+// AVX5128I - AVX-512 instructions with T8, PD prefix. +// AVX512AIi8 - AVX-512 instructions with TA, PD prefix and ImmT = Imm8. // AVX512PDI - AVX-512 instructions with PD, double packed. // AVX512PSI - AVX-512 instructions with PS, single packed. // AVX512XS8I - AVX-512 instructions with T8 and XS prefixes. @@ -796,39 +773,39 @@ class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, class AVX5128I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD, + : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, PD, Requires<[HasAVX512]>; -class AVX5128IBase : T8PD { +class AVX5128IBase : T8, PD { Domain ExeDomain = SSEPackedInt; } class AVX512XS8I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8XS, + : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, XS, Requires<[HasAVX512]>; class AVX512XSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, XS, + : I<o, F, outs, ins, asm, pattern>, TB, XS, Requires<[HasAVX512]>; class AVX512XDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, XD, + : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, TB, XD, Requires<[HasAVX512]>; class AVX512BI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, PD, + : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, TB, PD, Requires<[HasAVX512]>; -class AVX512BIBase : PD { +class AVX512BIBase : TB, PD { Domain ExeDomain = SSEPackedInt; } class AVX512BIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, PD, + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TB, PD, Requires<[HasAVX512]>; class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, 
list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD, + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, PD, Requires<[HasAVX512]>; -class AVX512AIi8Base : TAPD { +class AVX512AIi8Base : TA, PD { ImmType ImmT = Imm8; } class AVX512Ii8<bits<8> o, Format F, dag outs, dag ins, string asm, @@ -837,11 +814,11 @@ class AVX512Ii8<bits<8> o, Format F, dag outs, dag ins, string asm, Requires<[HasAVX512]>; class AVX512PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD, + : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, PD, Requires<[HasAVX512]>; class AVX512PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS, + : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB, Requires<[HasAVX512]>; class AVX512PIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, Domain d> @@ -851,8 +828,8 @@ class AVX512PI<bits<8> o, Format F, dag outs, dag ins, string asm, : I<o, F, outs, ins, asm, pattern, d>, Requires<[HasAVX512]>; class AVX512FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag>pattern> - : I<o, F, outs, ins, asm, pattern>, T8PD, - EVEX_4V, Requires<[HasAVX512]>; + : I<o, F, outs, ins, asm, pattern>, T8, PD, + EVEX, VVVV, Requires<[HasAVX512]>; class AVX512<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag>pattern> @@ -864,46 +841,46 @@ class AVX512<bits<8> o, Format F, dag outs, dag ins, string asm, // These use the same encoding as the SSE4.2 T8 and TA encodings. 
class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag>pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD, + : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, PD, Requires<[NoAVX, HasAES]>; class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD, + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, PD, Requires<[NoAVX, HasAES]>; // PCLMUL Instruction Templates class PCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag>pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD; + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, PD; // FMA3 Instruction Templates class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag>pattern> - : I<o, F, outs, ins, asm, pattern>, T8PD, - VEX_4V, FMASC, Requires<[HasFMA, NoFMA4, NoVLX]>; + : I<o, F, outs, ins, asm, pattern>, T8, PD, + VEX, VVVV, FMASC, Requires<[HasFMA, NoFMA4, NoVLX]>; class FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag>pattern> - : I<o, F, outs, ins, asm, pattern>, T8PD, - VEX_4V, FMASC, Requires<[HasFMA, NoFMA4, NoAVX512]>; + : I<o, F, outs, ins, asm, pattern>, T8, PD, + VEX, VVVV, FMASC, Requires<[HasFMA, NoFMA4, NoAVX512]>; class FMA3S_Int<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag>pattern> - : I<o, F, outs, ins, asm, pattern>, T8PD, - VEX_4V, FMASC, Requires<[HasFMA, NoAVX512]>; + : I<o, F, outs, ins, asm, pattern>, T8, PD, + VEX, VVVV, FMASC, Requires<[HasFMA, NoAVX512]>; // FMA4 Instruction Templates class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag>pattern> - : Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD, - VEX_4V, FMASC, Requires<[HasFMA4, NoVLX]>; + : Ii8Reg<o, F, outs, ins, asm, pattern>, TA, PD, + VEX, VVVV, FMASC, Requires<[HasFMA4, NoVLX]>; class FMA4S<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag>pattern> - : Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD, 
- VEX_4V, FMASC, Requires<[HasFMA4, NoAVX512]>; + : Ii8Reg<o, F, outs, ins, asm, pattern>, TA, PD, + VEX, VVVV, FMASC, Requires<[HasFMA4, NoAVX512]>; class FMA4S_Int<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag>pattern> - : Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD, - VEX_4V, FMASC, Requires<[HasFMA4]>; + : Ii8Reg<o, F, outs, ins, asm, pattern>, TA, PD, + VEX, VVVV, FMASC, Requires<[HasFMA4]>; // XOP 2, 3 and 4 Operand Instruction Template class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm, @@ -925,8 +902,8 @@ class IXOPi8Reg<bits<8> o, Format F, dag outs, dag ins, string asm, // XOP 5 operand instruction (VEX encoding!) class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag>pattern> - : Ii8Reg<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD, - VEX_4V, Requires<[HasXOP]>; + : Ii8Reg<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, PD, + VEX, VVVV, Requires<[HasXOP]>; // X86-64 Instruction templates... // @@ -959,57 +936,36 @@ class VRS2I<bits<8> o, Format F, dag outs, dag ins, string asm, // MMX Instruction templates // - // MMXI - MMX instructions with TB prefix. -// MMXI32 - MMX instructions with TB prefix valid only in 32 bit mode. -// MMXI64 - MMX instructions with TB prefix valid only in 64 bit mode. -// MMX2I - MMX / SSE2 instructions with PD prefix. -// MMXIi8 - MMX instructions with ImmT == Imm8 and PS prefix. +// MMXRI - MMX instructions with TB prefix and REX.W. // MMXIi8 - MMX instructions with ImmT == Imm8 and PS prefix. -// MMXID - MMX instructions with XD prefix. -// MMXIS - MMX instructions with XS prefix. 
class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX]>; -class MMXI32<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX,Not64BitMode]>; -class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX,In64BitMode]>; + : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>; class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, PS, REX_W, + : I<o, F, outs, ins, asm, pattern>, TB, REX_W, Requires<[HasMMX,In64BitMode]>; -class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, PD, Requires<[HasMMX]>; class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX]>; -class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>; -class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>; + : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>; /// ITy - This instruction base class takes the type info for the instruction. /// Using this, it: /// 1. Concatenates together the instruction mnemonic with the appropriate /// suffix letter, a tab, and the arguments. -/// 2. Infers whether the instruction should have a 0x66 prefix byte. -/// 3. Infers whether the instruction should have a 0x40 REX_W prefix. -/// 4. Infers whether the low bit of the opcode should be 0 (for i8 operations) +/// 2. Infers whether the instruction should have a 0x40 REX_W prefix. +/// 3. 
Infers whether the low bit of the opcode should be 0 (for i8 operations) /// or 1 (for i16,i32,i64 operations). class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins, string mnemonic, string args, list<dag> pattern> : I<{opcode{7}, opcode{6}, opcode{5}, opcode{4}, - opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode }, - f, outs, ins, + opcode{3}, opcode{2}, opcode{1}, + !if(!eq(typeinfo.HasEvenOpcode, 1), 0, opcode{0})}, f, outs, ins, !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern> { - // Infer instruction prefixes from type info. - let OpSize = typeinfo.OpSize; + let hasSideEffects = 0; let hasREX_W = typeinfo.HasREX_W; } + +defvar binop_args = "{$src2, $src1|$src1, $src2}"; diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVMX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVMX.td index 5289819119ce..7cc468fe15ad 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVMX.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVMX.td @@ -17,33 +17,33 @@ let SchedRW = [WriteSystem] in { // 66 0F 38 80 def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), - "invept\t{$src2, $src1|$src1, $src2}", []>, T8PD, + "invept\t{$src2, $src1|$src1, $src2}", []>, T8, PD, Requires<[Not64BitMode]>; def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), - "invept\t{$src2, $src1|$src1, $src2}", []>, T8PD, + "invept\t{$src2, $src1|$src1, $src2}", []>, T8, PD, Requires<[In64BitMode]>; def INVEPT64_EVEX : I<0xF0, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invept\t{$src2, $src1|$src1, $src2}", []>, - EVEX_NoCD8, T_MAP4XS, Requires<[In64BitMode]>; + EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>; // 66 0F 38 81 def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), - "invvpid\t{$src2, $src1|$src1, $src2}", []>, T8PD, + "invvpid\t{$src2, $src1|$src1, $src2}", []>, T8, PD, Requires<[Not64BitMode]>; def INVVPID64 : I<0x81, MRMSrcMem, (outs), 
(ins GR64:$src1, i128mem:$src2), - "invvpid\t{$src2, $src1|$src1, $src2}", []>, T8PD, + "invvpid\t{$src2, $src1|$src1, $src2}", []>, T8, PD, Requires<[In64BitMode]>; def INVVPID64_EVEX : I<0xF1, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invvpid\t{$src2, $src1|$src1, $src2}", []>, - EVEX_NoCD8, T_MAP4XS, Requires<[In64BitMode]>; + EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>; // 0F 01 C1 def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), - "vmclear\t$vmcs", []>, PD; + "vmclear\t$vmcs", []>, TB, PD; // OF 01 D4 -def VMFUNC : I<0x01, MRM_D4, (outs), (ins), "vmfunc", []>, PS; +def VMFUNC : I<0x01, MRM_D4, (outs), (ins), "vmfunc", []>, TB; // 0F 01 C2 def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB; @@ -51,35 +51,35 @@ def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB; // 0F 01 C3 def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB; def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), - "vmptrld\t$vmcs", []>, PS; + "vmptrld\t$vmcs", []>, TB; def VMPTRSTm : I<0xC7, MRM7m, (outs), (ins i64mem:$vmcs), - "vmptrst\t$vmcs", []>, PS; + "vmptrst\t$vmcs", []>, TB; def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), - "vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>; + "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>; def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), - "vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>; + "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[Not64BitMode]>; let mayStore = 1 in { def VMREAD64mr : I<0x78, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), - "vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>; + "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>; def VMREAD32mr : I<0x78, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), - "vmread{l}\t{$src, $dst|$dst, 
$src}", []>, PS, Requires<[Not64BitMode]>; + "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[Not64BitMode]>; } // mayStore def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>; + "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>; def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>; + "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[Not64BitMode]>; let mayLoad = 1 in { def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>; + "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>; def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), - "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>; + "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[Not64BitMode]>; } // mayLoad // 0F 01 C4 def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB; def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon), - "vmxon\t$vmxon", []>, XS; + "vmxon\t$vmxon", []>, TB, XS; } // SchedRW diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrXOP.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrXOP.td index a62bb2e855c9..1504d77bfb86 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrXOP.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrXOP.td @@ -105,7 +105,7 @@ multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode, [(set VR128:$dst, (vt128 (OpNode (vt128 VR128:$src1), (vt128 (load addr:$src2)))))]>, - XOP_4V, REX_W, Sched<[sched.Folded, sched.ReadAfterFold]>; + XOP, VVVV, REX_W, Sched<[sched.Folded, sched.ReadAfterFold]>; def mr : IXOP<opc, MRMSrcMem4VOp3, (outs VR128:$dst), (ins i128mem:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, 
$src2}"), @@ -119,7 +119,7 @@ multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, - XOP_4V, REX_W, Sched<[sched]>; + XOP, VVVV, REX_W, Sched<[sched]>; } let ExeDomain = SSEPackedInt in { @@ -173,7 +173,7 @@ multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int, !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, - (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, XOP_4V, + (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, XOP, VVVV, Sched<[sched]>; def rm : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, VR128:$src3), @@ -181,7 +181,7 @@ multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (Int VR128:$src1, (load addr:$src2), - VR128:$src3))]>, XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; + VR128:$src3))]>, XOP, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; } let ExeDomain = SSEPackedInt in { @@ -252,7 +252,7 @@ multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128, [(set VR128:$dst, (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2), timm:$cc)))]>, - XOP_4V, Sched<[sched]>; + XOP, VVVV, Sched<[sched]>; def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, u8imm:$cc), !strconcat("vpcom", Suffix, @@ -261,7 +261,7 @@ multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128, (vt128 (OpNode (vt128 VR128:$src1), (vt128 (load addr:$src2)), timm:$cc)))]>, - XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; + XOP, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; } def : Pat<(OpNode (load addr:$src2), @@ -288,7 +288,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode, [(set VR128:$dst, (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2), (vt128 VR128:$src3))))]>, - XOP_4V, 
Sched<[sched]>; + XOP, VVVV, Sched<[sched]>; def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i128mem:$src3), !strconcat(OpcodeStr, @@ -296,7 +296,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode, [(set VR128:$dst, (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2), (vt128 (load addr:$src3)))))]>, - XOP_4V, REX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; + XOP, VVVV, REX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, VR128:$src3), !strconcat(OpcodeStr, @@ -304,7 +304,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode, [(set VR128:$dst, (v16i8 (OpNode (vt128 VR128:$src1), (vt128 (load addr:$src2)), (vt128 VR128:$src3))))]>, - XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold, + XOP, VVVV, Sched<[sched.Folded, sched.ReadAfterFold, // 128mem:$src2 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, @@ -316,7 +316,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, XOP_4V, REX_W, Sched<[sched]>; + []>, XOP, VVVV, REX_W, Sched<[sched]>; } let ExeDomain = SSEPackedInt in { @@ -333,7 +333,7 @@ multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (VT (or (and RC:$src3, RC:$src1), - (X86andnp RC:$src3, RC:$src2))))]>, XOP_4V, + (X86andnp RC:$src3, RC:$src2))))]>, XOP, VVVV, Sched<[sched]>; // FIXME: We can't write a pattern for this in tablegen. 
let hasSideEffects = 0, mayLoad = 1 in @@ -342,14 +342,14 @@ multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, - XOP_4V, REX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; + XOP, VVVV, REX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (VT (or (and RC:$src3, RC:$src1), (X86andnp RC:$src3, (load addr:$src2)))))]>, - XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold, + XOP, VVVV, Sched<[sched.Folded, sched.ReadAfterFold, // x86memop:$src2 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, @@ -361,7 +361,7 @@ multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC, (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, XOP_4V, REX_W, Sched<[sched]>; + []>, XOP, VVVV, REX_W, Sched<[sched]>; } let ExeDomain = SSEPackedInt in { diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp index 82667b8cdbdb..c0fa9aa70324 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp @@ -68,7 +68,8 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, namespace llvm { std::unique_ptr<ScheduleDAGMutation> createX86MacroFusionDAGMutation() { - return createBranchMacroFusionDAGMutation(shouldScheduleAdjacent); + return createMacroFusionDAGMutation(shouldScheduleAdjacent, + /*BranchOnly=*/true); } } // end namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp index d63f1ca1695b..07f535685e8f 100644 
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp @@ -69,11 +69,11 @@ X86Subtarget::classifyGlobalReference(const GlobalValue *GV) const { unsigned char X86Subtarget::classifyLocalReference(const GlobalValue *GV) const { + CodeModel::Model CM = TM.getCodeModel(); // Tagged globals have non-zero upper bits, which makes direct references - // require a 64-bit immediate. On the small code model this causes relocation - // errors, so we go through the GOT instead. - if (AllowTaggedGlobals && TM.getCodeModel() == CodeModel::Small && GV && - !isa<Function>(GV)) + // require a 64-bit immediate. With the small/medium code models this causes + // relocation errors, so we go through the GOT instead. + if (AllowTaggedGlobals && CM != CodeModel::Large && GV && !isa<Function>(GV)) return X86II::MO_GOTPCREL_NORELAX; // If we're not PIC, it's not very interesting. @@ -83,7 +83,6 @@ X86Subtarget::classifyLocalReference(const GlobalValue *GV) const { if (is64Bit()) { // 64-bit ELF PIC local references may use GOTOFF relocations. 
if (isTargetELF()) { - CodeModel::Model CM = TM.getCodeModel(); assert(CM != CodeModel::Tiny && "Tiny codesize model not supported on X86"); // In the large code model, all text is far from any global data, so we diff --git a/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParserCommon.cpp b/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParserCommon.cpp index 10b80cad4347..6d3a59d532fd 100644 --- a/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParserCommon.cpp +++ b/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParserCommon.cpp @@ -134,13 +134,13 @@ ARM::EndianKind ARM::parseArchEndian(StringRef Arch) { } // Parse a branch protection specification, which has the form -// standard | none | [bti,pac-ret[+b-key,+leaf]*] +// standard | none | [bti,pac-ret[+b-key,+leaf,+pc]*] // Returns true on success, with individual elements of the specification // returned in `PBP`. Returns false in error, with `Err` containing // an erroneous part of the spec. bool ARM::parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP, StringRef &Err) { - PBP = {"none", "a_key", false}; + PBP = {"none", "a_key", false, false}; if (Spec == "none") return true; // defaults are ok @@ -166,6 +166,8 @@ bool ARM::parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP, PBP.Scope = "all"; else if (PACOpt == "b-key") PBP.Key = "b_key"; + else if (PACOpt == "pc") + PBP.BranchProtectionPAuthLR = true; else break; } diff --git a/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp b/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp index e61fcb248fae..11c5000acc07 100644 --- a/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp +++ b/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp @@ -1131,37 +1131,59 @@ getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 23: CPU = "znver1"; *Type = X86::AMDFAM17H; - if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) { + if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) || + (Model >= 0x60 && 
Model <= 0x67) || (Model >= 0x68 && Model <= 0x6f) || + (Model >= 0x70 && Model <= 0x7f) || (Model >= 0x84 && Model <= 0x87) || + (Model >= 0x90 && Model <= 0x97) || (Model >= 0x98 && Model <= 0x9f) || + (Model >= 0xa0 && Model <= 0xaf)) { + // Family 17h Models 30h-3Fh (Starship) Zen 2 + // Family 17h Models 47h (Cardinal) Zen 2 + // Family 17h Models 60h-67h (Renoir) Zen 2 + // Family 17h Models 68h-6Fh (Lucienne) Zen 2 + // Family 17h Models 70h-7Fh (Matisse) Zen 2 + // Family 17h Models 84h-87h (ProjectX) Zen 2 + // Family 17h Models 90h-97h (VanGogh) Zen 2 + // Family 17h Models 98h-9Fh (Mero) Zen 2 + // Family 17h Models A0h-AFh (Mendocino) Zen 2 CPU = "znver2"; *Subtype = X86::AMDFAM17H_ZNVER2; - break; // 30h-3fh, 71h: Zen2 + break; } - if (Model <= 0x0f) { + if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x20 && Model <= 0x2f)) { + // Family 17h Models 10h-1Fh (Raven1) Zen + // Family 17h Models 10h-1Fh (Picasso) Zen+ + // Family 17h Models 20h-2Fh (Raven2 x86) Zen *Subtype = X86::AMDFAM17H_ZNVER1; - break; // 00h-0Fh: Zen1 + break; } break; case 25: CPU = "znver3"; *Type = X86::AMDFAM19H; - if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x5f)) { - // Family 19h Models 00h-0Fh - Zen3 - // Family 19h Models 20h-2Fh - Zen3 - // Family 19h Models 30h-3Fh - Zen3 - // Family 19h Models 40h-4Fh - Zen3+ - // Family 19h Models 50h-5Fh - Zen3+ + if ((Model >= 0x00 && Model <= 0x0f) || (Model >= 0x20 && Model <= 0x2f) || + (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) || + (Model >= 0x50 && Model <= 0x5f)) { + // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3 + // Family 19h Models 20h-2Fh (Vermeer) Zen 3 + // Family 19h Models 30h-3Fh (Badami) Zen 3 + // Family 19h Models 40h-4Fh (Rembrandt) Zen 3+ + // Family 19h Models 50h-5Fh (Cezanne) Zen 3 *Subtype = X86::AMDFAM19H_ZNVER3; break; } - if ((Model >= 0x10 && Model <= 0x1f) || - (Model >= 0x60 && Model <= 0x74) || - (Model >= 0x78 && Model <= 0x7b) || - (Model >= 0xA0 && Model <= 
0xAf)) { + if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x6f) || + (Model >= 0x70 && Model <= 0x77) || (Model >= 0x78 && Model <= 0x7f) || + (Model >= 0xa0 && Model <= 0xaf)) { + // Family 19h Models 10h-1Fh (Stones; Storm Peak) Zen 4 + // Family 19h Models 60h-6Fh (Raphael) Zen 4 + // Family 19h Models 70h-77h (Phoenix, Hawkpoint1) Zen 4 + // Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4 + // Family 19h Models A0h-AFh (Stones-Dense) Zen 4 CPU = "znver4"; *Subtype = X86::AMDFAM19H_ZNVER4; break; // "znver4" } - break; // family 19h + break; default: break; // Unknown AMD CPU. } diff --git a/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp b/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp index ac04dab04897..d475650c2d18 100644 --- a/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp +++ b/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp @@ -819,8 +819,6 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) { } static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { - if (T.isOSDarwin()) - return Triple::MachO; switch (T.getArch()) { case Triple::UnknownArch: case Triple::aarch64: @@ -829,12 +827,13 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { case Triple::thumb: case Triple::x86: case Triple::x86_64: - if (T.isOSWindows()) + switch (T.getOS()) { + case Triple::Win32: + case Triple::UEFI: return Triple::COFF; - else if (T.isUEFI()) - return Triple::COFF; - return Triple::ELF; - + default: + return T.isOSDarwin() ? 
Triple::MachO : Triple::ELF; + } case Triple::aarch64_be: case Triple::amdgcn: case Triple::amdil64: @@ -887,6 +886,8 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { case Triple::ppc: if (T.isOSAIX()) return Triple::XCOFF; + if (T.isOSDarwin()) + return Triple::MachO; return Triple::ELF; case Triple::systemz: diff --git a/contrib/llvm-project/llvm/lib/TextAPI/BinaryReader/DylibReader.cpp b/contrib/llvm-project/llvm/lib/TextAPI/BinaryReader/DylibReader.cpp new file mode 100644 index 000000000000..40b57b5e40ea --- /dev/null +++ b/contrib/llvm-project/llvm/lib/TextAPI/BinaryReader/DylibReader.cpp @@ -0,0 +1,429 @@ +//===- DylibReader.cpp -------------- TAPI MachO Dylib Reader --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Implements the TAPI Reader for Mach-O dynamic libraries. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/TextAPI/DylibReader.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/MachOUniversal.h" +#include "llvm/Support/Endian.h" +#include "llvm/TargetParser/Triple.h" +#include "llvm/TextAPI/RecordsSlice.h" +#include "llvm/TextAPI/TextAPIError.h" +#include <iomanip> +#include <set> +#include <sstream> +#include <string> +#include <tuple> + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::MachO; +using namespace llvm::MachO::DylibReader; + +using TripleVec = std::vector<Triple>; +static typename TripleVec::iterator emplace(TripleVec &Container, Triple &&T) { + auto I = partition_point(Container, [=](const Triple &CT) { + return std::forward_as_tuple(CT.getArch(), CT.getOS(), + CT.getEnvironment()) < + std::forward_as_tuple(T.getArch(), T.getOS(), T.getEnvironment()); + }); + + if (I != Container.end() && *I == T) + return I; + return Container.emplace(I, T); +} + +static TripleVec constructTriples(MachOObjectFile *Obj, + const Architecture ArchT) { + auto getOSVersionStr = [](uint32_t V) { + PackedVersion OSVersion(V); + std::string Vers; + raw_string_ostream VStream(Vers); + VStream << OSVersion; + return VStream.str(); + }; + auto getOSVersion = [&](const MachOObjectFile::LoadCommandInfo &cmd) { + auto Vers = Obj->getVersionMinLoadCommand(cmd); + return getOSVersionStr(Vers.version); + }; + + TripleVec Triples; + bool IsIntel = ArchitectureSet(ArchT).hasX86(); + auto Arch = getArchitectureName(ArchT); + + for (const auto &cmd : Obj->load_commands()) { + std::string OSVersion; + switch (cmd.C.cmd) { + case MachO::LC_VERSION_MIN_MACOSX: + OSVersion = getOSVersion(cmd); + emplace(Triples, {Arch, "apple", "macos" + OSVersion}); + break; + case MachO::LC_VERSION_MIN_IPHONEOS: + OSVersion = getOSVersion(cmd); + if (IsIntel) + emplace(Triples, {Arch, "apple", "ios" + 
OSVersion, "simulator"}); + else + emplace(Triples, {Arch, "apple", "ios" + OSVersion}); + break; + case MachO::LC_VERSION_MIN_TVOS: + OSVersion = getOSVersion(cmd); + if (IsIntel) + emplace(Triples, {Arch, "apple", "tvos" + OSVersion, "simulator"}); + else + emplace(Triples, {Arch, "apple", "tvos" + OSVersion}); + break; + case MachO::LC_VERSION_MIN_WATCHOS: + OSVersion = getOSVersion(cmd); + if (IsIntel) + emplace(Triples, {Arch, "apple", "watchos" + OSVersion, "simulator"}); + else + emplace(Triples, {Arch, "apple", "watchos" + OSVersion}); + break; + case MachO::LC_BUILD_VERSION: { + OSVersion = getOSVersionStr(Obj->getBuildVersionLoadCommand(cmd).minos); + switch (Obj->getBuildVersionLoadCommand(cmd).platform) { + case MachO::PLATFORM_MACOS: + emplace(Triples, {Arch, "apple", "macos" + OSVersion}); + break; + case MachO::PLATFORM_IOS: + emplace(Triples, {Arch, "apple", "ios" + OSVersion}); + break; + case MachO::PLATFORM_TVOS: + emplace(Triples, {Arch, "apple", "tvos" + OSVersion}); + break; + case MachO::PLATFORM_WATCHOS: + emplace(Triples, {Arch, "apple", "watchos" + OSVersion}); + break; + case MachO::PLATFORM_BRIDGEOS: + emplace(Triples, {Arch, "apple", "bridgeos" + OSVersion}); + break; + case MachO::PLATFORM_MACCATALYST: + emplace(Triples, {Arch, "apple", "ios" + OSVersion, "macabi"}); + break; + case MachO::PLATFORM_IOSSIMULATOR: + emplace(Triples, {Arch, "apple", "ios" + OSVersion, "simulator"}); + break; + case MachO::PLATFORM_TVOSSIMULATOR: + emplace(Triples, {Arch, "apple", "tvos" + OSVersion, "simulator"}); + break; + case MachO::PLATFORM_WATCHOSSIMULATOR: + emplace(Triples, {Arch, "apple", "watchos" + OSVersion, "simulator"}); + break; + case MachO::PLATFORM_DRIVERKIT: + emplace(Triples, {Arch, "apple", "driverkit" + OSVersion}); + break; + default: + break; // Skip any others. + } + break; + } + default: + break; + } + } + + // Record unknown platform for older binaries that don't enforce platform + // load commands. 
+ if (Triples.empty()) + emplace(Triples, {Arch, "apple", "unknown"}); + + return Triples; +} + +static Error readMachOHeader(MachOObjectFile *Obj, RecordsSlice &Slice) { + auto H = Obj->getHeader(); + auto &BA = Slice.getBinaryAttrs(); + + switch (H.filetype) { + default: + llvm_unreachable("unsupported binary type"); + case MachO::MH_DYLIB: + BA.File = FileType::MachO_DynamicLibrary; + break; + case MachO::MH_DYLIB_STUB: + BA.File = FileType::MachO_DynamicLibrary_Stub; + break; + case MachO::MH_BUNDLE: + BA.File = FileType::MachO_Bundle; + break; + } + + if (H.flags & MachO::MH_TWOLEVEL) + BA.TwoLevelNamespace = true; + if (H.flags & MachO::MH_APP_EXTENSION_SAFE) + BA.AppExtensionSafe = true; + + for (const auto &LCI : Obj->load_commands()) { + switch (LCI.C.cmd) { + case MachO::LC_ID_DYLIB: { + auto DLLC = Obj->getDylibIDLoadCommand(LCI); + BA.InstallName = Slice.copyString(LCI.Ptr + DLLC.dylib.name); + BA.CurrentVersion = DLLC.dylib.current_version; + BA.CompatVersion = DLLC.dylib.compatibility_version; + break; + } + case MachO::LC_REEXPORT_DYLIB: { + auto DLLC = Obj->getDylibIDLoadCommand(LCI); + BA.RexportedLibraries.emplace_back( + Slice.copyString(LCI.Ptr + DLLC.dylib.name)); + break; + } + case MachO::LC_SUB_FRAMEWORK: { + auto SFC = Obj->getSubFrameworkCommand(LCI); + BA.ParentUmbrella = Slice.copyString(LCI.Ptr + SFC.umbrella); + break; + } + case MachO::LC_SUB_CLIENT: { + auto SCLC = Obj->getSubClientCommand(LCI); + BA.AllowableClients.emplace_back(Slice.copyString(LCI.Ptr + SCLC.client)); + break; + } + case MachO::LC_UUID: { + auto UUIDLC = Obj->getUuidCommand(LCI); + std::stringstream Stream; + for (unsigned I = 0; I < 16; ++I) { + if (I == 4 || I == 6 || I == 8 || I == 10) + Stream << '-'; + Stream << std::setfill('0') << std::setw(2) << std::uppercase + << std::hex << static_cast<int>(UUIDLC.uuid[I]); + } + BA.UUID = Slice.copyString(Stream.str()); + break; + } + case MachO::LC_RPATH: { + auto RPLC = Obj->getRpathCommand(LCI); + 
BA.RPaths.emplace_back(Slice.copyString(LCI.Ptr + RPLC.path)); + break; + } + case MachO::LC_SEGMENT_SPLIT_INFO: { + auto SSILC = Obj->getLinkeditDataLoadCommand(LCI); + if (SSILC.datasize == 0) + BA.OSLibNotForSharedCache = true; + break; + } + default: + break; + } + } + + for (auto &Sect : Obj->sections()) { + auto SectName = Sect.getName(); + if (!SectName) + return SectName.takeError(); + if (*SectName != "__objc_imageinfo" && *SectName != "__image_info") + continue; + + auto Content = Sect.getContents(); + if (!Content) + return Content.takeError(); + + if ((Content->size() >= 8) && (Content->front() == 0)) { + uint32_t Flags; + if (Obj->isLittleEndian()) { + auto *p = + reinterpret_cast<const support::ulittle32_t *>(Content->data() + 4); + Flags = *p; + } else { + auto *p = + reinterpret_cast<const support::ubig32_t *>(Content->data() + 4); + Flags = *p; + } + BA.SwiftABI = (Flags >> 8) & 0xFF; + } + } + return Error::success(); +} + +static Error readSymbols(MachOObjectFile *Obj, RecordsSlice &Slice, + const ParseOption &Opt) { + + auto parseExport = [](const auto ExportFlags, + auto Addr) -> std::tuple<SymbolFlags, RecordLinkage> { + SymbolFlags Flags = SymbolFlags::None; + switch (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_KIND_MASK) { + case MachO::EXPORT_SYMBOL_FLAGS_KIND_REGULAR: + if (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION) + Flags |= SymbolFlags::WeakDefined; + break; + case MachO::EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL: + Flags |= SymbolFlags::ThreadLocalValue; + break; + } + + RecordLinkage Linkage = (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) + ? RecordLinkage::Rexported + : RecordLinkage::Exported; + return {Flags, Linkage}; + }; + + Error Err = Error::success(); + + StringMap<std::pair<SymbolFlags, RecordLinkage>> Exports; + // Collect symbols from export trie first. Sometimes, there are more exports + // in the trie than in n-list due to stripping. This is common for swift + // mangled symbols. 
+ for (auto &Sym : Obj->exports(Err)) { + auto [Flags, Linkage] = parseExport(Sym.flags(), Sym.address()); + Slice.addRecord(Sym.name(), Flags, GlobalRecord::Kind::Unknown, Linkage); + Exports[Sym.name()] = {Flags, Linkage}; + } + + for (const auto &Sym : Obj->symbols()) { + auto FlagsOrErr = Sym.getFlags(); + if (!FlagsOrErr) + return FlagsOrErr.takeError(); + auto Flags = *FlagsOrErr; + + auto NameOrErr = Sym.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + auto Name = *NameOrErr; + + RecordLinkage Linkage = RecordLinkage::Unknown; + SymbolFlags RecordFlags = SymbolFlags::None; + + if (Opt.Undefineds && (Flags & SymbolRef::SF_Undefined)) { + Linkage = RecordLinkage::Undefined; + if (Flags & SymbolRef::SF_Weak) + RecordFlags |= SymbolFlags::WeakReferenced; + } else if (Flags & SymbolRef::SF_Exported) { + auto Exp = Exports.find(Name); + // This should never be possible when binaries are produced with Apple + // linkers. However it is possible to craft dylibs where the export trie + // is either malformed or has conflicting symbols compared to n_list. + if (Exp != Exports.end()) + std::tie(RecordFlags, Linkage) = Exp->second; + else + Linkage = RecordLinkage::Exported; + } else if (Flags & SymbolRef::SF_Hidden) { + Linkage = RecordLinkage::Internal; + } else + continue; + + auto TypeOrErr = Sym.getType(); + if (!TypeOrErr) + return TypeOrErr.takeError(); + auto Type = *TypeOrErr; + + GlobalRecord::Kind GV = (Type & SymbolRef::ST_Function) + ? 
GlobalRecord::Kind::Function + : GlobalRecord::Kind::Variable; + + if (GV == GlobalRecord::Kind::Function) + RecordFlags |= SymbolFlags::Text; + else + RecordFlags |= SymbolFlags::Data; + + Slice.addRecord(Name, RecordFlags, GV, Linkage); + } + return Err; +} + +static Error load(MachOObjectFile *Obj, RecordsSlice &Slice, + const ParseOption &Opt, const Architecture Arch) { + if (Arch == AK_unknown) + return make_error<TextAPIError>(TextAPIErrorCode::UnsupportedTarget); + + if (Opt.MachOHeader) + if (auto Err = readMachOHeader(Obj, Slice)) + return Err; + + if (Opt.SymbolTable) + if (auto Err = readSymbols(Obj, Slice, Opt)) + return Err; + + return Error::success(); +} + +Expected<Records> DylibReader::readFile(MemoryBufferRef Buffer, + const ParseOption &Opt) { + Records Results; + + auto BinOrErr = createBinary(Buffer); + if (!BinOrErr) + return BinOrErr.takeError(); + + Binary &Bin = *BinOrErr.get(); + if (auto *Obj = dyn_cast<MachOObjectFile>(&Bin)) { + const auto Arch = getArchitectureFromCpuType(Obj->getHeader().cputype, + Obj->getHeader().cpusubtype); + if (!Opt.Archs.has(Arch)) + return make_error<TextAPIError>(TextAPIErrorCode::NoSuchArchitecture); + + auto Triples = constructTriples(Obj, Arch); + for (const auto &T : Triples) { + if (mapToPlatformType(T) == PLATFORM_UNKNOWN) + return make_error<TextAPIError>(TextAPIErrorCode::UnsupportedTarget); + Results.emplace_back(std::make_shared<RecordsSlice>(RecordsSlice({T}))); + if (auto Err = load(Obj, *Results.back(), Opt, Arch)) + return std::move(Err); + Results.back()->getBinaryAttrs().Path = Buffer.getBufferIdentifier(); + } + return Results; + } + + // Only expect MachO universal binaries at this point. + assert(isa<MachOUniversalBinary>(&Bin) && + "Expected a MachO universal binary."); + auto *UB = cast<MachOUniversalBinary>(&Bin); + + for (auto OI = UB->begin_objects(), OE = UB->end_objects(); OI != OE; ++OI) { + // Skip architecture if not requested. 
+ auto Arch = + getArchitectureFromCpuType(OI->getCPUType(), OI->getCPUSubType()); + if (!Opt.Archs.has(Arch)) + continue; + + // Skip unknown architectures. + if (Arch == AK_unknown) + continue; + + // This can fail if the object is an archive. + auto ObjOrErr = OI->getAsObjectFile(); + + // Skip the archive and consume the error. + if (!ObjOrErr) { + consumeError(ObjOrErr.takeError()); + continue; + } + + auto &Obj = *ObjOrErr.get(); + switch (Obj.getHeader().filetype) { + default: + break; + case MachO::MH_BUNDLE: + case MachO::MH_DYLIB: + case MachO::MH_DYLIB_STUB: + for (const auto &T : constructTriples(&Obj, Arch)) { + Results.emplace_back(std::make_shared<RecordsSlice>(RecordsSlice({T}))); + if (auto Err = load(&Obj, *Results.back(), Opt, Arch)) + return std::move(Err); + } + break; + } + } + + if (Results.empty()) + return make_error<TextAPIError>(TextAPIErrorCode::EmptyResults); + return Results; +} + +Expected<std::unique_ptr<InterfaceFile>> +DylibReader::get(MemoryBufferRef Buffer) { + ParseOption Options; + auto SlicesOrErr = readFile(Buffer, Options); + if (!SlicesOrErr) + return SlicesOrErr.takeError(); + + return convertToInterfaceFile(*SlicesOrErr); +} diff --git a/contrib/llvm-project/llvm/lib/TextAPI/RecordVisitor.cpp b/contrib/llvm-project/llvm/lib/TextAPI/RecordVisitor.cpp new file mode 100644 index 000000000000..cee04e644755 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/TextAPI/RecordVisitor.cpp @@ -0,0 +1,65 @@ +//===- RecordVisitor.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Implements the TAPI Record Visitor. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/TextAPI/RecordVisitor.h" + +using namespace llvm; +using namespace llvm::MachO; + +RecordVisitor::~RecordVisitor() {} +void RecordVisitor::visitObjCInterface(const ObjCInterfaceRecord &) {} +void RecordVisitor::visitObjCCategory(const ObjCCategoryRecord &) {} + +static bool shouldSkipRecord(const Record &R, const bool RecordUndefs) { + if (R.isExported()) + return false; + + // Skip non exported symbols unless for flat namespace libraries. + return !(RecordUndefs && R.isUndefined()); +} + +void SymbolConverter::visitGlobal(const GlobalRecord &GR) { + auto [SymName, SymKind] = parseSymbol(GR.getName(), GR.getFlags()); + if (shouldSkipRecord(GR, RecordUndefs)) + return; + Symbols->addGlobal(SymKind, SymName, GR.getFlags(), Targ); +} + +void SymbolConverter::addIVars(const ArrayRef<ObjCIVarRecord *> IVars, + StringRef ContainerName) { + for (auto *IV : IVars) { + if (shouldSkipRecord(*IV, RecordUndefs)) + continue; + std::string Name = + ObjCIVarRecord::createScopedName(ContainerName, IV->getName()); + Symbols->addGlobal(SymbolKind::ObjectiveCInstanceVariable, Name, + IV->getFlags(), Targ); + } +} + +void SymbolConverter::visitObjCInterface(const ObjCInterfaceRecord &ObjCR) { + if (!shouldSkipRecord(ObjCR, RecordUndefs)) { + Symbols->addGlobal(SymbolKind::ObjectiveCClass, ObjCR.getName(), + ObjCR.getFlags(), Targ); + if (ObjCR.hasExceptionAttribute()) + Symbols->addGlobal(SymbolKind::ObjectiveCClassEHType, ObjCR.getName(), + ObjCR.getFlags(), Targ); + } + + addIVars(ObjCR.getObjCIVars(), ObjCR.getName()); + for (const auto *Cat : ObjCR.getObjCCategories()) + addIVars(Cat->getObjCIVars(), ObjCR.getName()); +} + +void SymbolConverter::visitObjCCategory(const ObjCCategoryRecord &Cat) { + addIVars(Cat.getObjCIVars(), Cat.getName()); +} diff --git a/contrib/llvm-project/llvm/lib/TextAPI/RecordsSlice.cpp b/contrib/llvm-project/llvm/lib/TextAPI/RecordsSlice.cpp index 
a220b255aea3..7ceffc7c9284 100644 --- a/contrib/llvm-project/llvm/lib/TextAPI/RecordsSlice.cpp +++ b/contrib/llvm-project/llvm/lib/TextAPI/RecordsSlice.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/TextAPI/RecordsSlice.h" +#include "llvm/ADT/SetVector.h" #include "llvm/TextAPI/Record.h" #include "llvm/TextAPI/Symbol.h" #include <utility> @@ -142,8 +143,10 @@ GlobalRecord *RecordsSlice::addGlobal(StringRef Name, RecordLinkage Linkage, if (Result.second) Result.first->second = std::make_unique<GlobalRecord>(Name, Linkage, Flags, GV); - else + else { updateLinkage(Result.first->second.get(), Linkage); + updateFlags(Result.first->second.get(), Flags); + } return Result.first->second.get(); } @@ -164,6 +167,19 @@ ObjCInterfaceRecord *RecordsSlice::addObjCInterface(StringRef Name, return Result.first->second.get(); } +SymbolFlags Record::mergeFlags(SymbolFlags Flags, RecordLinkage Linkage) { + // Add Linkage properties into Flags. 
+ switch (Linkage) { + case RecordLinkage::Rexported: + Flags |= SymbolFlags::Rexported; + return Flags; + case RecordLinkage::Undefined: + Flags |= SymbolFlags::Undefined; + return Flags; + default: + return Flags; + } +} bool ObjCInterfaceRecord::addObjCCategory(ObjCCategoryRecord *Record) { auto Result = Categories.insert({Name, Record}); @@ -188,11 +204,26 @@ ObjCCategoryRecord *RecordsSlice::addObjCCategory(StringRef ClassToExtend, return Result.first->second.get(); } +std::vector<ObjCIVarRecord *> ObjCContainerRecord::getObjCIVars() const { + std::vector<ObjCIVarRecord *> Records; + llvm::for_each(IVars, + [&](auto &Record) { Records.push_back(Record.second.get()); }); + return Records; +} + +std::vector<ObjCCategoryRecord *> +ObjCInterfaceRecord::getObjCCategories() const { + std::vector<ObjCCategoryRecord *> Records; + llvm::for_each(Categories, + [&](auto &Record) { Records.push_back(Record.second); }); + return Records; +} + ObjCIVarRecord *ObjCContainerRecord::addObjCIVar(StringRef IVar, RecordLinkage Linkage) { auto Result = IVars.insert({IVar, nullptr}); if (Result.second) - Result.first->second = std::make_unique<ObjCIVarRecord>(Name, Linkage); + Result.first->second = std::make_unique<ObjCIVarRecord>(IVar, Linkage); return Result.first->second.get(); } @@ -222,3 +253,88 @@ RecordsSlice::BinaryAttrs &RecordsSlice::getBinaryAttrs() { BA = std::make_unique<BinaryAttrs>(); return *BA; } + +void RecordsSlice::visit(RecordVisitor &V) const { + for (auto &G : Globals) + V.visitGlobal(*G.second); + for (auto &C : Classes) + V.visitObjCInterface(*C.second); + for (auto &Cat : Categories) + V.visitObjCCategory(*Cat.second); +} + +static std::unique_ptr<InterfaceFile> +createInterfaceFile(const Records &Slices, StringRef InstallName) { + // Pickup symbols first. 
+ auto Symbols = std::make_unique<SymbolSet>(); + for (auto &S : Slices) { + if (S->empty()) + continue; + auto &BA = S->getBinaryAttrs(); + if (BA.InstallName != InstallName) + continue; + + SymbolConverter Converter(Symbols.get(), S->getTarget(), + !BA.TwoLevelNamespace); + S->visit(Converter); + } + + auto File = std::make_unique<InterfaceFile>(std::move(Symbols)); + File->setInstallName(InstallName); + // Assign other attributes. + for (auto &S : Slices) { + if (S->empty()) + continue; + auto &BA = S->getBinaryAttrs(); + if (BA.InstallName != InstallName) + continue; + const Target &Targ = S->getTarget(); + File->addTarget(Targ); + if (File->getFileType() == FileType::Invalid) + File->setFileType(BA.File); + if (BA.AppExtensionSafe && !File->isApplicationExtensionSafe()) + File->setApplicationExtensionSafe(); + if (BA.TwoLevelNamespace && !File->isTwoLevelNamespace()) + File->setTwoLevelNamespace(); + if (BA.OSLibNotForSharedCache && !File->isOSLibNotForSharedCache()) + File->setOSLibNotForSharedCache(); + if (File->getCurrentVersion().empty()) + File->setCurrentVersion(BA.CurrentVersion); + if (File->getCompatibilityVersion().empty()) + File->setCompatibilityVersion(BA.CompatVersion); + if (File->getSwiftABIVersion() == 0) + File->setSwiftABIVersion(BA.SwiftABI); + if (File->getPath().empty()) + File->setPath(BA.Path); + if (!BA.ParentUmbrella.empty()) + File->addParentUmbrella(Targ, BA.ParentUmbrella); + for (const auto &Client : BA.AllowableClients) + File->addAllowableClient(Client, Targ); + for (const auto &Lib : BA.RexportedLibraries) + File->addReexportedLibrary(Lib, Targ); + } + + return File; +} + +std::unique_ptr<InterfaceFile> +llvm::MachO::convertToInterfaceFile(const Records &Slices) { + std::unique_ptr<InterfaceFile> File; + if (Slices.empty()) + return File; + + SetVector<StringRef> InstallNames; + for (auto &S : Slices) { + auto Name = S->getBinaryAttrs().InstallName; + if (Name.empty()) + continue; + InstallNames.insert(Name); + } + + File = 
createInterfaceFile(Slices, *InstallNames.begin()); + for (auto it = std::next(InstallNames.begin()); it != InstallNames.end(); + ++it) + File->addDocument(createInterfaceFile(Slices, *it)); + + return File; +} diff --git a/contrib/llvm-project/llvm/lib/TextAPI/Utils.cpp b/contrib/llvm-project/llvm/lib/TextAPI/Utils.cpp new file mode 100644 index 000000000000..6d85083e0b54 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/TextAPI/Utils.cpp @@ -0,0 +1,40 @@ +//===- Utils.cpp ----------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements utility functions for TextAPI Darwin operations. +// +//===----------------------------------------------------------------------===// + +#include "llvm/TextAPI/Utils.h" + +using namespace llvm; +using namespace llvm::MachO; + +void llvm::MachO::replace_extension(SmallVectorImpl<char> &Path, + const Twine &Extension) { + StringRef P(Path.begin(), Path.size()); + auto ParentPath = sys::path::parent_path(P); + auto Filename = sys::path::filename(P); + + if (!ParentPath.ends_with(Filename.str() + ".framework")) { + sys::path::replace_extension(Path, Extension); + return; + } + // Framework dylibs do not have a file extension, in those cases the new + // extension is appended. e.g. given Path: "Foo.framework/Foo" and Extension: + // "tbd", the result is "Foo.framework/Foo.tbd". + SmallString<8> Storage; + StringRef Ext = Extension.toStringRef(Storage); + + // Append '.' if needed. + if (!Ext.empty() && Ext[0] != '.') + Path.push_back('.'); + + // Append extension. 
+ Path.append(Ext.begin(), Ext.end()); +} diff --git a/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp index 781bc9a058e1..834903857a88 100644 --- a/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp +++ b/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp @@ -171,7 +171,7 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) { if (!Def) { llvm::errs() << "error parsing definition\n" - << errorToErrorCode(Def.takeError()).message(); + << errorToErrorCode(Def.takeError()).message() << "\n"; return 1; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 8e1f782f7cd8..b2618e35b085 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -291,42 +291,15 @@ static const Value *getPointerOperand(const Instruction *I, } /// Helper function to create a pointer based on \p Ptr, and advanced by \p -/// Offset bytes. To aid later analysis the method tries to build -/// getelement pointer instructions that traverse the natural type of \p Ptr if -/// possible. If that fails, the remaining offset is adjusted byte-wise, hence -/// through a cast to i8*. -/// -/// TODO: This could probably live somewhere more prominantly if it doesn't -/// already exist. -static Value *constructPointer(Type *PtrElemTy, Value *Ptr, int64_t Offset, - IRBuilder<NoFolder> &IRB, const DataLayout &DL) { - assert(Offset >= 0 && "Negative offset not supported yet!"); +/// Offset bytes. 
+static Value *constructPointer(Value *Ptr, int64_t Offset, + IRBuilder<NoFolder> &IRB) { LLVM_DEBUG(dbgs() << "Construct pointer: " << *Ptr << " + " << Offset << "-bytes\n"); - if (Offset) { - Type *Ty = PtrElemTy; - APInt IntOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset); - SmallVector<APInt> IntIndices = DL.getGEPIndicesForOffset(Ty, IntOffset); - - SmallVector<Value *, 4> ValIndices; - std::string GEPName = Ptr->getName().str(); - for (const APInt &Index : IntIndices) { - ValIndices.push_back(IRB.getInt(Index)); - GEPName += "." + std::to_string(Index.getZExtValue()); - } - - // Create a GEP for the indices collected above. - Ptr = IRB.CreateGEP(PtrElemTy, Ptr, ValIndices, GEPName); - - // If an offset is left we use byte-wise adjustment. - if (IntOffset != 0) { - Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(IntOffset), - GEPName + ".b" + Twine(IntOffset.getZExtValue())); - } - } - - LLVM_DEBUG(dbgs() << "Constructed pointer: " << *Ptr << "\n"); + if (Offset) + Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt64(Offset), + Ptr->getName() + ".b" + Twine(Offset)); return Ptr; } @@ -7487,16 +7460,15 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) { const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType); for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) { - Value *Ptr = constructPointer( - PrivType, &Base, PrivStructLayout->getElementOffset(u), IRB, DL); + Value *Ptr = + constructPointer(&Base, PrivStructLayout->getElementOffset(u), IRB); new StoreInst(F.getArg(ArgNo + u), Ptr, &IP); } } else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) { Type *PointeeTy = PrivArrayType->getElementType(); uint64_t PointeeTySize = DL.getTypeStoreSize(PointeeTy); for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) { - Value *Ptr = - constructPointer(PrivType, &Base, u * PointeeTySize, IRB, DL); + Value *Ptr = 
constructPointer(&Base, u * PointeeTySize, IRB); new StoreInst(F.getArg(ArgNo + u), Ptr, &IP); } } else { @@ -7521,8 +7493,8 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType); for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) { Type *PointeeTy = PrivStructType->getElementType(u); - Value *Ptr = constructPointer( - PrivType, Base, PrivStructLayout->getElementOffset(u), IRB, DL); + Value *Ptr = + constructPointer(Base, PrivStructLayout->getElementOffset(u), IRB); LoadInst *L = new LoadInst(PointeeTy, Ptr, "", IP); L->setAlignment(Alignment); ReplacementValues.push_back(L); @@ -7531,8 +7503,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { Type *PointeeTy = PrivArrayType->getElementType(); uint64_t PointeeTySize = DL.getTypeStoreSize(PointeeTy); for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) { - Value *Ptr = - constructPointer(PrivType, Base, u * PointeeTySize, IRB, DL); + Value *Ptr = constructPointer(Base, u * PointeeTySize, IRB); LoadInst *L = new LoadInst(PointeeTy, Ptr, "", IP); L->setAlignment(Alignment); ReplacementValues.push_back(L); diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index b2665161c090..4176d561363f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2053,6 +2053,9 @@ private: LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size() << " functions, result: " << Changed << ".\n"); + if (Changed == ChangeStatus::CHANGED) + OMPInfoCache.invalidateAnalyses(); + return Changed == ChangeStatus::CHANGED; } @@ -3763,7 +3766,7 @@ struct AAKernelInfoFunction : AAKernelInfo { ConstantInt *ExecModeC = KernelInfo::getExecModeFromKernelEnvironment(KernelEnvC); ConstantInt *AssumedExecModeC = ConstantInt::get( - 
ExecModeC->getType(), + ExecModeC->getIntegerType(), ExecModeC->getSExtValue() | OMP_TGT_EXEC_MODE_GENERIC_SPMD); if (ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD) SPMDCompatibilityTracker.indicateOptimisticFixpoint(); @@ -3792,7 +3795,7 @@ struct AAKernelInfoFunction : AAKernelInfo { ConstantInt *MayUseNestedParallelismC = KernelInfo::getMayUseNestedParallelismFromKernelEnvironment(KernelEnvC); ConstantInt *AssumedMayUseNestedParallelismC = ConstantInt::get( - MayUseNestedParallelismC->getType(), NestedParallelism); + MayUseNestedParallelismC->getIntegerType(), NestedParallelism); setMayUseNestedParallelismOfKernelEnvironment( AssumedMayUseNestedParallelismC); @@ -3801,7 +3804,7 @@ struct AAKernelInfoFunction : AAKernelInfo { KernelInfo::getUseGenericStateMachineFromKernelEnvironment( KernelEnvC); ConstantInt *AssumedUseGenericStateMachineC = - ConstantInt::get(UseGenericStateMachineC->getType(), false); + ConstantInt::get(UseGenericStateMachineC->getIntegerType(), false); setUseGenericStateMachineOfKernelEnvironment( AssumedUseGenericStateMachineC); } @@ -4280,8 +4283,9 @@ struct AAKernelInfoFunction : AAKernelInfo { // kernel is executed in. assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC && "Initially non-SPMD kernel has SPMD exec mode!"); - setExecModeOfKernelEnvironment(ConstantInt::get( - ExecModeC->getType(), ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD)); + setExecModeOfKernelEnvironment( + ConstantInt::get(ExecModeC->getIntegerType(), + ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD)); ++NumOpenMPTargetRegionKernelsSPMD; @@ -4332,7 +4336,7 @@ struct AAKernelInfoFunction : AAKernelInfo { // If not SPMD mode, indicate we use a custom state machine now. setUseGenericStateMachineOfKernelEnvironment( - ConstantInt::get(UseStateMachineC->getType(), false)); + ConstantInt::get(UseStateMachineC->getIntegerType(), false)); // If we don't actually need a state machine we are done here. This can // happen if there simply are no parallel regions. 
In the resulting kernel @@ -4658,7 +4662,7 @@ struct AAKernelInfoFunction : AAKernelInfo { KernelInfo::getMayUseNestedParallelismFromKernelEnvironment( AA.KernelEnvC); ConstantInt *NewMayUseNestedParallelismC = ConstantInt::get( - MayUseNestedParallelismC->getType(), AA.NestedParallelism); + MayUseNestedParallelismC->getIntegerType(), AA.NestedParallelism); AA.setMayUseNestedParallelismOfKernelEnvironment( NewMayUseNestedParallelismC); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp index 6c6f0a0eca72..2fd8668d15e2 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -794,10 +794,9 @@ SampleProfileLoader::findIndirectCallFunctionSamples( return R; auto CallSite = FunctionSamples::getCallSiteIdentifier(DIL); - auto T = FS->findCallTargetMapAt(CallSite); Sum = 0; - if (T) - for (const auto &T_C : T.get()) + if (auto T = FS->findCallTargetMapAt(CallSite)) + for (const auto &T_C : *T) Sum += T_C.second; if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) { if (M->empty()) @@ -1679,7 +1678,8 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) { if (!FS) continue; auto CallSite = FunctionSamples::getCallSiteIdentifier(DIL); - auto T = FS->findCallTargetMapAt(CallSite); + ErrorOr<SampleRecord::CallTargetMap> T = + FS->findCallTargetMapAt(CallSite); if (!T || T.get().empty()) continue; if (FunctionSamples::ProfileIsProbeBased) { @@ -2261,9 +2261,8 @@ void SampleProfileMatcher::countProfileCallsiteMismatches( // Compute number of samples in the original profile. 
uint64_t CallsiteSamples = 0; - auto CTM = FS.findCallTargetMapAt(Loc); - if (CTM) { - for (const auto &I : CTM.get()) + if (auto CTM = FS.findCallTargetMapAt(Loc)) { + for (const auto &I : *CTM) CallsiteSamples += I.second; } const auto *FSMap = FS.findFunctionSamplesMapAt(Loc); diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 5e362f4117d0..63b1e0f64a88 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -3956,35 +3956,50 @@ Value *InstCombinerImpl::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, const APInt *LC, *RC; if (match(LHS1, m_APInt(LC)) && match(RHS1, m_APInt(RC)) && LHS0->getType() == RHS0->getType() && - LHS0->getType()->isIntOrIntVectorTy() && - (LHS->hasOneUse() || RHS->hasOneUse())) { + LHS0->getType()->isIntOrIntVectorTy()) { // Convert xor of signbit tests to signbit test of xor'd values: // (X > -1) ^ (Y > -1) --> (X ^ Y) < 0 // (X < 0) ^ (Y < 0) --> (X ^ Y) < 0 // (X > -1) ^ (Y < 0) --> (X ^ Y) > -1 // (X < 0) ^ (Y > -1) --> (X ^ Y) > -1 bool TrueIfSignedL, TrueIfSignedR; - if (isSignBitCheck(PredL, *LC, TrueIfSignedL) && + if ((LHS->hasOneUse() || RHS->hasOneUse()) && + isSignBitCheck(PredL, *LC, TrueIfSignedL) && isSignBitCheck(PredR, *RC, TrueIfSignedR)) { Value *XorLR = Builder.CreateXor(LHS0, RHS0); return TrueIfSignedL == TrueIfSignedR ? Builder.CreateIsNeg(XorLR) : Builder.CreateIsNotNeg(XorLR); } - // (X > C) ^ (X < C + 2) --> X != C + 1 - // (X < C + 2) ^ (X > C) --> X != C + 1 - // Considering the correctness of this pattern, we should avoid that C is - // non-negative and C + 2 is negative, although it will be matched by other - // patterns. 
- const APInt *C1, *C2; - if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_APInt(C1)) && - PredR == CmpInst::ICMP_SLT && match(RHS1, m_APInt(C2))) || - (PredL == CmpInst::ICMP_SLT && match(LHS1, m_APInt(C2)) && - PredR == CmpInst::ICMP_SGT && match(RHS1, m_APInt(C1)))) - if (LHS0 == RHS0 && *C1 + 2 == *C2 && - (C1->isNegative() || C2->isNonNegative())) - return Builder.CreateICmpNE(LHS0, - ConstantInt::get(LHS0->getType(), *C1 + 1)); + // Fold (icmp pred1 X, C1) ^ (icmp pred2 X, C2) + // into a single comparison using range-based reasoning. + if (LHS0 == RHS0) { + ConstantRange CR1 = ConstantRange::makeExactICmpRegion(PredL, *LC); + ConstantRange CR2 = ConstantRange::makeExactICmpRegion(PredR, *RC); + auto CRUnion = CR1.exactUnionWith(CR2); + auto CRIntersect = CR1.exactIntersectWith(CR2); + if (CRUnion && CRIntersect) + if (auto CR = CRUnion->exactIntersectWith(CRIntersect->inverse())) { + if (CR->isFullSet()) + return ConstantInt::getTrue(I.getType()); + if (CR->isEmptySet()) + return ConstantInt::getFalse(I.getType()); + + CmpInst::Predicate NewPred; + APInt NewC, Offset; + CR->getEquivalentICmp(NewPred, NewC, Offset); + + if ((Offset.isZero() && (LHS->hasOneUse() || RHS->hasOneUse())) || + (LHS->hasOneUse() && RHS->hasOneUse())) { + Value *NewV = LHS0; + Type *Ty = LHS0->getType(); + if (!Offset.isZero()) + NewV = Builder.CreateAdd(NewV, ConstantInt::get(Ty, Offset)); + return Builder.CreateICmp(NewPred, NewV, + ConstantInt::get(Ty, NewC)); + } + } + } } // Instead of trying to imitate the folds for and/or, decompose this 'xor' diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 1539fa9a3269..3b7fe7fa2266 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -357,9 +357,9 @@ Instruction 
*InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) { // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask); - APInt UndefElts(DemandedElts.getBitWidth(), 0); - if (Value *V = - SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts)) + APInt PoisonElts(DemandedElts.getBitWidth(), 0); + if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, + PoisonElts)) return replaceOperand(II, 0, V); return nullptr; @@ -439,12 +439,12 @@ Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) { // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask); - APInt UndefElts(DemandedElts.getBitWidth(), 0); - if (Value *V = - SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts)) + APInt PoisonElts(DemandedElts.getBitWidth(), 0); + if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, + PoisonElts)) return replaceOperand(II, 0, V); - if (Value *V = - SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts, UndefElts)) + if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts, + PoisonElts)) return replaceOperand(II, 1, V); return nullptr; @@ -1526,9 +1526,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // support. 
if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) { auto VWidth = IIFVTy->getNumElements(); - APInt UndefElts(VWidth, 0); + APInt PoisonElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnes(VWidth)); - if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) { + if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, PoisonElts)) { if (V != II) return replaceInstUsesWith(*II, V); return II; @@ -1539,6 +1539,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { if (Instruction *I = foldCommutativeIntrinsicOverSelects(*II)) return I; + if (Instruction *I = foldCommutativeIntrinsicOverPhis(*II)) + return I; + if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI)) return NewCall; } @@ -1793,6 +1796,23 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { if (Instruction *NewMinMax = factorizeMinMaxTree(II)) return NewMinMax; + // Try to fold minmax with constant RHS based on range information + const APInt *RHSC; + if (match(I1, m_APIntAllowUndef(RHSC))) { + ICmpInst::Predicate Pred = + ICmpInst::getNonStrictPredicate(MinMaxIntrinsic::getPredicate(IID)); + bool IsSigned = MinMaxIntrinsic::isSigned(IID); + ConstantRange LHS_CR = computeConstantRangeIncludingKnownBits( + I0, IsSigned, SQ.getWithInstruction(II)); + if (!LHS_CR.isFullSet()) { + if (LHS_CR.icmp(Pred, *RHSC)) + return replaceInstUsesWith(*II, I0); + if (LHS_CR.icmp(ICmpInst::getSwappedPredicate(Pred), *RHSC)) + return replaceInstUsesWith(*II, + ConstantInt::get(II->getType(), *RHSC)); + } + } + break; } case Intrinsic::bitreverse: { @@ -4237,3 +4257,22 @@ InstCombinerImpl::foldCommutativeIntrinsicOverSelects(IntrinsicInst &II) { return nullptr; } + +Instruction * +InstCombinerImpl::foldCommutativeIntrinsicOverPhis(IntrinsicInst &II) { + assert(II.isCommutative() && "Instruction should be commutative"); + + PHINode *LHS = dyn_cast<PHINode>(II.getOperand(0)); + PHINode *RHS = dyn_cast<PHINode>(II.getOperand(1)); + + if (!LHS || !RHS) + return 
nullptr; + + if (auto P = matchSymmetricPhiNodesPair(LHS, RHS)) { + replaceOperand(II, 0, P->first); + replaceOperand(II, 1, P->second); + return &II; + } + + return nullptr; +} diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 1d50fa9b6bf7..9e76a0cf17b1 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -278,6 +278,16 @@ private: IntrinsicInst &Tramp); Instruction *foldCommutativeIntrinsicOverSelects(IntrinsicInst &II); + // Match a pair of Phi Nodes like + // phi [a, BB0], [b, BB1] & phi [b, BB0], [a, BB1] + // Return the matched two operands. + std::optional<std::pair<Value *, Value *>> + matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS); + + // Tries to fold (op phi(a, b) phi(b, a)) -> (op a, b) + // while op is a commutative intrinsic call. + Instruction *foldCommutativeIntrinsicOverPhis(IntrinsicInst &II); + Value *simplifyMaskedLoad(IntrinsicInst &II); Instruction *simplifyMaskedStore(IntrinsicInst &II); Instruction *simplifyMaskedGather(IntrinsicInst &II); @@ -492,6 +502,11 @@ public: /// X % (C0 * C1) Value *SimplifyAddWithRemainder(BinaryOperator &I); + // Tries to fold (Binop phi(a, b) phi(b, a)) -> (Binop a, b) + // while Binop is commutative. + Value *SimplifyPhiCommutativeBinaryOp(BinaryOperator &I, Value *LHS, + Value *RHS); + // Binary Op helper for select operations where the expression can be // efficiently reorganized. 
Value *SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS, @@ -550,7 +565,7 @@ public: bool SimplifyDemandedInstructionBits(Instruction &Inst, KnownBits &Known); Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, - APInt &UndefElts, unsigned Depth = 0, + APInt &PoisonElts, unsigned Depth = 0, bool AllowMultipleUsers = false) override; /// Canonicalize the position of binops relative to shufflevector. diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index e5566578869d..f0ea3d9fcad5 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -350,6 +350,13 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { if (match(&I, m_c_Mul(m_OneUse(m_Neg(m_Value(X))), m_Value(Y)))) return BinaryOperator::CreateNeg(Builder.CreateMul(X, Y)); + // (-X * Y) * -X --> (X * Y) * X + // (-X << Y) * -X --> (X << Y) * X + if (match(Op1, m_Neg(m_Value(X)))) { + if (Value *NegOp0 = Negator::Negate(false, /*IsNSW*/ false, Op0, *this)) + return BinaryOperator::CreateMul(NegOp0, X); + } + // (X / Y) * Y = X - (X % Y) // (X / Y) * -Y = (X % Y) - X { diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 2dda46986f0f..20bf00344b14 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -2440,9 +2440,9 @@ Instruction *InstCombinerImpl::foldVectorSelect(SelectInst &Sel) { return nullptr; unsigned NumElts = VecTy->getNumElements(); - APInt UndefElts(NumElts, 0); + APInt PoisonElts(NumElts, 0); APInt AllOnesEltMask(APInt::getAllOnes(NumElts)); - if (Value *V = SimplifyDemandedVectorElts(&Sel, 
AllOnesEltMask, UndefElts)) { + if (Value *V = SimplifyDemandedVectorElts(&Sel, AllOnesEltMask, PoisonElts)) { if (V != &Sel) return replaceInstUsesWith(Sel, V); return &Sel; diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 846116a929b1..a8a5f9831e15 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1319,8 +1319,8 @@ Value *InstCombinerImpl::simplifyShrShlDemandedBits( } /// The specified value produces a vector with any number of elements. -/// This method analyzes which elements of the operand are undef or poison and -/// returns that information in UndefElts. +/// This method analyzes which elements of the operand are poison and +/// returns that information in PoisonElts. /// /// DemandedElts contains the set of elements that are actually used by the /// caller, and by default (AllowMultipleUsers equals false) the value is @@ -1333,7 +1333,7 @@ Value *InstCombinerImpl::simplifyShrShlDemandedBits( /// returned. This returns null if no change was made. Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, - APInt &UndefElts, + APInt &PoisonElts, unsigned Depth, bool AllowMultipleUsers) { // Cannot analyze scalable type. The number of vector elements is not a @@ -1345,18 +1345,18 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, APInt EltMask(APInt::getAllOnes(VWidth)); assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!"); - if (match(V, m_Undef())) { - // If the entire vector is undef or poison, just return this info. - UndefElts = EltMask; + if (match(V, m_Poison())) { + // If the entire vector is poison, just return this info. 
+ PoisonElts = EltMask; return nullptr; } if (DemandedElts.isZero()) { // If nothing is demanded, provide poison. - UndefElts = EltMask; + PoisonElts = EltMask; return PoisonValue::get(V->getType()); } - UndefElts = 0; + PoisonElts = 0; if (auto *C = dyn_cast<Constant>(V)) { // Check if this is identity. If so, return 0 since we are not simplifying @@ -1370,7 +1370,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, for (unsigned i = 0; i != VWidth; ++i) { if (!DemandedElts[i]) { // If not demanded, set to poison. Elts.push_back(Poison); - UndefElts.setBit(i); + PoisonElts.setBit(i); continue; } @@ -1378,8 +1378,8 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, if (!Elt) return nullptr; Elts.push_back(Elt); - if (isa<UndefValue>(Elt)) // Already undef or poison. - UndefElts.setBit(i); + if (isa<PoisonValue>(Elt)) // Already poison. + PoisonElts.setBit(i); } // If we changed the constant, return it. @@ -1400,7 +1400,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, // They'll be handled when it's their turn to be visited by // the main instcombine process. if (Depth != 0) - // TODO: Just compute the UndefElts information recursively. + // TODO: Just compute the PoisonElts information recursively. return nullptr; // Conservatively assume that all elements are needed. @@ -1422,8 +1422,8 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, } }; - APInt UndefElts2(VWidth, 0); - APInt UndefElts3(VWidth, 0); + APInt PoisonElts2(VWidth, 0); + APInt PoisonElts3(VWidth, 0); switch (I->getOpcode()) { default: break; @@ -1449,17 +1449,17 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, if (i == 0 ? match(I->getOperand(i), m_Undef()) : match(I->getOperand(i), m_Poison())) { // If the entire vector is undefined, just return this info. 
- UndefElts = EltMask; + PoisonElts = EltMask; return nullptr; } if (I->getOperand(i)->getType()->isVectorTy()) { - APInt UndefEltsOp(VWidth, 0); - simplifyAndSetOp(I, i, DemandedElts, UndefEltsOp); + APInt PoisonEltsOp(VWidth, 0); + simplifyAndSetOp(I, i, DemandedElts, PoisonEltsOp); // gep(x, undef) is not undef, so skip considering idx ops here // Note that we could propagate poison, but we can't distinguish between // undef & poison bits ATM if (i == 0) - UndefElts |= UndefEltsOp; + PoisonElts |= PoisonEltsOp; } } @@ -1472,7 +1472,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, if (!Idx) { // Note that we can't propagate undef elt info, because we don't know // which elt is getting updated. - simplifyAndSetOp(I, 0, DemandedElts, UndefElts2); + simplifyAndSetOp(I, 0, DemandedElts, PoisonElts2); break; } @@ -1487,7 +1487,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, // was extracted from the same index in another vector with the same type, // replace this insert with that other vector. // Note: This is attempted before the call to simplifyAndSetOp because that - // may change UndefElts to a value that does not match with Vec. + // may change PoisonElts to a value that does not match with Vec. Value *Vec; if (PreInsertDemandedElts == 0 && match(I->getOperand(1), @@ -1496,7 +1496,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, return Vec; } - simplifyAndSetOp(I, 0, PreInsertDemandedElts, UndefElts); + simplifyAndSetOp(I, 0, PreInsertDemandedElts, PoisonElts); // If this is inserting an element that isn't demanded, remove this // insertelement. @@ -1506,7 +1506,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, } // The inserted element is defined. - UndefElts.clearBit(IdxNo); + PoisonElts.clearBit(IdxNo); break; } case Instruction::ShuffleVector: { @@ -1520,17 +1520,17 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, // operand. 
if (all_of(Shuffle->getShuffleMask(), [](int Elt) { return Elt == 0; }) && DemandedElts.isAllOnes()) { - if (!match(I->getOperand(1), m_Undef())) { + if (!isa<PoisonValue>(I->getOperand(1))) { I->setOperand(1, PoisonValue::get(I->getOperand(1)->getType())); MadeChange = true; } APInt LeftDemanded(OpWidth, 1); - APInt LHSUndefElts(OpWidth, 0); - simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts); - if (LHSUndefElts[0]) - UndefElts = EltMask; + APInt LHSPoisonElts(OpWidth, 0); + simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts); + if (LHSPoisonElts[0]) + PoisonElts = EltMask; else - UndefElts.clearAllBits(); + PoisonElts.clearAllBits(); break; } @@ -1549,11 +1549,11 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, } } - APInt LHSUndefElts(OpWidth, 0); - simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts); + APInt LHSPoisonElts(OpWidth, 0); + simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts); - APInt RHSUndefElts(OpWidth, 0); - simplifyAndSetOp(I, 1, RightDemanded, RHSUndefElts); + APInt RHSPoisonElts(OpWidth, 0); + simplifyAndSetOp(I, 1, RightDemanded, RHSPoisonElts); // If this shuffle does not change the vector length and the elements // demanded by this shuffle are an identity mask, then this shuffle is @@ -1579,7 +1579,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, return Shuffle->getOperand(0); } - bool NewUndefElts = false; + bool NewPoisonElts = false; unsigned LHSIdx = -1u, LHSValIdx = -1u; unsigned RHSIdx = -1u, RHSValIdx = -1u; bool LHSUniform = true; @@ -1587,23 +1587,23 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, for (unsigned i = 0; i < VWidth; i++) { unsigned MaskVal = Shuffle->getMaskValue(i); if (MaskVal == -1u) { - UndefElts.setBit(i); + PoisonElts.setBit(i); } else if (!DemandedElts[i]) { - NewUndefElts = true; - UndefElts.setBit(i); + NewPoisonElts = true; + PoisonElts.setBit(i); } else if (MaskVal < OpWidth) { - if (LHSUndefElts[MaskVal]) { - NewUndefElts = true; - UndefElts.setBit(i); + 
if (LHSPoisonElts[MaskVal]) { + NewPoisonElts = true; + PoisonElts.setBit(i); } else { LHSIdx = LHSIdx == -1u ? i : OpWidth; LHSValIdx = LHSValIdx == -1u ? MaskVal : OpWidth; LHSUniform = LHSUniform && (MaskVal == i); } } else { - if (RHSUndefElts[MaskVal - OpWidth]) { - NewUndefElts = true; - UndefElts.setBit(i); + if (RHSPoisonElts[MaskVal - OpWidth]) { + NewPoisonElts = true; + PoisonElts.setBit(i); } else { RHSIdx = RHSIdx == -1u ? i : OpWidth; RHSValIdx = RHSValIdx == -1u ? MaskVal - OpWidth : OpWidth; @@ -1646,11 +1646,11 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, return New; } } - if (NewUndefElts) { + if (NewPoisonElts) { // Add additional discovered undefs. SmallVector<int, 16> Elts; for (unsigned i = 0; i < VWidth; ++i) { - if (UndefElts[i]) + if (PoisonElts[i]) Elts.push_back(PoisonMaskElem); else Elts.push_back(Shuffle->getMaskValue(i)); @@ -1665,12 +1665,12 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, // on the current demanded elements. SelectInst *Sel = cast<SelectInst>(I); if (Sel->getCondition()->getType()->isVectorTy()) { - // TODO: We are not doing anything with UndefElts based on this call. + // TODO: We are not doing anything with PoisonElts based on this call. // It is overwritten below based on the other select operands. If an // element of the select condition is known undef, then we are free to // choose the output value from either arm of the select. If we know that // one of those values is undef, then the output can be undef. - simplifyAndSetOp(I, 0, DemandedElts, UndefElts); + simplifyAndSetOp(I, 0, DemandedElts, PoisonElts); } // Next, see if we can transform the arms of the select. 
@@ -1692,12 +1692,12 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, } } - simplifyAndSetOp(I, 1, DemandedLHS, UndefElts2); - simplifyAndSetOp(I, 2, DemandedRHS, UndefElts3); + simplifyAndSetOp(I, 1, DemandedLHS, PoisonElts2); + simplifyAndSetOp(I, 2, DemandedRHS, PoisonElts3); // Output elements are undefined if the element from each arm is undefined. // TODO: This can be improved. See comment in select condition handling. - UndefElts = UndefElts2 & UndefElts3; + PoisonElts = PoisonElts2 & PoisonElts3; break; } case Instruction::BitCast: { @@ -1706,7 +1706,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, if (!VTy) break; unsigned InVWidth = cast<FixedVectorType>(VTy)->getNumElements(); APInt InputDemandedElts(InVWidth, 0); - UndefElts2 = APInt(InVWidth, 0); + PoisonElts2 = APInt(InVWidth, 0); unsigned Ratio; if (VWidth == InVWidth) { @@ -1735,25 +1735,25 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, break; } - simplifyAndSetOp(I, 0, InputDemandedElts, UndefElts2); + simplifyAndSetOp(I, 0, InputDemandedElts, PoisonElts2); if (VWidth == InVWidth) { - UndefElts = UndefElts2; + PoisonElts = PoisonElts2; } else if ((VWidth % InVWidth) == 0) { // If the number of elements in the output is a multiple of the number of // elements in the input then an output element is undef if the // corresponding input element is undef. for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) - if (UndefElts2[OutIdx / Ratio]) - UndefElts.setBit(OutIdx); + if (PoisonElts2[OutIdx / Ratio]) + PoisonElts.setBit(OutIdx); } else if ((InVWidth % VWidth) == 0) { // If the number of elements in the input is a multiple of the number of // elements in the output then an output element is undef if all of the // corresponding input elements are undef. 
for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) { - APInt SubUndef = UndefElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio); + APInt SubUndef = PoisonElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio); if (SubUndef.popcount() == Ratio) - UndefElts.setBit(OutIdx); + PoisonElts.setBit(OutIdx); } } else { llvm_unreachable("Unimp"); @@ -1762,7 +1762,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, } case Instruction::FPTrunc: case Instruction::FPExt: - simplifyAndSetOp(I, 0, DemandedElts, UndefElts); + simplifyAndSetOp(I, 0, DemandedElts, PoisonElts); break; case Instruction::Call: { @@ -1785,18 +1785,18 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, DemandedPassThrough.clearBit(i); } if (II->getIntrinsicID() == Intrinsic::masked_gather) - simplifyAndSetOp(II, 0, DemandedPtrs, UndefElts2); - simplifyAndSetOp(II, 3, DemandedPassThrough, UndefElts3); + simplifyAndSetOp(II, 0, DemandedPtrs, PoisonElts2); + simplifyAndSetOp(II, 3, DemandedPassThrough, PoisonElts3); // Output elements are undefined if the element from both sources are. // TODO: can strengthen via mask as well. - UndefElts = UndefElts2 & UndefElts3; + PoisonElts = PoisonElts2 & PoisonElts3; break; } default: { // Handle target specific intrinsics std::optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic( - *II, DemandedElts, UndefElts, UndefElts2, UndefElts3, + *II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3, simplifyAndSetOp); if (V) return *V; @@ -1859,18 +1859,18 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, return ShufBO; } - simplifyAndSetOp(I, 0, DemandedElts, UndefElts); - simplifyAndSetOp(I, 1, DemandedElts, UndefElts2); + simplifyAndSetOp(I, 0, DemandedElts, PoisonElts); + simplifyAndSetOp(I, 1, DemandedElts, PoisonElts2); // Output elements are undefined if both are undefined. Consider things // like undef & 0. The result is known zero, not undef. 
- UndefElts &= UndefElts2; + PoisonElts &= PoisonElts2; } - // If we've proven all of the lanes undef, return an undef value. + // If we've proven all of the lanes poison, return a poison value. // TODO: Intersect w/demanded lanes - if (UndefElts.isAllOnes()) - return UndefValue::get(I->getType()); + if (PoisonElts.isAllOnes()) + return PoisonValue::get(I->getType()); return MadeChange ? I : nullptr; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index c8b58c51d4e6..18ab510aae7f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -388,7 +388,7 @@ static APInt findDemandedEltsByAllUsers(Value *V) { /// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't /// matter, we just want a consistent type to simplify CSE. static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) { - const unsigned IndexBW = IndexC->getType()->getBitWidth(); + const unsigned IndexBW = IndexC->getBitWidth(); if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64) return nullptr; return ConstantInt::get(IndexC->getContext(), @@ -581,20 +581,20 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { // If the input vector has a single use, simplify it based on this use // property. if (SrcVec->hasOneUse()) { - APInt UndefElts(NumElts, 0); + APInt PoisonElts(NumElts, 0); APInt DemandedElts(NumElts, 0); DemandedElts.setBit(IndexC->getZExtValue()); if (Value *V = - SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts)) + SimplifyDemandedVectorElts(SrcVec, DemandedElts, PoisonElts)) return replaceOperand(EI, 0, V); } else { // If the input vector has multiple uses, simplify it based on a union // of all elements used. 
APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec); if (!DemandedElts.isAllOnes()) { - APInt UndefElts(NumElts, 0); + APInt PoisonElts(NumElts, 0); if (Value *V = SimplifyDemandedVectorElts( - SrcVec, DemandedElts, UndefElts, 0 /* Depth */, + SrcVec, DemandedElts, PoisonElts, 0 /* Depth */, true /* AllowMultipleUsers */)) { if (V != SrcVec) { Worklist.addValue(SrcVec); @@ -777,10 +777,10 @@ static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask, assert(V->getType()->isVectorTy() && "Invalid shuffle!"); unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements(); - if (match(V, m_Undef())) { + if (match(V, m_Poison())) { Mask.assign(NumElts, -1); return std::make_pair( - PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr); + PermittedRHS ? PoisonValue::get(PermittedRHS->getType()) : V, nullptr); } if (isa<ConstantAggregateZero>(V)) { @@ -1633,7 +1633,8 @@ Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) { // bitcast (inselt undef, ScalarSrc, IdxOp) Type *ScalarTy = ScalarSrc->getType(); Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount()); - UndefValue *NewUndef = UndefValue::get(VecTy); + Constant *NewUndef = isa<PoisonValue>(VecOp) ? 
PoisonValue::get(VecTy) + : UndefValue::get(VecTy); Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp); return new BitCastInst(NewInsElt, IE.getType()); } @@ -1713,9 +1714,10 @@ Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) { if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) { unsigned VWidth = VecTy->getNumElements(); - APInt UndefElts(VWidth, 0); + APInt PoisonElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnes(VWidth)); - if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) { + if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, + PoisonElts)) { if (V != &IE) return replaceInstUsesWith(IE, V); return &IE; @@ -1918,6 +1920,10 @@ static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask, assert(V->getType()->isVectorTy() && "can't reorder non-vector elements"); Type *EltTy = V->getType()->getScalarType(); + + if (isa<PoisonValue>(V)) + return PoisonValue::get(FixedVectorType::get(EltTy, Mask.size())); + if (match(V, m_Undef())) return UndefValue::get(FixedVectorType::get(EltTy, Mask.size())); @@ -2639,7 +2645,7 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf, assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?"); // Index is updated to the potentially translated insertion lane. - IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex); + IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex); return true; }; @@ -2769,6 +2775,11 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (Instruction *I = simplifyBinOpSplats(SVI)) return I; + // Canonicalize splat shuffle to use poison RHS. Handle this explicitly in + // order to support scalable vectors. 
+ if (match(SVI.getShuffleMask(), m_ZeroMask()) && !isa<PoisonValue>(RHS)) + return replaceOperand(SVI, 1, PoisonValue::get(RHS->getType())); + if (isa<ScalableVectorType>(LHS->getType())) return nullptr; @@ -2855,9 +2866,9 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (Instruction *I = foldCastShuffle(SVI, Builder)) return I; - APInt UndefElts(VWidth, 0); + APInt PoisonElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnes(VWidth)); - if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { + if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, PoisonElts)) { if (V != &SVI) return replaceInstUsesWith(SVI, V); return &SVI; @@ -3012,10 +3023,11 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS); ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS); if (LHSShuffle) - if (!match(LHSShuffle->getOperand(1), m_Undef()) && !match(RHS, m_Undef())) + if (!match(LHSShuffle->getOperand(1), m_Poison()) && + !match(RHS, m_Poison())) LHSShuffle = nullptr; if (RHSShuffle) - if (!match(RHSShuffle->getOperand(1), m_Undef())) + if (!match(RHSShuffle->getOperand(1), m_Poison())) RHSShuffle = nullptr; if (!LHSShuffle && !RHSShuffle) return MadeChange ? 
&SVI : nullptr; @@ -3038,7 +3050,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value* newRHS = RHS; if (LHSShuffle) { // case 1 - if (match(RHS, m_Undef())) { + if (match(RHS, m_Poison())) { newLHS = LHSOp0; newRHS = LHSOp1; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index a7ddadc25de4..7f5a7b666903 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -173,14 +173,14 @@ std::optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic( } std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic( - IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, - APInt &UndefElts3, + IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts, + APInt &PoisonElts2, APInt &PoisonElts3, std::function<void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) { // Handle target specific intrinsics if (II.getCalledFunction()->isTargetIntrinsic()) { return TTI.simplifyDemandedVectorEltsIntrinsic( - *this, II, DemandedElts, UndefElts, UndefElts2, UndefElts3, + *this, II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3, SimplifyAndSetOp); } return std::nullopt; @@ -1096,6 +1096,54 @@ Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) { return SimplifySelectsFeedingBinaryOp(I, LHS, RHS); } +std::optional<std::pair<Value *, Value *>> +InstCombinerImpl::matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS) { + if (LHS->getParent() != RHS->getParent()) + return std::nullopt; + + if (LHS->getNumIncomingValues() < 2) + return std::nullopt; + + if (!equal(LHS->blocks(), RHS->blocks())) + return std::nullopt; + + Value *L0 = LHS->getIncomingValue(0); + Value *R0 = RHS->getIncomingValue(0); + + for (unsigned I = 1, E = 
LHS->getNumIncomingValues(); I != E; ++I) { + Value *L1 = LHS->getIncomingValue(I); + Value *R1 = RHS->getIncomingValue(I); + + if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1)) + continue; + + return std::nullopt; + } + + return std::optional(std::pair(L0, R0)); +} + +Value *InstCombinerImpl::SimplifyPhiCommutativeBinaryOp(BinaryOperator &I, + Value *Op0, + Value *Op1) { + assert(I.isCommutative() && "Instruction should be commutative"); + + PHINode *LHS = dyn_cast<PHINode>(Op0); + PHINode *RHS = dyn_cast<PHINode>(Op1); + + if (!LHS || !RHS) + return nullptr; + + if (auto P = matchSymmetricPhiNodesPair(LHS, RHS)) { + Value *BI = Builder.CreateBinOp(I.getOpcode(), P->first, P->second); + if (auto *BO = dyn_cast<BinaryOperator>(BI)) + BO->copyIRFlags(&I); + return BI; + } + + return nullptr; +} + Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS, Value *RHS) { @@ -1529,6 +1577,11 @@ Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) { BO.getParent() != Phi1->getParent()) return nullptr; + if (BO.isCommutative()) { + if (Value *V = SimplifyPhiCommutativeBinaryOp(BO, Phi0, Phi1)) + return replaceInstUsesWith(BO, V); + } + // Fold if there is at least one specific constant value in phi0 or phi1's // incoming values that comes from the same block and this specific constant // value can be used to do optimization for specific binary operator. @@ -1728,8 +1781,8 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { // If both arguments of the binary operation are shuffles that use the same // mask and shuffle within a single vector, move the shuffle after the binop. 
- if (match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))) && - match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(Mask))) && + if (match(LHS, m_Shuffle(m_Value(V1), m_Poison(), m_Mask(Mask))) && + match(RHS, m_Shuffle(m_Value(V2), m_Poison(), m_SpecificMask(Mask))) && V1->getType() == V2->getType() && (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) { // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask) @@ -1770,9 +1823,9 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { Constant *C; auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType()); if (InstVTy && - match(&Inst, - m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))), - m_ImmConstant(C))) && + match(&Inst, m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Poison(), + m_Mask(Mask))), + m_ImmConstant(C))) && cast<FixedVectorType>(V1->getType())->getNumElements() <= InstVTy->getNumElements()) { assert(InstVTy->getScalarType() == V1->getType()->getScalarType() && @@ -1787,8 +1840,8 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { ArrayRef<int> ShMask = Mask; unsigned SrcVecNumElts = cast<FixedVectorType>(V1->getType())->getNumElements(); - UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType()); - SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar); + PoisonValue *PoisonScalar = PoisonValue::get(C->getType()->getScalarType()); + SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, PoisonScalar); bool MayChange = true; unsigned NumElts = InstVTy->getNumElements(); for (unsigned I = 0; I < NumElts; ++I) { @@ -1801,29 +1854,29 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { // 2. The shuffle needs an element of the constant vector that can't // be mapped to a new constant vector. // 3. This is a widening shuffle that copies elements of V1 into the - // extended elements (extending with undef is allowed). 
- if (!CElt || (!isa<UndefValue>(NewCElt) && NewCElt != CElt) || + // extended elements (extending with poison is allowed). + if (!CElt || (!isa<PoisonValue>(NewCElt) && NewCElt != CElt) || I >= SrcVecNumElts) { MayChange = false; break; } NewVecC[ShMask[I]] = CElt; } - // If this is a widening shuffle, we must be able to extend with undef - // elements. If the original binop does not produce an undef in the high + // If this is a widening shuffle, we must be able to extend with poison + // elements. If the original binop does not produce a poison in the high // lanes, then this transform is not safe. - // Similarly for undef lanes due to the shuffle mask, we can only - // transform binops that preserve undef. - // TODO: We could shuffle those non-undef constant values into the - // result by using a constant vector (rather than an undef vector) + // Similarly for poison lanes due to the shuffle mask, we can only + // transform binops that preserve poison. + // TODO: We could shuffle those non-poison constant values into the + // result by using a constant vector (rather than a poison vector) // as operand 1 of the new binop, but that might be too aggressive // for target-independent shuffle creation. if (I >= SrcVecNumElts || ShMask[I] < 0) { - Constant *MaybeUndef = + Constant *MaybePoison = ConstOp1 - ? ConstantFoldBinaryOpOperands(Opcode, UndefScalar, CElt, DL) - : ConstantFoldBinaryOpOperands(Opcode, CElt, UndefScalar, DL); - if (!MaybeUndef || !match(MaybeUndef, m_Undef())) { + ? 
ConstantFoldBinaryOpOperands(Opcode, PoisonScalar, CElt, DL) + : ConstantFoldBinaryOpOperands(Opcode, CElt, PoisonScalar, DL); + if (!MaybePoison || !isa<PoisonValue>(MaybePoison)) { MayChange = false; break; } @@ -1831,9 +1884,10 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { } if (MayChange) { Constant *NewC = ConstantVector::get(NewVecC); - // It may not be safe to execute a binop on a vector with undef elements + // It may not be safe to execute a binop on a vector with poison elements // because the entire instruction can be folded to undef or create poison // that did not exist in the original code. + // TODO: The shift case should not be necessary. if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1)) NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1); @@ -2241,10 +2295,10 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { // compile-time. if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) { auto VWidth = GEPFVTy->getNumElements(); - APInt UndefElts(VWidth, 0); + APInt PoisonElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnes(VWidth)); if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask, - UndefElts)) { + PoisonElts)) { if (V != &GEP) return replaceInstUsesWith(GEP, V); return &GEP; @@ -2462,7 +2516,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { Idx2); } ConstantInt *C; - if (match(GEP.getOperand(1), m_OneUse(m_SExt(m_OneUse(m_NSWAdd( + if (match(GEP.getOperand(1), m_OneUse(m_SExtLike(m_OneUse(m_NSWAdd( m_Value(Idx1), m_ConstantInt(C))))))) { // %add = add nsw i32 %idx1, idx2 // %sidx = sext i32 %add to i64 diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 6468d07b4f4f..afb0e6cd1548 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ 
b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -2737,7 +2737,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { // the shadow memory. // We cannot just ignore these methods, because they may call other // instrumented functions. - if (F.getName().find(" load]") != std::string::npos) { + if (F.getName().contains(" load]")) { FunctionCallee AsanInitFunction = declareSanitizerInitFunction(*F.getParent(), kAsanInitName, {}); IRBuilder<> IRB(&F.front(), F.front().begin()); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 539b7441d24b..2236e9cd44c5 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -535,7 +535,7 @@ bool MemProfiler::maybeInsertMemProfInitAtFunctionEntry(Function &F) { // the shadow memory. // We cannot just ignore these methods, because they may call other // instrumented functions. 
- if (F.getName().find(" load]") != std::string::npos) { + if (F.getName().contains(" load]")) { FunctionCallee MemProfInitFunction = declareSanitizerInitFunction(*F.getParent(), MemProfInitName, {}); IRBuilder<> IRB(&F.front(), F.front().begin()); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index fe672a4377a1..ce570bdfd8b8 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -603,7 +603,7 @@ void ModuleSanitizerCoverage::instrumentFunction( Function &F, DomTreeCallback DTCallback, PostDomTreeCallback PDTCallback) { if (F.empty()) return; - if (F.getName().find(".module_ctor") != std::string::npos) + if (F.getName().contains(".module_ctor")) return; // Should not instrument sanitizer init functions. if (F.getName().starts_with("__sanitizer_")) return; // Don't instrument __sanitizer_* callbacks. 
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp index 1fb9d7fff32f..9e40d94dd73c 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -674,8 +674,7 @@ void ConstantHoistingPass::findBaseConstants(GlobalVariable *BaseGV) { llvm::stable_sort(ConstCandVec, [](const ConstantCandidate &LHS, const ConstantCandidate &RHS) { if (LHS.ConstInt->getType() != RHS.ConstInt->getType()) - return LHS.ConstInt->getType()->getBitWidth() < - RHS.ConstInt->getType()->getBitWidth(); + return LHS.ConstInt->getBitWidth() < RHS.ConstInt->getBitWidth(); return LHS.ConstInt->getValue().ult(RHS.ConstInt->getValue()); }); @@ -890,7 +889,7 @@ bool ConstantHoistingPass::emitBaseConstants(GlobalVariable *BaseGV) { Type *Ty = ConstInfo.BaseExpr->getType(); Base = new BitCastInst(ConstInfo.BaseExpr, Ty, "const", IP); } else { - IntegerType *Ty = ConstInfo.BaseInt->getType(); + IntegerType *Ty = ConstInfo.BaseInt->getIntegerType(); Base = new BitCastInst(ConstInfo.BaseInt, Ty, "const", IP); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 18266ba07898..899d7e0a11e6 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -273,7 +273,16 @@ class ConstraintInfo { public: ConstraintInfo(const DataLayout &DL, ArrayRef<Value *> FunctionArgs) - : UnsignedCS(FunctionArgs), SignedCS(FunctionArgs), DL(DL) {} + : UnsignedCS(FunctionArgs), SignedCS(FunctionArgs), DL(DL) { + auto &Value2Index = getValue2Index(false); + // Add Arg > -1 constraints to unsigned system for all function arguments. 
+ for (Value *Arg : FunctionArgs) { + ConstraintTy VarPos(SmallVector<int64_t, 8>(Value2Index.size() + 1, 0), + false, false, false); + VarPos.Coefficients[Value2Index[Arg]] = -1; + UnsignedCS.addVariableRow(VarPos.Coefficients); + } + } DenseMap<Value *, unsigned> &getValue2Index(bool Signed) { return Signed ? SignedCS.getValue2Index() : UnsignedCS.getValue2Index(); @@ -1365,18 +1374,34 @@ removeEntryFromStack(const StackEntry &E, ConstraintInfo &Info, ReproducerCondStack.pop_back(); } -/// Check if the first condition for an AND implies the second. -static bool checkAndSecondOpImpliedByFirst( +/// Check if either the first condition of an AND or OR is implied by the +/// (negated in case of OR) second condition or vice versa. +static bool checkOrAndOpImpliedByOther( FactOrCheck &CB, ConstraintInfo &Info, Module *ReproducerModule, SmallVectorImpl<ReproducerEntry> &ReproducerCondStack, SmallVectorImpl<StackEntry> &DFSInStack) { CmpInst::Predicate Pred; Value *A, *B; - Instruction *And = CB.getContextInst(); - if (!match(And->getOperand(0), m_ICmp(Pred, m_Value(A), m_Value(B)))) + Instruction *JoinOp = CB.getContextInst(); + CmpInst *CmpToCheck = cast<CmpInst>(CB.getInstructionToSimplify()); + unsigned OtherOpIdx = JoinOp->getOperand(0) == CmpToCheck ? 1 : 0; + + // Don't try to simplify the first condition of a select by the second, as + // this may make the select more poisonous than the original one. + // TODO: check if the first operand may be poison. + if (OtherOpIdx != 0 && isa<SelectInst>(JoinOp)) return false; + if (!match(JoinOp->getOperand(OtherOpIdx), + m_ICmp(Pred, m_Value(A), m_Value(B)))) + return false; + + // For OR, check if the negated condition implies CmpToCheck. + bool IsOr = match(JoinOp, m_LogicalOr()); + if (IsOr) + Pred = CmpInst::getInversePredicate(Pred); + // Optimistically add fact from first condition. 
unsigned OldSize = DFSInStack.size(); Info.addFact(Pred, A, B, CB.NumIn, CB.NumOut, DFSInStack); @@ -1385,11 +1410,19 @@ static bool checkAndSecondOpImpliedByFirst( bool Changed = false; // Check if the second condition can be simplified now. - ICmpInst *Cmp = cast<ICmpInst>(And->getOperand(1)); - if (auto ImpliedCondition = checkCondition( - Cmp->getPredicate(), Cmp->getOperand(0), Cmp->getOperand(1), Cmp, - Info, CB.NumIn, CB.NumOut, CB.getContextInst())) { - And->setOperand(1, ConstantInt::getBool(And->getType(), *ImpliedCondition)); + if (auto ImpliedCondition = + checkCondition(CmpToCheck->getPredicate(), CmpToCheck->getOperand(0), + CmpToCheck->getOperand(1), CmpToCheck, Info, CB.NumIn, + CB.NumOut, CB.getContextInst())) { + if (IsOr && isa<SelectInst>(JoinOp)) { + JoinOp->setOperand( + OtherOpIdx == 0 ? 2 : 0, + ConstantInt::getBool(JoinOp->getType(), *ImpliedCondition)); + } else + JoinOp->setOperand( + 1 - OtherOpIdx, + ConstantInt::getBool(JoinOp->getType(), *ImpliedCondition)); + Changed = true; } @@ -1442,6 +1475,17 @@ void ConstraintInfo::addFact(CmpInst::Predicate Pred, Value *A, Value *B, DFSInStack.emplace_back(NumIn, NumOut, R.IsSigned, std::move(ValuesToRelease)); + if (!R.IsSigned) { + for (Value *V : NewVariables) { + ConstraintTy VarPos(SmallVector<int64_t, 8>(Value2Index.size() + 1, 0), + false, false, false); + VarPos.Coefficients[Value2Index[V]] = -1; + CSToUse.addVariableRow(VarPos.Coefficients); + DFSInStack.emplace_back(NumIn, NumOut, R.IsSigned, + SmallVector<Value *, 2>()); + } + } + if (R.isEq()) { // Also add the inverted constraint for equality constraints. 
for (auto &Coeff : R.Coefficients) @@ -1609,11 +1653,11 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI, bool Simplified = checkAndReplaceCondition( Cmp, Info, CB.NumIn, CB.NumOut, CB.getContextInst(), ReproducerModule.get(), ReproducerCondStack, S.DT, ToRemove); - if (!Simplified && match(CB.getContextInst(), - m_LogicalAnd(m_Value(), m_Specific(Inst)))) { + if (!Simplified && + match(CB.getContextInst(), m_LogicalOp(m_Value(), m_Value()))) { Simplified = - checkAndSecondOpImpliedByFirst(CB, Info, ReproducerModule.get(), - ReproducerCondStack, DFSInStack); + checkOrAndOpImpliedByOther(CB, Info, ReproducerModule.get(), + ReproducerCondStack, DFSInStack); } Changed |= Simplified; } @@ -1687,7 +1731,8 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI, #ifndef NDEBUG unsigned SignedEntries = count_if(DFSInStack, [](const StackEntry &E) { return E.IsSigned; }); - assert(Info.getCS(false).size() == DFSInStack.size() - SignedEntries && + assert(Info.getCS(false).size() - FunctionArgs.size() == + DFSInStack.size() - SignedEntries && "updates to CS and DFSInStack are out of sync"); assert(Info.getCS(true).size() == SignedEntries && "updates to CS and DFSInStack are out of sync"); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index d2dfc764d042..c44d3748a80d 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -935,11 +935,13 @@ static bool processSDiv(BinaryOperator *SDI, const ConstantRange &LCR, UDiv->setDebugLoc(SDI->getDebugLoc()); UDiv->setIsExact(SDI->isExact()); - Value *Res = UDiv; + auto *Res = UDiv; // If the operands had two different domains, we need to negate the result. 
- if (Ops[0].D != Ops[1].D) + if (Ops[0].D != Ops[1].D) { Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI); + Res->setDebugLoc(SDI->getDebugLoc()); + } SDI->replaceAllUsesWith(Res); SDI->eraseFromParent(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index edfeb36f3422..c5bf913cda30 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -521,7 +521,7 @@ struct AllSwitchPaths { const BasicBlock *PrevBB = Path.back(); for (const BasicBlock *BB : Path) { - if (StateDef.count(BB) != 0) { + if (StateDef.contains(BB)) { const PHINode *Phi = dyn_cast<PHINode>(StateDef[BB]); assert(Phi && "Expected a state-defining instr to be a phi node."); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp index 5e58af0edc15..e36578f3de7a 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp @@ -592,7 +592,7 @@ uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) { /// Returns true if a value number exists for the specified value. 
bool GVNPass::ValueTable::exists(Value *V) const { - return valueNumbering.count(V) != 0; + return valueNumbering.contains(V); } /// lookup_or_add - Returns the value number for the specified value, assigning diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp index b1add3c42976..eef94636578d 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp @@ -343,9 +343,8 @@ static bool verifyTripCount(Value *RHS, Loop *L, // If the RHS of the compare is equal to the backedge taken count we need // to add one to get the trip count. if (SCEVRHS == BackedgeTCExt || SCEVRHS == BackedgeTakenCount) { - ConstantInt *One = ConstantInt::get(ConstantRHS->getType(), 1); - Value *NewRHS = ConstantInt::get( - ConstantRHS->getContext(), ConstantRHS->getValue() + One->getValue()); + Value *NewRHS = ConstantInt::get(ConstantRHS->getContext(), + ConstantRHS->getValue() + 1); return setLoopComponents(NewRHS, TripCount, Increment, IterationInstructions); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 39607464dd00..a58bbe318563 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -7006,7 +7006,7 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, LLVM_DEBUG(dbgs() << "Old term-cond:\n" << *OldTermCond << "\n" - << "New term-cond:\b" << *NewTermCond << "\n"); + << "New term-cond:\n" << *NewTermCond << "\n"); BI->setCondition(NewTermCond); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index f14541a1a037..7cfeb019af97 100644 --- 
a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -200,6 +200,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( UP.Count = 0; UP.DefaultUnrollRuntimeCount = 8; UP.MaxCount = std::numeric_limits<unsigned>::max(); + UP.MaxUpperBound = UnrollMaxUpperBound; UP.FullUnrollMaxCount = std::numeric_limits<unsigned>::max(); UP.BEInsns = 2; UP.Partial = false; @@ -237,6 +238,8 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( UP.MaxPercentThresholdBoost = UnrollMaxPercentThresholdBoost; if (UnrollMaxCount.getNumOccurrences() > 0) UP.MaxCount = UnrollMaxCount; + if (UnrollMaxUpperBound.getNumOccurrences() > 0) + UP.MaxUpperBound = UnrollMaxUpperBound; if (UnrollFullMaxCount.getNumOccurrences() > 0) UP.FullUnrollMaxCount = UnrollFullMaxCount; if (UnrollAllowPartial.getNumOccurrences() > 0) @@ -777,7 +780,7 @@ shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo, return TripCount; if (PInfo.PragmaEnableUnroll && !TripCount && MaxTripCount && - MaxTripCount <= UnrollMaxUpperBound) + MaxTripCount <= UP.MaxUpperBound) return MaxTripCount; // if didn't return until here, should continue to other priorties @@ -952,7 +955,7 @@ bool llvm::computeUnrollCount( // cost of exact full unrolling. As such, if we have an exact count and // found it unprofitable, we'll never chose to bounded unroll. if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) && - MaxTripCount <= UnrollMaxUpperBound) { + MaxTripCount <= UP.MaxUpperBound) { UP.Count = MaxTripCount; if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues, MaxTripCount, UCE, UP)) { @@ -1026,7 +1029,7 @@ bool llvm::computeUnrollCount( } // Don't unroll a small upper bound loop unless user or TTI asked to do so. 
- if (MaxTripCount && !UP.Force && MaxTripCount < UnrollMaxUpperBound) { + if (MaxTripCount && !UP.Force && MaxTripCount < UP.MaxUpperBound) { UP.Count = 0; return false; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 40b4ea92e1ff..3f02441b74ba 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -2057,7 +2057,7 @@ static void relocationViaAlloca( for (const auto &Info : Records) for (auto RematerializedValuePair : Info.RematerializedValues) { Value *OriginalValue = RematerializedValuePair.second; - if (AllocaMap.count(OriginalValue) != 0) + if (AllocaMap.contains(OriginalValue)) continue; emitAllocaFor(OriginalValue); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp index 24da26c9f0f2..656abdb0abbf 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp @@ -3285,6 +3285,7 @@ private: (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset || SliceSize != DL.getTypeStoreSize(NewAI.getAllocatedType()).getFixedValue() || + !DL.typeSizeEqualsStoreSize(NewAI.getAllocatedType()) || !NewAI.getAllocatedType()->isSingleValueType()); // If we're just going to emit a memcpy, the alloca hasn't changed, and the diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/DXILUpgrade.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/DXILUpgrade.cpp index 735686ddce38..09991f628224 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/DXILUpgrade.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/DXILUpgrade.cpp @@ -7,14 +7,26 @@ //===----------------------------------------------------------------------===// #include 
"llvm/Transforms/Utils/DXILUpgrade.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" using namespace llvm; +#define DEBUG_TYPE "dxil-upgrade" + static bool handleValVerMetadata(Module &M) { NamedMDNode *ValVer = M.getNamedMetadata("dx.valver"); if (!ValVer) return false; + LLVM_DEBUG({ + MDNode *N = ValVer->getOperand(0); + auto X = mdconst::extract<ConstantInt>(N->getOperand(0))->getZExtValue(); + auto Y = mdconst::extract<ConstantInt>(N->getOperand(1))->getZExtValue(); + dbgs() << "DXIL: validation version: " << X << "." << Y << "\n"; + }); // We don't need the validation version internally, so we drop it. ValVer->dropAllReferences(); ValVer->eraseFromParent(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp index 79ca99d1566c..09e19be0d293 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -405,6 +405,8 @@ int FunctionComparator::cmpConstants(const Constant *L, case Value::ConstantExprVal: { const ConstantExpr *LE = cast<ConstantExpr>(L); const ConstantExpr *RE = cast<ConstantExpr>(R); + if (int Res = cmpNumbers(LE->getOpcode(), RE->getOpcode())) + return Res; unsigned NumOperandsL = LE->getNumOperands(); unsigned NumOperandsR = RE->getNumOperands(); if (int Res = cmpNumbers(NumOperandsL, NumOperandsR)) @@ -414,6 +416,29 @@ int FunctionComparator::cmpConstants(const Constant *L, cast<Constant>(RE->getOperand(i)))) return Res; } + if (LE->isCompare()) + if (int Res = cmpNumbers(LE->getPredicate(), RE->getPredicate())) + return Res; + if (auto *GEPL = dyn_cast<GEPOperator>(LE)) { + auto *GEPR = cast<GEPOperator>(RE); + if (int Res = cmpTypes(GEPL->getSourceElementType(), + GEPR->getSourceElementType())) + return Res; + if (int Res = cmpNumbers(GEPL->isInBounds(), 
GEPR->isInBounds())) + return Res; + if (int Res = cmpNumbers(GEPL->getInRangeIndex().value_or(unsigned(-1)), + GEPR->getInRangeIndex().value_or(unsigned(-1)))) + return Res; + } + if (auto *OBOL = dyn_cast<OverflowingBinaryOperator>(LE)) { + auto *OBOR = cast<OverflowingBinaryOperator>(RE); + if (int Res = + cmpNumbers(OBOL->hasNoUnsignedWrap(), OBOR->hasNoUnsignedWrap())) + return Res; + if (int Res = + cmpNumbers(OBOL->hasNoSignedWrap(), OBOR->hasNoSignedWrap())) + return Res; + } return 0; } case Value::BlockAddressVal: { diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp index 1e42d7491676..f94047633022 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp @@ -64,7 +64,7 @@ bool forAllReachableExits(const DominatorTree &DT, const PostDominatorTree &PDT, // sure that the return is covered. Otherwise, we can check whether there // is a way to reach the RI from the start of the lifetime without passing // through an end. - if (EndBlocks.count(RI->getParent()) > 0 || + if (EndBlocks.contains(RI->getParent()) || !isPotentiallyReachable(Start, RI, &EndBlocks, &DT, &LI)) { ++NumCoveredExits; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 89494a7f6497..55e375670cc6 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -6293,7 +6293,7 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { } case BitMapKind: { // Type of the bitmap (e.g. i59). - IntegerType *MapTy = BitMap->getType(); + IntegerType *MapTy = BitMap->getIntegerType(); // Cast Index to the same type as the bitmap. 
// Note: The Index is <= the number of elements in the table, so @@ -6668,7 +6668,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, Value *TableIndex; ConstantInt *TableIndexOffset; if (UseSwitchConditionAsTableIndex) { - TableIndexOffset = ConstantInt::get(MaxCaseVal->getType(), 0); + TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0); TableIndex = SI->getCondition(); } else { TableIndexOffset = MinCaseVal; @@ -6752,7 +6752,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // Get the TableIndex'th bit of the bitmask. // If this bit is 0 (meaning hole) jump to the default destination, // else continue with table lookup. - IntegerType *MapTy = TableMask->getType(); + IntegerType *MapTy = TableMask->getIntegerType(); Value *MaskIndex = Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex"); Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted"); @@ -6975,7 +6975,7 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, // Replace each case with its trailing zeros number. for (auto &Case : SI->cases()) { auto *OrigValue = Case.getCaseValue(); - Case.setValue(ConstantInt::get(OrigValue->getType(), + Case.setValue(ConstantInt::get(OrigValue->getIntegerType(), OrigValue->getValue().countr_zero())); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index 722ed03db3de..42e7c4006b42 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -27,6 +27,7 @@ #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" using namespace llvm; +using namespace llvm::PatternMatch; #define DEBUG_TYPE "indvars" @@ -786,8 +787,6 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, /// otherwise. 
bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO, Instruction *IVOperand) { - using namespace llvm::PatternMatch; - if (BO->getOpcode() == Instruction::Shl) { bool Changed = false; ConstantRange IVRange = SE->getUnsignedRange(SE->getSCEV(IVOperand)); @@ -1763,7 +1762,7 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewri }; // Our raison d'etre! Eliminate sign and zero extension. - if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) || + if ((match(DU.NarrowUse, m_SExtLike(m_Value())) && canWidenBySExt()) || (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) { Value *NewDef = DU.WideDef; if (DU.NarrowUse->getType() != WideType) { @@ -2011,8 +2010,6 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { /// by looking at dominating conditions inside of the loop void WidenIV::calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser) { - using namespace llvm::PatternMatch; - Value *NarrowDefLHS; const APInt *NarrowDefRHS; if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9d799124074c..32913b3f5569 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3760,40 +3760,7 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) { OrdersType CurrentOrder(NumScalars, NumScalars); SmallVector<int> Positions; SmallBitVector UsedPositions(NumScalars); - DenseMap<const TreeEntry *, unsigned> UsedEntries; - DenseMap<Value *, std::pair<const TreeEntry *, unsigned>> ValueToEntryPos; - for (Value *V : TE.Scalars) { - if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V)) - continue; - const auto *LocalSTE = getTreeEntry(V); - if (!LocalSTE) - continue; - unsigned Lane = - std::distance(LocalSTE->Scalars.begin(), find(LocalSTE->Scalars, V)); 
- if (Lane >= NumScalars) - continue; - ++UsedEntries.try_emplace(LocalSTE, 0).first->getSecond(); - ValueToEntryPos.try_emplace(V, LocalSTE, Lane); - } - if (UsedEntries.empty()) - return std::nullopt; - const TreeEntry &BestSTE = - *std::max_element(UsedEntries.begin(), UsedEntries.end(), - [](const std::pair<const TreeEntry *, unsigned> &P1, - const std::pair<const TreeEntry *, unsigned> &P2) { - return P1.second < P2.second; - }) - ->first; - UsedEntries.erase(&BestSTE); - const TreeEntry *SecondBestSTE = nullptr; - if (!UsedEntries.empty()) - SecondBestSTE = - std::max_element(UsedEntries.begin(), UsedEntries.end(), - [](const std::pair<const TreeEntry *, unsigned> &P1, - const std::pair<const TreeEntry *, unsigned> &P2) { - return P1.second < P2.second; - }) - ->first; + const TreeEntry *STE = nullptr; // Try to find all gathered scalars that are gets vectorized in other // vectorize node. Here we can have only one single tree vector node to // correctly identify order of the gathered scalars. @@ -3801,46 +3768,53 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) { Value *V = TE.Scalars[I]; if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V)) continue; - const auto [LocalSTE, Lane] = ValueToEntryPos.lookup(V); - if (!LocalSTE || (LocalSTE != &BestSTE && LocalSTE != SecondBestSTE)) - continue; - if (CurrentOrder[Lane] != NumScalars) { - if ((CurrentOrder[Lane] >= BestSTE.Scalars.size() || - BestSTE.Scalars[CurrentOrder[Lane]] == V) && - (Lane != I || LocalSTE == SecondBestSTE)) - continue; - UsedPositions.reset(CurrentOrder[Lane]); + if (const auto *LocalSTE = getTreeEntry(V)) { + if (!STE) + STE = LocalSTE; + else if (STE != LocalSTE) + // Take the order only from the single vector node. 
+ return std::nullopt; + unsigned Lane = + std::distance(STE->Scalars.begin(), find(STE->Scalars, V)); + if (Lane >= NumScalars) + return std::nullopt; + if (CurrentOrder[Lane] != NumScalars) { + if (Lane != I) + continue; + UsedPositions.reset(CurrentOrder[Lane]); + } + // The partial identity (where only some elements of the gather node are + // in the identity order) is good. + CurrentOrder[Lane] = I; + UsedPositions.set(I); } - // The partial identity (where only some elements of the gather node are - // in the identity order) is good. - CurrentOrder[Lane] = I; - UsedPositions.set(I); } // Need to keep the order if we have a vector entry and at least 2 scalars or // the vectorized entry has just 2 scalars. - if (BestSTE.Scalars.size() != 2 && UsedPositions.count() <= 1) - return std::nullopt; - auto IsIdentityOrder = [&](ArrayRef<unsigned> CurrentOrder) { - for (unsigned I = 0; I < NumScalars; ++I) - if (CurrentOrder[I] != I && CurrentOrder[I] != NumScalars) - return false; - return true; - }; - if (IsIdentityOrder(CurrentOrder)) - return OrdersType(); - auto *It = CurrentOrder.begin(); - for (unsigned I = 0; I < NumScalars;) { - if (UsedPositions.test(I)) { - ++I; - continue; - } - if (*It == NumScalars) { - *It = I; - ++I; + if (STE && (UsedPositions.count() > 1 || STE->Scalars.size() == 2)) { + auto &&IsIdentityOrder = [NumScalars](ArrayRef<unsigned> CurrentOrder) { + for (unsigned I = 0; I < NumScalars; ++I) + if (CurrentOrder[I] != I && CurrentOrder[I] != NumScalars) + return false; + return true; + }; + if (IsIdentityOrder(CurrentOrder)) + return OrdersType(); + auto *It = CurrentOrder.begin(); + for (unsigned I = 0; I < NumScalars;) { + if (UsedPositions.test(I)) { + ++I; + continue; + } + if (*It == NumScalars) { + *It = I; + ++I; + } + ++It; } - ++It; + return std::move(CurrentOrder); } - return std::move(CurrentOrder); + return std::nullopt; } namespace { @@ -6469,7 +6443,7 @@ bool BoUpSLP::areAllUsersVectorized( Instruction *I, const 
SmallDenseSet<Value *> *VectorizedVals) const { return (I->hasOneUse() && (!VectorizedVals || VectorizedVals->contains(I))) || all_of(I->users(), [this](User *U) { - return ScalarToTreeEntry.count(U) > 0 || + return ScalarToTreeEntry.contains(U) || isVectorLikeInstWithConstOps(U) || (isa<ExtractElementInst>(U) && MustGather.contains(U)); }); @@ -11498,7 +11472,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { Value *V = Builder.CreateBinOp( static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS, RHS); - propagateIRFlags(V, E->Scalars, VL0); + propagateIRFlags(V, E->Scalars, VL0, !MinBWs.contains(E)); if (auto *I = dyn_cast<Instruction>(V)) V = propagateMetadata(I, E->Scalars); @@ -15730,6 +15704,8 @@ static bool compareCmp(Value *V, Value *V2, TargetLibraryInfo &TLI, assert(isValidElementType(V->getType()) && isValidElementType(V2->getType()) && "Expected valid element types only."); + if (V == V2) + return IsCompatibility; auto *CI1 = cast<CmpInst>(V); auto *CI2 = cast<CmpInst>(V2); if (CI1->getOperand(0)->getType()->getTypeID() < @@ -15754,6 +15730,8 @@ static bool compareCmp(Value *V, Value *V2, TargetLibraryInfo &TLI, for (int I = 0, E = CI1->getNumOperands(); I < E; ++I) { auto *Op1 = CI1->getOperand(CI1Preds ? I : E - I - 1); auto *Op2 = CI2->getOperand(CI2Preds ? I : E - I - 1); + if (Op1 == Op2) + continue; if (Op1->getValueID() < Op2->getValueID()) return !IsCompatibility; if (Op1->getValueID() > Op2->getValueID()) @@ -15780,7 +15758,10 @@ static bool compareCmp(Value *V, Value *V2, TargetLibraryInfo &TLI, InstructionsState S = getSameOpcode({I1, I2}, TLI); if (S.getOpcode() && (IsCompatibility || !S.isAltShuffle())) continue; - return !IsCompatibility && I1->getOpcode() < I2->getOpcode(); + if (IsCompatibility) + return false; + if (I1->getOpcode() != I2->getOpcode()) + return I1->getOpcode() < I2->getOpcode(); } } return IsCompatibility; |