Diffstat (limited to 'llvm/lib/CodeGen/GlobalISel')
 llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp               |   6
 llvm/lib/CodeGen/GlobalISel/CallLowering.cpp          |  20
 llvm/lib/CodeGen/GlobalISel/Combiner.cpp              |  63
 llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp        | 657
 llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp  |  15
 llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp        |  16
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp          | 111
 llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp     | 134
 llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp     |  14
 llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp   |   3
 llvm/lib/CodeGen/GlobalISel/Legalizer.cpp             |   2
 llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp       | 622
 llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp         |  11
 llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp          |   5
 llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp      |  59
 llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp         |   7
 llvm/lib/CodeGen/GlobalISel/Utils.cpp                 |  42
17 files changed, 1255 insertions(+), 532 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
index e047996f9aa8..ca4d0986b442 100644
--- a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -76,9 +76,9 @@ bool CSEConfigConstantOnly::shouldCSEOpc(unsigned Opc) {
 }
 
 std::unique_ptr<CSEConfigBase>
-llvm::getStandardCSEConfigForOpt(CodeGenOpt::Level Level) {
+llvm::getStandardCSEConfigForOpt(CodeGenOptLevel Level) {
   std::unique_ptr<CSEConfigBase> Config;
-  if (Level == CodeGenOpt::None)
+  if (Level == CodeGenOptLevel::None)
     Config = std::make_unique<CSEConfigConstantOnly>();
   else
     Config = std::make_unique<CSEConfigFull>();
@@ -244,8 +244,6 @@ void GISelCSEInfo::changedInstr(MachineInstr &MI) { changingInstr(MI); }
 void GISelCSEInfo::analyze(MachineFunction &MF) {
   setMF(MF);
   for (auto &MBB : MF) {
-    if (MBB.empty())
-      continue;
     for (MachineInstr &MI : MBB) {
       if (!shouldCSE(MI.getOpcode()))
         continue;
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 28c33e2038e4..2527b1431289 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -110,6 +110,8 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
     getReturnInfo(CallConv, RetTy, CB.getAttributes(), SplitArgs, DL);
   Info.CanLowerReturn = canLowerReturn(MF, CallConv, SplitArgs, IsVarArg);
 
+  Info.IsConvergent = CB.isConvergent();
+
   if (!Info.CanLowerReturn) {
     // Callee requires sret demotion.
     insertSRetOutgoingArgument(MIRBuilder, CB, Info);
@@ -356,7 +358,7 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
   if (PartLLT.isVector() == LLTy.isVector() &&
       PartLLT.getScalarSizeInBits() > LLTy.getScalarSizeInBits() &&
       (!PartLLT.isVector() ||
-       PartLLT.getNumElements() == LLTy.getNumElements()) &&
+       PartLLT.getElementCount() == LLTy.getElementCount()) &&
       OrigRegs.size() == 1 && Regs.size() == 1) {
     Register SrcReg = Regs[0];
 
@@ -404,6 +406,7 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
   // If PartLLT is a mismatched vector in both number of elements and element
   // size, e.g. PartLLT == v2s64 and LLTy is v3s32, then first coerce it to
   // have the same elt type, i.e. v4s32.
+  // TODO: Extend this coercion to element multiples other than just 2.
   if (PartLLT.getSizeInBits() > LLTy.getSizeInBits() &&
       PartLLT.getScalarSizeInBits() == LLTy.getScalarSizeInBits() * 2 &&
       Regs.size() == 1) {
@@ -845,7 +848,8 @@ void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
   unsigned NumValues = SplitVTs.size();
   Align BaseAlign = DL.getPrefTypeAlign(RetTy);
-  Type *RetPtrTy = RetTy->getPointerTo(DL.getAllocaAddrSpace());
+  Type *RetPtrTy =
+      PointerType::get(RetTy->getContext(), DL.getAllocaAddrSpace());
   LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetPtrTy), DL);
 
   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
@@ -1132,7 +1136,7 @@ void CallLowering::ValueHandler::copyArgumentMemory(
 }
 
 Register CallLowering::ValueHandler::extendRegister(Register ValReg,
-                                                    CCValAssign &VA,
+                                                    const CCValAssign &VA,
                                                     unsigned MaxSizeBits) {
   LLT LocTy{VA.getLocVT()};
   LLT ValTy{VA.getValVT()};
@@ -1181,9 +1185,8 @@ Register CallLowering::ValueHandler::extendRegister(Register ValReg,
 
 void CallLowering::ValueAssigner::anchor() {}
 
-Register CallLowering::IncomingValueHandler::buildExtensionHint(CCValAssign &VA,
-                                                                Register SrcReg,
-                                                                LLT NarrowTy) {
+Register CallLowering::IncomingValueHandler::buildExtensionHint(
+    const CCValAssign &VA, Register SrcReg, LLT NarrowTy) {
   switch (VA.getLocInfo()) {
   case CCValAssign::LocInfo::ZExt: {
     return MIRBuilder
@@ -1223,9 +1226,8 @@ static bool isCopyCompatibleType(LLT SrcTy, LLT DstTy) {
          (DstTy.isPointer() && SrcTy.isScalar());
 }
 
-void CallLowering::IncomingValueHandler::assignValueToReg(Register ValVReg,
-                                                          Register PhysReg,
-                                                          CCValAssign VA) {
+void CallLowering::IncomingValueHandler::assignValueToReg(
    Register ValVReg, Register PhysReg, const CCValAssign &VA) {
   const MVT LocVT = VA.getLocVT();
   const LLT LocTy(LocVT);
   const LLT RegTy = MRI.getType(ValVReg);
diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index 748fa273d499..d18e65a83484 100644
--- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -39,7 +39,6 @@ cl::OptionCategory GICombinerOptionCategory(
 );
 } // end namespace llvm
 
-namespace {
 /// This class acts as the glue that joins the CombinerHelper to the overall
 /// Combine algorithm. The CombinerHelper is intended to report the
 /// modifications it makes to the MIR to the GISelChangeObserver and the
@@ -48,7 +47,7 @@ namespace {
 /// instruction creation will schedule that instruction for a future visit.
 /// Other Combiner implementations may require more complex behaviour from
 /// their GISelChangeObserver subclass.
-class WorkListMaintainer : public GISelChangeObserver {
+class Combiner::WorkListMaintainer : public GISelChangeObserver {
   using WorkListTy = GISelWorkList<512>;
   WorkListTy &WorkList;
   /// The instructions that have been created but we want to report once they
@@ -88,27 +87,46 @@ public:
     LLVM_DEBUG(CreatedInstrs.clear());
   }
 };
-}
 
-Combiner::Combiner(CombinerInfo &Info, const TargetPassConfig *TPC)
-    : CInfo(Info), TPC(TPC) {
+Combiner::Combiner(MachineFunction &MF, CombinerInfo &CInfo,
+                   const TargetPassConfig *TPC, GISelKnownBits *KB,
+                   GISelCSEInfo *CSEInfo)
+    : Builder(CSEInfo ? std::make_unique<CSEMIRBuilder>()
+                      : std::make_unique<MachineIRBuilder>()),
+      WLObserver(std::make_unique<WorkListMaintainer>(WorkList)),
+      ObserverWrapper(std::make_unique<GISelObserverWrapper>()), CInfo(CInfo),
+      Observer(*ObserverWrapper), B(*Builder), MF(MF), MRI(MF.getRegInfo()),
+      KB(KB), TPC(TPC), CSEInfo(CSEInfo) {
   (void)this->TPC; // FIXME: Remove when used.
+
+  // Setup builder.
+  B.setMF(MF);
+  if (CSEInfo)
+    B.setCSEInfo(CSEInfo);
+
+  // Setup observer.
+  ObserverWrapper->addObserver(WLObserver.get());
+  if (CSEInfo)
+    ObserverWrapper->addObserver(CSEInfo);
+
+  B.setChangeObserver(*ObserverWrapper);
 }
 
-bool Combiner::combineMachineInstrs(MachineFunction &MF,
-                                    GISelCSEInfo *CSEInfo) {
+Combiner::~Combiner() = default;
+
+bool Combiner::combineMachineInstrs() {
   // If the ISel pipeline failed, do not bother running this pass.
   // FIXME: Should this be here or in individual combiner passes.
   if (MF.getProperties().hasProperty(
           MachineFunctionProperties::Property::FailedISel))
     return false;
 
-  Builder =
-      CSEInfo ? std::make_unique<CSEMIRBuilder>() : std::make_unique<MachineIRBuilder>();
-  MRI = &MF.getRegInfo();
-  Builder->setMF(MF);
-  if (CSEInfo)
-    Builder->setCSEInfo(CSEInfo);
+  // We can't call this in the constructor because the derived class is
+  // uninitialized at that time.
+  if (!HasSetupMF) {
+    HasSetupMF = true;
+    setupMF(MF, KB);
+  }
 
   LLVM_DEBUG(dbgs() << "Generic MI Combiner for: " << MF.getName() << '\n');
 
@@ -116,26 +134,23 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
   bool MFChanged = false;
   bool Changed;
-  MachineIRBuilder &B = *Builder;
 
   do {
+    WorkList.clear();
+
    // Collect all instructions. Do a post order traversal for basic blocks and
    // insert with list bottom up, so while we pop_back_val, we'll traverse top
    // down RPOT.
     Changed = false;
-    GISelWorkList<512> WorkList;
-    WorkListMaintainer Observer(WorkList);
-    GISelObserverWrapper WrapperObserver(&Observer);
-    if (CSEInfo)
-      WrapperObserver.addObserver(CSEInfo);
-    RAIIDelegateInstaller DelInstall(MF, &WrapperObserver);
+
+    RAIIDelegateInstaller DelInstall(MF, ObserverWrapper.get());
     for (MachineBasicBlock *MBB : post_order(&MF)) {
       for (MachineInstr &CurMI :
           llvm::make_early_inc_range(llvm::reverse(*MBB))) {
        // Erase dead insts before even adding to the list.
-        if (isTriviallyDead(CurMI, *MRI)) {
+        if (isTriviallyDead(CurMI, MRI)) {
           LLVM_DEBUG(dbgs() << CurMI << "Is dead; erasing.\n");
-          llvm::salvageDebugInfo(*MRI, CurMI);
+          llvm::salvageDebugInfo(MRI, CurMI);
           CurMI.eraseFromParent();
           continue;
         }
@@ -147,8 +162,8 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
     while (!WorkList.empty()) {
       MachineInstr *CurrInst = WorkList.pop_back_val();
       LLVM_DEBUG(dbgs() << "\nTry combining " << *CurrInst;);
-      Changed |= CInfo.combine(WrapperObserver, *CurrInst, B);
-      Observer.reportFullyCreatedInstrs();
+      Changed |= tryCombineAll(*CurrInst);
+      WLObserver->reportFullyCreatedInstrs();
     }
     MFChanged |= Changed;
   } while (Changed);
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index cc7fb3ee1109..91a64d59e154 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6,6 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
@@ -395,6 +397,39 @@ void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
   replaceRegWith(MRI, DstReg, NewDstReg);
 }
 
+bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
+         "Invalid instruction kind");
+
+  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+  return Mask.size() == 1;
+}
+
+void CombinerHelper::applyShuffleToExtract(MachineInstr &MI) {
+  Register DstReg = MI.getOperand(0).getReg();
+  Builder.setInsertPt(*MI.getParent(), MI);
+
+  int I = MI.getOperand(3).getShuffleMask()[0];
+  Register Src1 = MI.getOperand(1).getReg();
+  LLT Src1Ty = MRI.getType(Src1);
+  int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
+  Register SrcReg;
+  if (I >= Src1NumElts) {
+    SrcReg = MI.getOperand(2).getReg();
+    I -= Src1NumElts;
+  } else if (I >= 0)
+    SrcReg = Src1;
+
+  if (I < 0)
+    Builder.buildUndef(DstReg);
+  else if (!MRI.getType(SrcReg).isVector())
+    Builder.buildCopy(DstReg, SrcReg);
+  else
+    Builder.buildExtractVectorElementConstant(DstReg, SrcReg, I);
+
+  MI.eraseFromParent();
+}
+
 namespace {
 
 /// Select a preference between two uses. CurrentUse is the current preference
@@ -910,160 +945,332 @@ void CombinerHelper::applySextInRegOfLoad(
   MI.eraseFromParent();
 }
 
-bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
-                                            Register &Base, Register &Offset) {
-  auto &MF = *MI.getParent()->getParent();
-  const auto &TLI = *MF.getSubtarget().getTargetLowering();
+static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
+  if (Ty.isVector())
+    return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
+                                Ty.getNumElements());
+  return IntegerType::get(C, Ty.getSizeInBits());
+}
 
-#ifndef NDEBUG
-  unsigned Opcode = MI.getOpcode();
-  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
-         Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
-#endif
+/// Return true if 'MI' is a load or a store that may fold its address
+/// operand into the load / store addressing mode.
+static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
+                                    MachineRegisterInfo &MRI) {
+  TargetLowering::AddrMode AM;
+  auto *MF = MI->getMF();
+  auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
+  if (!Addr)
+    return false;
+
+  AM.HasBaseReg = true;
+  if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
+    AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
+  else
+    AM.Scale = 1; // [reg +/- reg]
 
-  Base = MI.getOperand(1).getReg();
-  MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
-  if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
+  return TLI.isLegalAddressingMode(
+      MF->getDataLayout(), AM,
+      getTypeForLLT(MI->getMMO().getMemoryType(),
+                    MF->getFunction().getContext()),
+      MI->getMMO().getAddrSpace());
+}
+
+static unsigned getIndexedOpc(unsigned LdStOpc) {
+  switch (LdStOpc) {
+  case TargetOpcode::G_LOAD:
+    return TargetOpcode::G_INDEXED_LOAD;
+  case TargetOpcode::G_STORE:
+    return TargetOpcode::G_INDEXED_STORE;
+  case TargetOpcode::G_ZEXTLOAD:
+    return TargetOpcode::G_INDEXED_ZEXTLOAD;
+  case TargetOpcode::G_SEXTLOAD:
+    return TargetOpcode::G_INDEXED_SEXTLOAD;
+  default:
+    llvm_unreachable("Unexpected opcode");
+  }
+}
+
+bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
+  // Check for legality.
+  LLT PtrTy = MRI.getType(LdSt.getPointerReg());
+  LLT Ty = MRI.getType(LdSt.getReg(0));
+  LLT MemTy = LdSt.getMMO().getMemoryType();
+  SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
+      {{MemTy, MemTy.getSizeInBits(), AtomicOrdering::NotAtomic}});
+  unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
+  SmallVector<LLT> OpTys;
+  if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
+    OpTys = {PtrTy, Ty, Ty};
+  else
+    OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
+
+  LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
+  return isLegal(Q);
+}
+
+static cl::opt<unsigned> PostIndexUseThreshold(
+    "post-index-use-threshold", cl::Hidden, cl::init(32),
+    cl::desc("Number of uses of a base pointer to check before it is no longer "
+             "considered for post-indexing."));
+
+bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
+                                            Register &Base, Register &Offset,
+                                            bool &RematOffset) {
+  // We're looking for the following pattern, for either load or store:
+  // %baseptr:_(p0) = ...
+  // G_STORE %val(s64), %baseptr(p0)
+  // %offset:_(s64) = G_CONSTANT i64 -256
+  // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
+  const auto &TLI = getTargetLowering();
+
+  Register Ptr = LdSt.getPointerReg();
+  // If the store is the only use, don't bother.
+  if (MRI.hasOneNonDBGUse(Ptr))
     return false;
 
-  LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
-  // FIXME: The following use traversal needs a bail out for pathological cases.
-  for (auto &Use : MRI.use_nodbg_instructions(Base)) {
-    if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
+  if (!isIndexedLoadStoreLegal(LdSt))
+    return false;
+
+  if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
+    return false;
+
+  MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
+  auto *PtrDef = MRI.getVRegDef(Ptr);
+
+  unsigned NumUsesChecked = 0;
+  for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
+    if (++NumUsesChecked > PostIndexUseThreshold)
+      return false; // Try to avoid exploding compile time.
+
+    auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
+    // The use itself might be dead. This can happen during combines if DCE
+    // hasn't had a chance to run yet. Don't allow it to form an indexed op.
+    if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
+      continue;
+
+    // Check the user of this isn't the store, otherwise we'd be generating an
+    // indexed store defining its own use.
+    if (StoredValDef == &Use)
       continue;
 
-    Offset = Use.getOperand(2).getReg();
+    Offset = PtrAdd->getOffsetReg();
     if (!ForceLegalIndexing &&
-        !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
-      LLVM_DEBUG(dbgs() << "    Ignoring candidate with illegal addrmode: "
-                        << Use);
+        !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
+                             /*IsPre*/ false, MRI))
       continue;
-    }
 
     // Make sure the offset calculation is before the potentially indexed op.
-    // FIXME: we really care about dependency here. The offset calculation might
-    // be movable.
-    MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
-    if (!OffsetDef || !dominates(*OffsetDef, MI)) {
-      LLVM_DEBUG(dbgs() << "    Ignoring candidate with offset after mem-op: "
                        << Use);
-      continue;
+    MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
+    RematOffset = false;
+    if (!dominates(*OffsetDef, LdSt)) {
+      // If the offset however is just a G_CONSTANT, we can always just
+      // rematerialize it where we need it.
+      if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
+        continue;
+      RematOffset = true;
     }
 
-    // FIXME: check whether all uses of Base are load/store with foldable
-    // addressing modes. If so, using the normal addr-modes is better than
-    // forming an indexed one.
+    for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
+      if (&BasePtrUse == PtrDef)
+        continue;
 
-    bool MemOpDominatesAddrUses = true;
-    for (auto &PtrAddUse :
-         MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
-      if (!dominates(MI, PtrAddUse)) {
-        MemOpDominatesAddrUses = false;
-        break;
-      }
-    }
+      // If the user is a later load/store that can be post-indexed, then don't
+      // combine this one.
+      auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
+      if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
+          dominates(LdSt, *BasePtrLdSt) &&
+          isIndexedLoadStoreLegal(*BasePtrLdSt))
+        return false;
 
-    if (!MemOpDominatesAddrUses) {
-      LLVM_DEBUG(
-          dbgs() << "    Ignoring candidate as memop does not dominate uses: "
                 << Use);
-      continue;
+      // Now we're looking for the key G_PTR_ADD instruction, which contains
+      // the offset add that we want to fold.
+      if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
+        Register PtrAddDefReg = BasePtrUseDef->getReg(0);
+        for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
+          // If the use is in a different block, then we may produce worse code
+          // due to the extra register pressure.
+          if (BaseUseUse.getParent() != LdSt.getParent())
+            return false;
+
+          if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
+            if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
+              return false;
+        }
+        if (!dominates(LdSt, BasePtrUse))
+          return false; // All uses must be dominated by the load/store.
+      }
     }
 
-    LLVM_DEBUG(dbgs() << "    Found match: " << Use);
-    Addr = Use.getOperand(0).getReg();
+    Addr = PtrAdd->getReg(0);
+    Base = PtrAdd->getBaseReg();
     return true;
   }
 
   return false;
 }
 
-bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
+bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
                                            Register &Base, Register &Offset) {
-  auto &MF = *MI.getParent()->getParent();
+  auto &MF = *LdSt.getParent()->getParent();
   const auto &TLI = *MF.getSubtarget().getTargetLowering();
 
-#ifndef NDEBUG
-  unsigned Opcode = MI.getOpcode();
-  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
-         Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
-#endif
-
-  Addr = MI.getOperand(1).getReg();
-  MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
-  if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
+  Addr = LdSt.getPointerReg();
+  if (!mi_match(Addr, MRI, m_GPtrAdd(m_Reg(Base), m_Reg(Offset))) ||
+      MRI.hasOneNonDBGUse(Addr))
     return false;
 
-  Base = AddrDef->getOperand(1).getReg();
-  Offset = AddrDef->getOperand(2).getReg();
-
-  LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
-
   if (!ForceLegalIndexing &&
-      !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
-    LLVM_DEBUG(dbgs() << "    Skipping, not legal for target");
+      !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
+    return false;
+
+  if (!isIndexedLoadStoreLegal(LdSt))
     return false;
-  }
 
   MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
-  if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
-    LLVM_DEBUG(dbgs() << "    Skipping, frame index would need copy anyway.");
+  if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
     return false;
-  }
 
-  if (MI.getOpcode() == TargetOpcode::G_STORE) {
+  if (auto *St = dyn_cast<GStore>(&LdSt)) {
     // Would require a copy.
-    if (Base == MI.getOperand(0).getReg()) {
-      LLVM_DEBUG(dbgs() << "    Skipping, storing base so need copy anyway.");
+    if (Base == St->getValueReg())
      return false;
-    }
 
     // We're expecting one use of Addr in MI, but it could also be the
     // value stored, which isn't actually dominated by the instruction.
-    if (MI.getOperand(0).getReg() == Addr) {
-      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses");
+    if (St->getValueReg() == Addr)
      return false;
-    }
   }
 
+  // Avoid increasing cross-block register pressure.
+  for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
+    if (AddrUse.getParent() != LdSt.getParent())
+      return false;
+
   // FIXME: check whether all uses of the base pointer are constant PtrAdds.
   // That might allow us to end base's liveness here by adjusting the constant.
+  bool RealUse = false;
+  for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
+    if (!dominates(LdSt, AddrUse))
+      return false; // All uses must be dominated by the load/store.
 
-  for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
-    if (!dominates(MI, UseMI)) {
-      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses.");
-      return false;
+    // If Ptr may be folded in addressing mode of other use, then it's
+    // not profitable to do this transformation.
+    if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
+      if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
+        RealUse = true;
+    } else {
+      RealUse = true;
+    }
   }
-
-  return true;
+  return RealUse;
 }
 
-bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
-  IndexedLoadStoreMatchInfo MatchInfo;
-  if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
-    applyCombineIndexedLoadStore(MI, MatchInfo);
-    return true;
+bool CombinerHelper::matchCombineExtractedVectorLoad(MachineInstr &MI,
+                                                     BuildFnTy &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
+
+  // Check if there is a load that defines the vector being extracted from.
+  auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
+  if (!LoadMI)
+    return false;
+
+  Register Vector = MI.getOperand(1).getReg();
+  LLT VecEltTy = MRI.getType(Vector).getElementType();
+
+  assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
+
+  // Checking whether we should reduce the load width.
+  if (!MRI.hasOneNonDBGUse(Vector))
+    return false;
+
+  // Check if the defining load is simple.
+  if (!LoadMI->isSimple())
+    return false;
+
+  // If the vector element type is not a multiple of a byte then we are unable
+  // to correctly compute an address to load only the extracted element as a
+  // scalar.
+  if (!VecEltTy.isByteSized())
+    return false;
+
+  // Check if the new load that we are going to create is legal
+  // if we are in the post-legalization phase.
+  MachineMemOperand MMO = LoadMI->getMMO();
+  Align Alignment = MMO.getAlign();
+  MachinePointerInfo PtrInfo;
+  uint64_t Offset;
+
+  // Finding the appropriate PtrInfo if offset is a known constant.
+  // This is required to create the memory operand for the narrowed load.
+  // This machine memory operand object helps us infer about legality
+  // before we proceed to combine the instruction.
+  if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
+    int Elt = CVal->getZExtValue();
+    // FIXME: should be (ABI size)*Elt.
+    Offset = VecEltTy.getSizeInBits() * Elt / 8;
+    PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
+  } else {
+    // Discard the pointer info except the address space because the memory
+    // operand can't represent this new access since the offset is variable.
+    Offset = VecEltTy.getSizeInBits() / 8;
+    PtrInfo = MachinePointerInfo(MMO.getPointerInfo().getAddrSpace());
   }
-  return false;
-}
 
-bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
-  unsigned Opcode = MI.getOpcode();
-  if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
-      Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
+  Alignment = commonAlignment(Alignment, Offset);
+
+  Register VecPtr = LoadMI->getPointerReg();
+  LLT PtrTy = MRI.getType(VecPtr);
+
+  MachineFunction &MF = *MI.getMF();
+  auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
+
+  LegalityQuery::MemDesc MMDesc(*NewMMO);
+
+  LegalityQuery Q = {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}};
+
+  if (!isLegalOrBeforeLegalizer(Q))
     return false;
 
-  // For now, no targets actually support these opcodes so don't waste time
-  // running these unless we're forced to for testing.
-  if (!ForceLegalIndexing)
+  // Load must be allowed and fast on the target.
+  LLVMContext &C = MF.getFunction().getContext();
+  auto &DL = MF.getDataLayout();
+  unsigned Fast = 0;
+  if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
+                                              &Fast) ||
+      !Fast)
    return false;
 
-  MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
+  Register Result = MI.getOperand(0).getReg();
+  Register Index = MI.getOperand(2).getReg();
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    GISelObserverWrapper DummyObserver;
+    LegalizerHelper Helper(B.getMF(), DummyObserver, B);
+    // Get pointer to the vector element.
+    Register finalPtr = Helper.getVectorElementPointer(
+        LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
+        Index);
+    // New G_LOAD instruction.
+    B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
+    // Remove the original G_LOAD instruction.
+    LoadMI->eraseFromParent();
+  };
+
+  return true;
+}
+
+bool CombinerHelper::matchCombineIndexedLoadStore(
+    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
+  auto &LdSt = cast<GLoadStore>(MI);
+
+  if (LdSt.isAtomic())
+    return false;
+
+  MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
                                           MatchInfo.Offset);
   if (!MatchInfo.IsPre &&
-      !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
-                              MatchInfo.Offset))
+      !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
+                              MatchInfo.Offset, MatchInfo.RematOffset))
     return false;
 
   return true;
@@ -1072,28 +1279,21 @@ bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadS
 void CombinerHelper::applyCombineIndexedLoadStore(
     MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
   MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
-  MachineIRBuilder MIRBuilder(MI);
+  Builder.setInstrAndDebugLoc(MI);
   unsigned Opcode = MI.getOpcode();
   bool IsStore = Opcode == TargetOpcode::G_STORE;
-  unsigned NewOpcode;
-  switch (Opcode) {
-  case TargetOpcode::G_LOAD:
-    NewOpcode = TargetOpcode::G_INDEXED_LOAD;
-    break;
-  case TargetOpcode::G_SEXTLOAD:
-    NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
-    break;
-  case TargetOpcode::G_ZEXTLOAD:
-    NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
-    break;
-  case TargetOpcode::G_STORE:
-    NewOpcode = TargetOpcode::G_INDEXED_STORE;
-    break;
-  default:
-    llvm_unreachable("Unknown load/store opcode");
+  unsigned NewOpcode = getIndexedOpc(Opcode);
+
+  // If the offset constant didn't happen to dominate the load/store, we can
+  // just clone it as needed.
+  if (MatchInfo.RematOffset) {
+    auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
+    auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
+                                        *OldCst->getOperand(1).getCImm());
+    MatchInfo.Offset = NewCst.getReg(0);
   }
 
-  auto MIB = MIRBuilder.buildInstr(NewOpcode);
+  auto MIB = Builder.buildInstr(NewOpcode);
   if (IsStore) {
     MIB.addDef(MatchInfo.Addr);
     MIB.addUse(MI.getOperand(0).getReg());
@@ -1105,6 +1305,7 @@ void CombinerHelper::applyCombineIndexedLoadStore(
   MIB.addUse(MatchInfo.Base);
   MIB.addUse(MatchInfo.Offset);
   MIB.addImm(MatchInfo.IsPre);
+  MIB->cloneMemRefs(*MI.getMF(), MI);
 
   MI.eraseFromParent();
   AddrDef.eraseFromParent();
@@ -1271,13 +1472,7 @@ void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
   Observer.changedInstr(*BrCond);
 }
 
-static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
-  if (Ty.isVector())
-    return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
-                                Ty.getNumElements());
-  return IntegerType::get(C, Ty.getSizeInBits());
-}
-
+
 bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
   MachineIRBuilder HelperBuilder(MI);
   GISelObserverWrapper DummyObserver;
@@ -1394,7 +1589,7 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
   if (AccessTy) {
     AMNew.HasBaseReg = true;
     TargetLoweringBase::AddrMode AMOld;
-    AMOld.BaseOffs = MaybeImm2Val->Value.getSExtValue();
+    AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
     AMOld.HasBaseReg = true;
     unsigned AS = MRI.getType(Add2).getAddressSpace();
     const auto &TLI = *MF.getSubtarget().getTargetLowering();
@@ -1456,7 +1651,7 @@ bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
 
   // Pass the combined immediate to the apply function.
   MatchInfo.Imm =
-      (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue();
+      (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
   MatchInfo.Reg = Base;
 
   // There is no simple replacement for a saturating unsigned left shift that
@@ -1535,7 +1730,7 @@ bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
 
   // Find a matching one-use shift by constant.
   const Register C1 = MI.getOperand(2).getReg();
   auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
-  if (!MaybeImmVal)
+  if (!MaybeImmVal || MaybeImmVal->Value == 0)
     return false;
 
   const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
@@ -1685,6 +1880,8 @@ void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
 bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
                                              RegisterImmPair &MatchData) {
   assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
+  if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
+    return false;
 
   Register LHS = MI.getOperand(1).getReg();
 
@@ -2248,35 +2445,6 @@ void CombinerHelper::applyCombineExtOfExt(
   }
 }
 
-void CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
-  assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
-  Register DstReg = MI.getOperand(0).getReg();
-  Register SrcReg = MI.getOperand(1).getReg();
-  LLT DstTy = MRI.getType(DstReg);
-
-  Builder.setInstrAndDebugLoc(MI);
-  Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg,
-                   MI.getFlags());
-  MI.eraseFromParent();
-}
-
-bool CombinerHelper::matchCombineFAbsOfFNeg(MachineInstr &MI,
-                                            BuildFnTy &MatchInfo) {
-  assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
-  Register Src = MI.getOperand(1).getReg();
-  Register NegSrc;
-
-  if (!mi_match(Src, MRI, m_GFNeg(m_Reg(NegSrc))))
-    return false;
-
-  MatchInfo = [=, &MI](MachineIRBuilder &B) {
-    Observer.changingInstr(MI);
-    MI.getOperand(1).setReg(NegSrc);
-    Observer.changedInstr(MI);
-  };
-  return true;
-}
-
 bool CombinerHelper::matchCombineTruncOfExt(
     MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
   assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
@@ -2580,6 +2748,16 @@ bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
          MaybeCst->getSExtValue() == C;
 }
 
+bool CombinerHelper::matchConstantFPOp(const MachineOperand &MOP, double C) {
+  if (!MOP.isReg())
+    return false;
+  std::optional<FPValueAndVReg> MaybeCst;
+  if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
+    return false;
+
+  return MaybeCst->Value.isExactlyValue(C);
+}
+
 void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
                                                      unsigned OpIdx) {
   assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
@@ -2599,6 +2777,45 @@ void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
   replaceRegWith(MRI, OldReg, Replacement);
 }
 
+bool CombinerHelper::matchConstantLargerBitWidth(MachineInstr &MI,
+                                                 unsigned ConstIdx) {
+  Register ConstReg = MI.getOperand(ConstIdx).getReg();
+  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+  // Get the shift amount
+  auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
+  if (!VRegAndVal)
+    return false;
+
+  // Return true if shift amount >= Bitwidth
+  return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
+}
+
+void CombinerHelper::applyFunnelShiftConstantModulo(MachineInstr &MI) {
+  assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
+          MI.getOpcode() == TargetOpcode::G_FSHR) &&
+         "This is not a funnel shift operation");
+
+  Register ConstReg = MI.getOperand(3).getReg();
+  LLT ConstTy = MRI.getType(ConstReg);
+  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+  auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
+  assert((VRegAndVal) && "Value is not a constant");
+
+  // Calculate the new Shift Amount = Old Shift Amount % BitWidth
+  APInt NewConst = VRegAndVal->Value.urem(
+      APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
+
+  Builder.setInstrAndDebugLoc(MI);
+  auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
+  Builder.buildInstr(
+      MI.getOpcode(), {MI.getOperand(0)},
+      {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
+
+  MI.eraseFromParent();
+}
+
 bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
   assert(MI.getOpcode() == TargetOpcode::G_SELECT);
   // Match (cond ? x : x)
@@ -2652,6 +2869,13 @@ void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
   MI.eraseFromParent();
 }
 
+void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, ConstantFP *CFP) {
+  assert(MI.getNumDefs() == 1 && "Expected only one def?");
+  Builder.setInstr(MI);
+  Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
+  MI.eraseFromParent();
+}
+
 void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
   assert(MI.getNumDefs() == 1 && "Expected only one def?");
   Builder.setInstr(MI);
@@ -3246,7 +3470,7 @@ bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
 
   unsigned BinOpcode = MI.getOpcode();
 
-  // We know know one of the operands is a select of constants. Now verify that
+  // We know that one of the operands is a select of constants. Now verify that
   // the other binary operator operand is either a constant, or we can handle a
   // variable.
   bool CanFoldNonConst =
@@ -4141,8 +4365,7 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
   Register Dst = MI.getOperand(0).getReg();
   LLT Ty = MRI.getType(Dst);
   LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
-  if (!getTargetLowering().isConstantUnsignedBitfieldExtractLegal(
-          TargetOpcode::G_UBFX, Ty, ExtractTy))
+  if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
     return false;
 
   int64_t AndImm, LSBImm;
@@ -4228,8 +4451,7 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd(
   const Register Dst = MI.getOperand(0).getReg();
   LLT Ty = MRI.getType(Dst);
   LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
-  if (!getTargetLowering().isConstantUnsignedBitfieldExtractLegal(
-          TargetOpcode::G_UBFX, Ty, ExtractTy))
+  if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
     return false;
 
   // Try to match shr (and x, c1), c2
@@ -4279,20 +4501,20 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd(
 }
 
 bool CombinerHelper::reassociationCanBreakAddressingModePattern(
-    MachineInstr &PtrAdd) {
-  assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);
+    MachineInstr &MI) {
+  auto &PtrAdd = cast<GPtrAdd>(MI);
 
-  Register Src1Reg = PtrAdd.getOperand(1).getReg();
-  MachineInstr *Src1Def = getOpcodeDef(TargetOpcode::G_PTR_ADD, Src1Reg, MRI);
+  Register Src1Reg = PtrAdd.getBaseReg();
+  auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
   if (!Src1Def)
     return false;
 
-  Register Src2Reg = PtrAdd.getOperand(2).getReg();
+  Register Src2Reg = PtrAdd.getOffsetReg();
 
   if (MRI.hasOneNonDBGUse(Src1Reg))
     return false;
 
-  auto C1 = getIConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
+  auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
   if (!C1)
     return false;
   auto C2 = getIConstantVRegVal(Src2Reg, MRI);
@@ -4303,7 +4525,7 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern(
   const APInt &C2APIntVal = *C2;
   const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
 
-  for (auto &UseMI : MRI.use_nodbg_instructions(Src1Reg)) {
+  for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
     // This combine may end up running before ptrtoint/inttoptr combines
     // manage to eliminate redundant conversions, so try to look through them.
     MachineInstr *ConvUseMI = &UseMI;
@@ -4316,9 +4538,8 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern(
       ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
       ConvUseOpc = ConvUseMI->getOpcode();
     }
-    auto LoadStore = ConvUseOpc == TargetOpcode::G_LOAD ||
-                     ConvUseOpc == TargetOpcode::G_STORE;
-    if (!LoadStore)
+    auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
+    if (!LdStMI)
       continue;
     // Is x[offset2] already not a legal addressing mode? If so then
     // reassociating the constants breaks nothing (we test offset2 because
@@ -4326,11 +4547,9 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern(
     TargetLoweringBase::AddrMode AM;
     AM.HasBaseReg = true;
     AM.BaseOffs = C2APIntVal.getSExtValue();
-    unsigned AS =
-        MRI.getType(ConvUseMI->getOperand(1).getReg()).getAddressSpace();
-    Type *AccessTy =
-        getTypeForLLT(MRI.getType(ConvUseMI->getOperand(0).getReg()),
-                      PtrAdd.getMF()->getFunction().getContext());
+    unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
+    Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
+                                   PtrAdd.getMF()->getFunction().getContext());
     const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
     if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
                                    AccessTy, AS))
@@ -4519,7 +4738,19 @@ bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI,
   return false;
 }
 
-bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
+bool CombinerHelper::matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo) {
+  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+  Register SrcOp = MI.getOperand(1).getReg();
+
+  if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
+    MatchInfo = *MaybeCst;
+    return true;
+  }
+
+  return false;
+}
+
+bool CombinerHelper::matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo) {
   Register Op1 = MI.getOperand(1).getReg();
   Register Op2 = MI.getOperand(2).getReg();
   auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
@@ -4529,6 +4760,42 @@ bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
   return true;
 }
 
+bool CombinerHelper::matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP* &MatchInfo) {
+  Register Op1 = MI.getOperand(1).getReg();
+  Register Op2 = MI.getOperand(2).getReg();
+  auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
+  if (!MaybeCst)
+    return false;
+  MatchInfo =
+      ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
+  return true;
+}
+
+bool CombinerHelper::matchConstantFoldFMA(MachineInstr &MI,
+                                          ConstantFP *&MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_FMA ||
+         MI.getOpcode() == TargetOpcode::G_FMAD);
+  auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
+
+  const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
+  if (!Op3Cst)
+    return false;
+
+  const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
+  if (!Op2Cst)
+    return false;
+
+  const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
+  if (!Op1Cst)
+    return false;
+
+  APFloat Op1F = Op1Cst->getValueAPF();
+  Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
+                        APFloat::rmNearestTiesToEven);
+  MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
+  return true;
+}
+
 bool CombinerHelper::matchNarrowBinopFeedingAnd(
     MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
   // Look for a binop feeding into an AND with a mask:
@@ -6018,12 +6285,36 @@ bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
   return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
 }
 
-bool CombinerHelper::tryCombine(MachineInstr &MI) {
-  if (tryCombineCopy(MI))
-    return true;
-  if (tryCombineExtendingLoads(MI))
-    return true;
-  if (tryCombineIndexedLoadStore(MI))
+bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) {
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+  auto *LHSDef = MRI.getVRegDef(LHS);
+  if (getIConstantVRegVal(LHS, MRI).has_value())
     return true;
-  return false;
+
+  // LHS may be a G_CONSTANT_FOLD_BARRIER. If so we commute
+  // as long as we don't already have a constant on the RHS.
+  if (LHSDef->getOpcode() != TargetOpcode::G_CONSTANT_FOLD_BARRIER)
+    return false;
+  return MRI.getVRegDef(RHS)->getOpcode() !=
+             TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
+         !getIConstantVRegVal(RHS, MRI);
+}
+
+bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) {
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+  std::optional<FPValueAndVReg> ValAndVReg;
+  if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
+    return false;
+  return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
+}
+
+void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
+  Observer.changingInstr(MI);
+  Register LHSReg = MI.getOperand(1).getReg();
+  Register RHSReg = MI.getOperand(2).getReg();
+  MI.getOperand(1).setReg(RHSReg);
+  MI.getOperand(2).setReg(LHSReg);
+  Observer.changedInstr(MI);
 }
diff --git a/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp b/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
index d747cbf5aadc..26752369a771 100644
--- a/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
@@ -26,12 +26,19 @@ GIMatchTableExecutor::MatcherState::MatcherState(unsigned MaxRenderers)
 
 GIMatchTableExecutor::GIMatchTableExecutor() = default;
 
-bool GIMatchTableExecutor::isOperandImmEqual(
-    const MachineOperand &MO, int64_t Value,
-    const MachineRegisterInfo &MRI) const {
-  if (MO.isReg() && MO.getReg())
+bool GIMatchTableExecutor::isOperandImmEqual(const MachineOperand &MO,
+                                             int64_t Value,
+                                             const MachineRegisterInfo &MRI,
+                                             bool Splat) const {
+  if (MO.isReg() && MO.getReg()) {
     if (auto VRegVal = getIConstantVRegValWithLookThrough(MO.getReg(), MRI))
       return VRegVal->Value.getSExtValue() == Value;
+
+    if (Splat) {
+      if (auto VRegVal = getIConstantSplatVal(MO.getReg(), MRI))
+        return VRegVal->getSExtValue() == Value;
+    }
+  }
   return false;
 }
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 363ffbfa90b5..ea8c20cdcd45 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -19,6 +19,7 @@
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Target/TargetMachine.h"
 
 #define DEBUG_TYPE "gisel-known-bits"
 
@@ -48,6 +49,8 @@ Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
   }
   case TargetOpcode::G_INTRINSIC:
   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+  case TargetOpcode::G_INTRINSIC_CONVERGENT:
+  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
   default:
     return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
   }
@@ -72,7 +75,7 @@ KnownBits GISelKnownBits::getKnownBits(Register R, const APInt &DemandedElts,
   assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared");
 
   KnownBits Known;
-  computeKnownBitsImpl(R, Known, DemandedElts);
+  computeKnownBitsImpl(R, Known, DemandedElts, Depth);
   ComputeKnownBitsCache.clear();
   return Known;
 }
@@ -726,6 +729,8 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
   }
   case TargetOpcode::G_INTRINSIC:
   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+  case TargetOpcode::G_INTRINSIC_CONVERGENT:
+  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
   default: {
     unsigned NumBits =
         TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);
@@ -769,3 +774,12 @@ void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
 bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) {
   return false;
 }
+
+GISelKnownBits &GISelKnownBitsAnalysis::get(MachineFunction &MF) {
+  if (!Info) {
+    unsigned MaxDepth =
+        MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6;
+    Info = std::make_unique<GISelKnownBits>(MF, MaxDepth);
+  }
+  return *Info.get();
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 9a67a8d05a4d..14a4e72152e7 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -62,6 +62,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/PatternMatch.h"
@@ -80,6 +81,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/MemoryOpRemark.h"
 #include <algorithm>
 #include <cassert>
@@ -127,7 +129,7 @@ static void reportTranslationError(MachineFunction &MF,
   ORE.emit(R);
 }
 
-IRTranslator::IRTranslator(CodeGenOpt::Level optlevel)
+IRTranslator::IRTranslator(CodeGenOptLevel optlevel)
     : MachineFunctionPass(ID), OptLevel(optlevel) {}
 
 #ifndef NDEBUG
@@ -173,7 +175,7 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<TargetPassConfig>();
   AU.addRequired<GISelCSEAnalysisWrapperPass>();
   AU.addRequired<AssumptionCacheTracker>();
-  if (OptLevel != CodeGenOpt::None) {
+  if (OptLevel != CodeGenOptLevel::None) {
     AU.addRequired<BranchProbabilityInfoWrapperPass>();
     AU.addRequired<AAResultsWrapperPass>();
   }
@@ -358,7 +360,7 @@ bool IRTranslator::translateCompare(const User &U,
 bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
   const ReturnInst &RI = cast<ReturnInst>(U);
   const Value *Ret = RI.getReturnValue();
-  if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0)
+  if (Ret && DL->getTypeStoreSize(Ret->getType()).isZero())
     Ret = nullptr;
 
   ArrayRef<Register> VRegs;
@@ -578,7 +580,8 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
   if (BrInst.isUnconditional()) {
     // If the unconditional target is the layout successor, fallthrough.
-    if (OptLevel == CodeGenOpt::None || !CurMBB.isLayoutSuccessor(Succ0MBB))
+    if (OptLevel == CodeGenOptLevel::None ||
+        !CurMBB.isLayoutSuccessor(Succ0MBB))
       MIRBuilder.buildBr(*Succ0MBB);
 
     // Link successors.
@@ -720,7 +723,7 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
     return true;
   }
 
-  SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr);
+  SL->findJumpTables(Clusters, &SI, std::nullopt, DefaultMBB, nullptr, nullptr);
   SL->findBitTestClusters(Clusters, &SI);
 
   LLVM_DEBUG({
@@ -766,7 +769,7 @@ void IRTranslator::emitJumpTable(SwitchCG::JumpTable &JT,
   MIB.setMBB(*MBB);
   MIB.setDebugLoc(CurBuilder->getDebugLoc());
 
-  Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
+  Type *PtrIRTy = PointerType::getUnqual(MF->getFunction().getContext());
   const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
 
   auto Table = MIB.buildJumpTable(PtrTy, JT.JTI);
@@ -789,7 +792,7 @@ bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT,
 
   // This value may be smaller or larger than the target's pointer type, and
   // therefore require extension or truncating.
-  Type *PtrIRTy = SValue.getType()->getPointerTo();
+  auto *PtrIRTy = PointerType::getUnqual(SValue.getContext());
   const LLT PtrScalarTy = LLT::scalar(DL->getTypeSizeInBits(PtrIRTy));
   Sub = MIB.buildZExtOrTrunc(PtrScalarTy, Sub);
 
@@ -1014,7 +1017,7 @@ void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
   Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0);
   auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg);
 
-  Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
+  Type *PtrIRTy = PointerType::getUnqual(MF->getFunction().getContext());
   const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
 
   LLT MaskTy = SwitchOpTy;
@@ -1483,6 +1486,9 @@ bool IRTranslator::translateBitCast(const User &U,
 
 bool IRTranslator::translateCast(unsigned Opcode, const User &U,
                                  MachineIRBuilder &MIRBuilder) {
+  if (U.getType()->getScalarType()->isBFloatTy() ||
+      U.getOperand(0)->getType()->getScalarType()->isBFloatTy())
+    return false;
   Register Op = getOrCreateVReg(*U.getOperand(0));
   Register Res = getOrCreateVReg(U);
   MIRBuilder.buildInstr(Opcode, {Res}, {Op});
@@ -1498,6 +1504,12 @@ bool IRTranslator::translateGetElementPtr(const User &U,
   Type *OffsetIRTy = DL->getIndexType(PtrIRTy);
   LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
 
+  uint32_t Flags = 0;
+  if (isa<Instruction>(U)) {
+    const Instruction &I = cast<Instruction>(U);
+    Flags = MachineInstr::copyFlagsFromInstruction(I);
+  }
+
   // Normalize Vector GEP - all scalar operands should be converted to the
   // splat vector.
   unsigned VectorWidth = 0;
@@ -1578,7 +1590,12 @@ bool IRTranslator::translateGetElementPtr(const User &U,
 
   if (Offset != 0) {
     auto OffsetMIB = MIRBuilder.buildConstant(OffsetTy, Offset);
-    MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0));
+
+    if (int64_t(Offset) >= 0 && cast<GEPOperator>(U).isInBounds())
+      Flags |= MachineInstr::MIFlag::NoUWrap;
+
+    MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0),
+                           Flags);
     return true;
   }
 
@@ -1742,6 +1759,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
     return TargetOpcode::G_FEXP;
   case Intrinsic::exp2:
     return TargetOpcode::G_FEXP2;
+  case Intrinsic::exp10:
+    return TargetOpcode::G_FEXP10;
   case Intrinsic::fabs:
     return TargetOpcode::G_FABS;
   case Intrinsic::copysign:
@@ -1797,6 +1816,10 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
     return TargetOpcode::G_VECREDUCE_FMIN;
   case Intrinsic::vector_reduce_fmax:
     return TargetOpcode::G_VECREDUCE_FMAX;
+  case Intrinsic::vector_reduce_fminimum:
+    return TargetOpcode::G_VECREDUCE_FMINIMUM;
+  case Intrinsic::vector_reduce_fmaximum:
+    return TargetOpcode::G_VECREDUCE_FMAXIMUM;
   case Intrinsic::vector_reduce_add:
    return TargetOpcode::G_VECREDUCE_ADD;
   case Intrinsic::vector_reduce_mul:
@@ -1819,6 +1842,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
     return TargetOpcode::G_LROUND;
   case Intrinsic::llround:
     return TargetOpcode::G_LLROUND;
+  case Intrinsic::get_fpmode:
+    return TargetOpcode::G_GET_FPMODE;
   }
   return Intrinsic::not_intrinsic;
 }
@@ -1939,6 +1964,8 @@ bool IRTranslator::translateIfEntryValueArgument(
   if (!PhysReg)
     return false;
 
+  // Append an op deref to account for the fact that this is a dbg_declare.
+  Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
   MF->setVariableDbgInfo(DebugInst.getVariable(), Expr, *PhysReg,
                          DebugInst.getDebugLoc());
   return true;
@@ -1966,7 +1993,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
   case Intrinsic::lifetime_start:
   case Intrinsic::lifetime_end: {
     // No stack colouring in O0, discard region information.
-    if (MF->getTarget().getOptLevel() == CodeGenOpt::None)
+    if (MF->getTarget().getOptLevel() == CodeGenOptLevel::None)
       return true;
 
     unsigned Op = ID == Intrinsic::lifetime_start ? TargetOpcode::LIFETIME_START
@@ -2041,12 +2068,12 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     auto &TLI = *MF->getSubtarget().getTargetLowering();
     Value *Ptr = CI.getArgOperand(0);
     unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8;
+    Align Alignment = getKnownAlignment(Ptr, *DL);
 
-    // FIXME: Get alignment
     MIRBuilder.buildInstr(TargetOpcode::G_VASTART, {}, {getOrCreateVReg(*Ptr)})
         .addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Ptr),
                                                 MachineMemOperand::MOStore,
-                                                ListSize, Align(1)));
+                                                ListSize, Alignment));
     return true;
   }
   case Intrinsic::dbg_value: {
@@ -2229,31 +2256,12 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     return true;
   }
   case Intrinsic::stacksave: {
-    // Save the stack pointer to the location provided by the intrinsic.
-    Register Reg = getOrCreateVReg(CI);
-    Register StackPtr = MF->getSubtarget()
-                            .getTargetLowering()
-                            ->getStackPointerRegisterToSaveRestore();
-
-    // If the target doesn't specify a stack pointer, then fall back.
-    if (!StackPtr)
-      return false;
-
-    MIRBuilder.buildCopy(Reg, StackPtr);
+    MIRBuilder.buildInstr(TargetOpcode::G_STACKSAVE, {getOrCreateVReg(CI)}, {});
     return true;
   }
   case Intrinsic::stackrestore: {
-    // Restore the stack pointer from the location provided by the intrinsic.
-    Register Reg = getOrCreateVReg(*CI.getArgOperand(0));
-    Register StackPtr = MF->getSubtarget()
-                            .getTargetLowering()
-                            ->getStackPointerRegisterToSaveRestore();
-
-    // If the target doesn't specify a stack pointer, then fall back.
-    if (!StackPtr)
-      return false;
-
-    MIRBuilder.buildCopy(StackPtr, Reg);
+    MIRBuilder.buildInstr(TargetOpcode::G_STACKRESTORE, {},
+                          {getOrCreateVReg(*CI.getArgOperand(0))});
     return true;
   }
   case Intrinsic::cttz:
@@ -2387,6 +2395,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0};
     return CLI->lowerCall(MIRBuilder, Info);
   }
+  case Intrinsic::amdgcn_cs_chain:
+    return translateCallBase(CI, MIRBuilder);
   case Intrinsic::fptrunc_round: {
     uint32_t Flags = MachineInstr::copyFlagsFromInstruction(CI);
 
@@ -2415,6 +2425,16 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
 
     return true;
   }
+  case Intrinsic::set_fpmode: {
+    Value *FPState = CI.getOperand(0);
+    MIRBuilder.buildInstr(TargetOpcode::G_SET_FPMODE, {},
+                          { getOrCreateVReg(*FPState) });
+    return true;
+  }
+  case Intrinsic::reset_fpmode: {
+    MIRBuilder.buildInstr(TargetOpcode::G_RESET_FPMODE, {}, {});
+    return true;
+  }
 #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC)                         \
   case Intrinsic::INTRINSIC:
 #include "llvm/IR/ConstrainedOps.def"
@@ -2493,7 +2513,8 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
   auto TII = MF->getTarget().getIntrinsicInfo();
   const Function *F = CI.getCalledFunction();
 
-  // FIXME: support Windows dllimport function calls.
+  // FIXME: support Windows dllimport function calls and calls through
+  // weak symbols.
   if (F && (F->hasDLLImportStorageClass() ||
             (MF->getTarget().getTargetTriple().isOSWindows() &&
              F->hasExternalWeakLinkage())))
@@ -2533,8 +2554,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
 
   // Ignore the callsite attributes. Backend code is most likely not expecting
   // an intrinsic to sometimes have side effects and sometimes not.
-  MachineInstrBuilder MIB =
-      MIRBuilder.buildIntrinsic(ID, ResultRegs, !F->doesNotAccessMemory());
+  MachineInstrBuilder MIB = MIRBuilder.buildIntrinsic(ID, ResultRegs);
   if (isa<FPMathOperator>(CI))
     MIB->copyIRFlags(CI);
 
@@ -2676,6 +2696,13 @@ bool IRTranslator::translateInvoke(const User &U,
   if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI()))
     return false;
 
+  // FIXME: support Windows dllimport function calls and calls through
+  // weak symbols.
+  if (Fn && (Fn->hasDLLImportStorageClass() ||
+             (MF->getTarget().getTargetTriple().isOSWindows() &&
+              Fn->hasExternalWeakLinkage())))
+    return false;
+
   bool LowerInlineAsm = I.isInlineAsm();
   bool NeedEHLabel = true;
 
@@ -2868,7 +2895,7 @@ bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) {
 }
 
 bool IRTranslator::translateUnreachable(const User &U, MachineIRBuilder &MIRBuilder) {
-  if (!MF->getTarget().Options.TrapUnreachable) 
+  if (!MF->getTarget().Options.TrapUnreachable)
     return true;
 
   auto &UI = cast<UnreachableInst>(U);
@@ -2885,7 +2912,7 @@ bool IRTranslator::translateUnreachable(const User &U, MachineIRBuil
     }
   }
 
-  MIRBuilder.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>(), true);
+  MIRBuilder.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>());
   return true;
 }
 
@@ -3321,7 +3348,7 @@ bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD,
   CurBuilder->setInsertPt(*ParentBB, ParentBB->end());
   // First create the loads to the guard/stack slot for the comparison.
   const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
-  Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
+  Type *PtrIRTy = PointerType::getUnqual(MF->getFunction().getContext());
   const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
   LLT PtrMemTy = getLLTForMVT(TLI.getPointerMemTy(*DL));
 
@@ -3331,7 +3358,7 @@ bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD,
   Register Guard;
   Register StackSlotPtr = CurBuilder->buildFrameIndex(PtrTy, FI).getReg(0);
   const Module &M = *ParentBB->getParent()->getFunction().getParent();
-  Align Align = DL->getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));
+  Align Align = DL->getPrefTypeAlign(PointerType::getUnqual(M.getContext()));
 
   // Generate code to load the content of the guard slot.
   Register GuardVal =
@@ -3500,7 +3527,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
   ORE = std::make_unique<OptimizationRemarkEmitter>(&F);
   const TargetMachine &TM = MF->getTarget();
   TM.resetTargetOptions(F);
-  EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F);
+  EnableOpts = OptLevel != CodeGenOptLevel::None && !skipFunction(F);
   FuncInfo.MF = MF;
   if (EnableOpts) {
     AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index 3925611f1485..4089a5e941b0 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -133,71 +133,6 @@ static void getRegistersForValue(MachineFunction &MF,
   }
 }
 
-/// Return an integer indicating how general CT is.
-static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
-  switch (CT) {
-  case TargetLowering::C_Immediate:
-  case TargetLowering::C_Other:
-  case TargetLowering::C_Unknown:
-    return 0;
-  case TargetLowering::C_Register:
-    return 1;
-  case TargetLowering::C_RegisterClass:
-    return 2;
-  case TargetLowering::C_Memory:
-  case TargetLowering::C_Address:
-    return 3;
-  }
-  llvm_unreachable("Invalid constraint type");
-}
-
-static void chooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
-                             const TargetLowering *TLI) {
-  assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
-  unsigned BestIdx = 0;
-  TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
-  int BestGenerality = -1;
-
-  // Loop over the options, keeping track of the most general one.
- for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) { - TargetLowering::ConstraintType CType = - TLI->getConstraintType(OpInfo.Codes[i]); - - // Indirect 'other' or 'immediate' constraints are not allowed. - if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory || - CType == TargetLowering::C_Register || - CType == TargetLowering::C_RegisterClass)) - continue; - - // If this is an 'other' or 'immediate' constraint, see if the operand is - // valid for it. For example, on X86 we might have an 'rI' constraint. If - // the operand is an integer in the range [0..31] we want to use I (saving a - // load of a register), otherwise we must use 'r'. - if (CType == TargetLowering::C_Other || - CType == TargetLowering::C_Immediate) { - assert(OpInfo.Codes[i].size() == 1 && - "Unhandled multi-letter 'other' constraint"); - // FIXME: prefer immediate constraints if the target allows it - } - - // Things with matching constraints can only be registers, per gcc - // documentation. This mainly affects "g" constraints. - if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput()) - continue; - - // This constraint letter is more general than the previous one, use it. - int Generality = getConstraintGenerality(CType); - if (Generality > BestGenerality) { - BestType = CType; - BestIdx = i; - BestGenerality = Generality; - } - } - - OpInfo.ConstraintCode = OpInfo.Codes[BestIdx]; - OpInfo.ConstraintType = BestType; -} - static void computeConstraintToUse(const TargetLowering *TLI, TargetLowering::AsmOperandInfo &OpInfo) { assert(!OpInfo.Codes.empty() && "Must have at least one constraint"); @@ -207,7 +142,18 @@ static void computeConstraintToUse(const TargetLowering *TLI, OpInfo.ConstraintCode = OpInfo.Codes[0]; OpInfo.ConstraintType = TLI->getConstraintType(OpInfo.ConstraintCode); } else { - chooseConstraint(OpInfo, TLI); + TargetLowering::ConstraintGroup G = TLI->getConstraintPreferences(OpInfo); + if (G.empty()) + return; + // FIXME: prefer immediate constraints if the target allows it + unsigned BestIdx = 0; + for (const unsigned E = G.size(); + BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other || + G[BestIdx].second == TargetLowering::C_Immediate); + ++BestIdx) + ; + OpInfo.ConstraintCode = G[BestIdx].first; + OpInfo.ConstraintType = G[BestIdx].second; } // 'X' matches anything. @@ -229,8 +175,8 @@ static void computeConstraintToUse(const TargetLowering *TLI, } static unsigned getNumOpRegs(const MachineInstr &I, unsigned OpIdx) { - unsigned Flag = I.getOperand(OpIdx).getImm(); - return InlineAsm::getNumOperandRegisters(Flag); + const InlineAsm::Flag F(I.getOperand(OpIdx).getImm()); + return F.getNumOperandRegisters(); } static bool buildAnyextOrCopy(Register Dst, Register Src, @@ -373,16 +319,16 @@ bool InlineAsmLowering::lowerInlineAsm( switch (OpInfo.Type) { case InlineAsm::isOutput: if (OpInfo.ConstraintType == TargetLowering::C_Memory) { - unsigned ConstraintID = + const InlineAsm::ConstraintCode ConstraintID = TLI->getInlineAsmMemConstraint(OpInfo.ConstraintCode); - assert(ConstraintID != InlineAsm::Constraint_Unknown && + assert(ConstraintID != InlineAsm::ConstraintCode::Unknown && "Failed to convert memory constraint code to constraint id."); // Add information to the INLINEASM instruction to know about this // output. 
- unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); - OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID); - Inst.addImm(OpFlags); + InlineAsm::Flag Flag(InlineAsm::Kind::Mem, 1); + Flag.setMemConstraint(ConstraintID); + Inst.addImm(Flag); ArrayRef<Register> SourceRegs = GetOrCreateVRegs(*OpInfo.CallOperandVal); assert( @@ -405,17 +351,17 @@ bool InlineAsmLowering::lowerInlineAsm( // Add information to the INLINEASM instruction to know that this // register is set. - unsigned Flag = InlineAsm::getFlagWord( - OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber - : InlineAsm::Kind_RegDef, - OpInfo.Regs.size()); + InlineAsm::Flag Flag(OpInfo.isEarlyClobber + ? InlineAsm::Kind::RegDefEarlyClobber + : InlineAsm::Kind::RegDef, + OpInfo.Regs.size()); if (OpInfo.Regs.front().isVirtual()) { // Put the register class of the virtual registers in the flag word. // That way, later passes can recompute register class constraints for // inline assembly as well as normal instructions. Don't do this for // tied operands that can use the regclass information from the def. const TargetRegisterClass *RC = MRI->getRegClass(OpInfo.Regs.front()); - Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); + Flag.setRegClass(RC->getID()); } Inst.addImm(Flag); @@ -441,14 +387,13 @@ bool InlineAsmLowering::lowerInlineAsm( InstFlagIdx += getNumOpRegs(*Inst, InstFlagIdx) + 1; assert(getNumOpRegs(*Inst, InstFlagIdx) == 1 && "Wrong flag"); - unsigned MatchedOperandFlag = Inst->getOperand(InstFlagIdx).getImm(); - if (InlineAsm::isMemKind(MatchedOperandFlag)) { + const InlineAsm::Flag MatchedOperandFlag(Inst->getOperand(InstFlagIdx).getImm()); + if (MatchedOperandFlag.isMemKind()) { LLVM_DEBUG(dbgs() << "Matching input constraint to mem operand not " "supported. This should be target specific.\n"); return false; } - if (!InlineAsm::isRegDefKind(MatchedOperandFlag) && - !InlineAsm::isRegDefEarlyClobberKind(MatchedOperandFlag)) { + if (!MatchedOperandFlag.isRegDefKind() && !MatchedOperandFlag.isRegDefEarlyClobberKind()) { LLVM_DEBUG(dbgs() << "Unknown matching constraint\n"); return false; } @@ -470,9 +415,9 @@ bool InlineAsmLowering::lowerInlineAsm( } // Add Flag and input register operand (In) to Inst. Tie In to Def. - unsigned UseFlag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, 1); - unsigned Flag = InlineAsm::getFlagWordForMatchingOp(UseFlag, DefIdx); - Inst.addImm(Flag); + InlineAsm::Flag UseFlag(InlineAsm::Kind::RegUse, 1); + UseFlag.setMatchingOp(DefIdx); + Inst.addImm(UseFlag); Inst.addReg(In); Inst->tieOperands(DefRegIdx, Inst->getNumOperands() - 1); break; @@ -501,8 +446,8 @@ bool InlineAsmLowering::lowerInlineAsm( "Expected constraint to be lowered to at least one operand"); // Add information to the INLINEASM node to know about this input. 
- unsigned OpFlags = - InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); + const unsigned OpFlags = + InlineAsm::Flag(InlineAsm::Kind::Imm, Ops.size()); Inst.addImm(OpFlags); Inst.add(Ops); break; @@ -518,10 +463,10 @@ bool InlineAsmLowering::lowerInlineAsm( assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); - unsigned ConstraintID = + const InlineAsm::ConstraintCode ConstraintID = TLI->getInlineAsmMemConstraint(OpInfo.ConstraintCode); - unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); - OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID); + InlineAsm::Flag OpFlags(InlineAsm::Kind::Mem, 1); + OpFlags.setMemConstraint(ConstraintID); Inst.addImm(OpFlags); ArrayRef<Register> SourceRegs = GetOrCreateVRegs(*OpInfo.CallOperandVal); @@ -563,11 +508,11 @@ bool InlineAsmLowering::lowerInlineAsm( return false; } - unsigned Flag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, NumRegs); + InlineAsm::Flag Flag(InlineAsm::Kind::RegUse, NumRegs); if (OpInfo.Regs.front().isVirtual()) { // Put the register class of the virtual registers in the flag word. const TargetRegisterClass *RC = MRI->getRegClass(OpInfo.Regs.front()); - Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); + Flag.setRegClass(RC->getID()); } Inst.addImm(Flag); if (!buildAnyextOrCopy(OpInfo.Regs[0], SourceRegs[0], MIRBuilder)) @@ -578,10 +523,9 @@ bool InlineAsmLowering::lowerInlineAsm( case InlineAsm::isClobber: { - unsigned NumRegs = OpInfo.Regs.size(); + const unsigned NumRegs = OpInfo.Regs.size(); if (NumRegs > 0) { - unsigned Flag = - InlineAsm::getFlagWord(InlineAsm::Kind_Clobber, NumRegs); + unsigned Flag = InlineAsm::Flag(InlineAsm::Kind::Clobber, NumRegs); Inst.addImm(Flag); for (Register Reg : OpInfo.Regs) { diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 9bbef11067ae..baea773cf528 100644 --- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -58,21 +58,21 @@ INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE, "Select target instructions out of generic instructions", false, false) -InstructionSelect::InstructionSelect(CodeGenOpt::Level OL) +InstructionSelect::InstructionSelect(CodeGenOptLevel OL) : MachineFunctionPass(ID), OptLevel(OL) {} // In order not to crash when calling getAnalysis during testing with -run-pass // we use the default opt level here instead of None, so that the addRequired() // calls are made in getAnalysisUsage(). InstructionSelect::InstructionSelect() - : MachineFunctionPass(ID), OptLevel(CodeGenOpt::Default) {} + : MachineFunctionPass(ID), OptLevel(CodeGenOptLevel::Default) {} void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetPassConfig>(); AU.addRequired<GISelKnownBitsAnalysis>(); AU.addPreserved<GISelKnownBitsAnalysis>(); - if (OptLevel != CodeGenOpt::None) { + if (OptLevel != CodeGenOptLevel::None) { AU.addRequired<ProfileSummaryInfoWrapperPass>(); LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); } @@ -90,14 +90,15 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector(); + ISel->setTargetPassConfig(&TPC); - CodeGenOpt::Level OldOptLevel = OptLevel; + CodeGenOptLevel OldOptLevel = OptLevel; auto RestoreOptLevel = make_scope_exit([=]() { OptLevel = OldOptLevel; }); - OptLevel = MF.getFunction().hasOptNone() ? 
CodeGenOpt::None + OptLevel = MF.getFunction().hasOptNone() ? CodeGenOptLevel::None : MF.getTarget().getOptLevel(); GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); - if (OptLevel != CodeGenOpt::None) { + if (OptLevel != CodeGenOptLevel::None) { PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); if (PSI && PSI->hasProfileSummary()) BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI(); @@ -109,6 +110,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { // An optimization remark emitter. Used to report failures. MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); + ISel->setRemarkEmitter(&MORE); // FIXME: There are many other MF/MFI fields we need to initialize. diff --git a/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp index 8cfb1b786c24..45b403bdd076 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp @@ -76,6 +76,9 @@ LegacyLegalizerInfo::LegacyLegalizerInfo() { setScalarAction(TargetOpcode::G_INTRINSIC, 0, {{1, Legal}}); setScalarAction(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS, 0, {{1, Legal}}); + setScalarAction(TargetOpcode::G_INTRINSIC_CONVERGENT, 0, {{1, Legal}}); + setScalarAction(TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS, 0, + {{1, Legal}}); setLegalizeScalarToDifferentSizeStrategy( TargetOpcode::G_IMPLICIT_DEF, 0, narrowToSmallerAndUnsupportedIfTooSmall); diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index aecbe0b7604c..6d75258c1041 100644 --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -218,7 +218,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, // This will keep all the observers notified about new insertions/deletions. RAIIMFObsDelInstaller Installer(MF, WrapperObserver); LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder, KB); - LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI); + LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI, KB); bool Changed = false; SmallVector<MachineInstr *, 128> RetryList; do { diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index f0da0d88140f..37e7153be572 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -119,8 +119,7 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI, MIRBuilder.setInstrAndDebugLoc(MI); - if (MI.getOpcode() == TargetOpcode::G_INTRINSIC || - MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS) + if (isa<GIntrinsic>(MI)) return LI.legalizeIntrinsic(*this, MI) ? 
Legalized : UnableToLegalize;
 auto Step = LI.getAction(MI, MRI);
 switch (Step.Action) {
@@ -526,6 +525,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
 RTLIBCASE(EXP_F);
 case TargetOpcode::G_FEXP2:
 RTLIBCASE(EXP2_F);
+ case TargetOpcode::G_FEXP10:
+ RTLIBCASE(EXP10_F);
 case TargetOpcode::G_FREM:
 RTLIBCASE(REM_F);
 case TargetOpcode::G_FPOW:
@@ -690,7 +691,7 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
 LLT OpLLT = MRI.getType(Reg);
 Type *OpTy = nullptr;
 if (OpLLT.isPointer())
- OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
+ OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
 else
 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
 Args.push_back({Reg, OpTy, 0});
@@ -795,10 +796,134 @@ conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
 {{MI.getOperand(1).getReg(), FromType, 0}});
 }
 
+static RTLIB::Libcall
+getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
+ RTLIB::Libcall RTLibcall;
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_GET_FPMODE:
+ RTLibcall = RTLIB::FEGETMODE;
+ break;
+ case TargetOpcode::G_SET_FPMODE:
+ case TargetOpcode::G_RESET_FPMODE:
+ RTLibcall = RTLIB::FESETMODE;
+ break;
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+ return RTLibcall;
+}
+
+// Some library functions that read FP state (fegetmode, fegetenv) write the
+// state into a region in memory. IR intrinsics that do the same operations
+// (get_fpmode, get_fpenv) return the state as an integer value. To implement
+// these intrinsics via the library functions, we need to use a temporary
+// variable, for example:
+//
+// %0:_(s32) = G_GET_FPMODE
+//
+// is transformed to:
+//
+// %1:_(p0) = G_FRAME_INDEX %stack.0
+// BL &fegetmode
+// %0:_(s32) = G_LOAD %1
+//
+LegalizerHelper::LegalizeResult
+LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
+ MachineInstr &MI) {
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ auto &MF = MIRBuilder.getMF();
+ auto &MRI = *MIRBuilder.getMRI();
+ auto &Ctx = MF.getFunction().getContext();
+
+ // Create a temporary where the library function will put the read state.
+ Register Dst = MI.getOperand(0).getReg();
+ LLT StateTy = MRI.getType(Dst);
+ TypeSize StateSize = StateTy.getSizeInBytes();
+ Align TempAlign = getStackTemporaryAlignment(StateTy);
+ MachinePointerInfo TempPtrInfo;
+ auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
+
+ // Create a call to the library function, with the temporary as an argument.
+ unsigned TempAddrSpace = DL.getAllocaAddrSpace();
+ Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
+ RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
+ auto Res =
+ createLibcall(MIRBuilder, RTLibcall,
+ CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
+ CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}));
+ if (Res != LegalizerHelper::Legalized)
+ return Res;
+
+ // Create a load from the temporary.
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
+ MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
+
+ return LegalizerHelper::Legalized;
+}
+
+// Similar to `createGetStateLibcall`, this function calls a library function
+// using transient space on the stack. In this case the library function reads
+// the content of the memory region.
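+// For example:
+//
+// G_SET_FPMODE %0:_(s32)
+//
+// is transformed to (a sketch only; the stack slot and virtual register
+// numbers are illustrative, not taken from this patch):
+//
+// %1:_(p0) = G_FRAME_INDEX %stack.0
+// G_STORE %0:_(s32), %1:_(p0)
+// BL &fesetmode
+//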
+LegalizerHelper::LegalizeResult
+LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
+ MachineInstr &MI) {
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ auto &MF = MIRBuilder.getMF();
+ auto &MRI = *MIRBuilder.getMRI();
+ auto &Ctx = MF.getFunction().getContext();
+
+ // Create a temporary where the library function will get the new state.
+ Register Src = MI.getOperand(0).getReg();
+ LLT StateTy = MRI.getType(Src);
+ TypeSize StateSize = StateTy.getSizeInBytes();
+ Align TempAlign = getStackTemporaryAlignment(StateTy);
+ MachinePointerInfo TempPtrInfo;
+ auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
+
+ // Put the new state into the temporary.
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
+ MIRBuilder.buildStore(Src, Temp, *MMO);
+
+ // Create a call to the library function, with the temporary as an argument.
+ unsigned TempAddrSpace = DL.getAllocaAddrSpace();
+ Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
+ RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
+ return createLibcall(MIRBuilder, RTLibcall,
+ CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
+ CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}));
+}
+
+// This function is used to legalize operations that set the default
+// environment state. In the C library a call like `fesetmode(FE_DFL_MODE)`
+// is used for that. On most targets supported by glibc, FE_DFL_MODE is
+// defined as `((const femode_t *) -1)`; that assumption is used here. If it
+// does not hold for some target, the target must provide custom lowering.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
+ MachineInstr &MI) {
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ auto &MF = MIRBuilder.getMF();
+ auto &Ctx = MF.getFunction().getContext();
+
+ // Create an argument for the library function.
+ unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace(); + Type *StatePtrTy = PointerType::get(Ctx, AddrSpace); + unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace); + LLT MemTy = LLT::pointer(AddrSpace, PtrSize); + auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL); + DstOp Dest(MRI.createGenericVirtualRegister(MemTy)); + MIRBuilder.buildIntToPtr(Dest, DefValue); + + RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI); + return createLibcall(MIRBuilder, RTLibcall, + CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0), + CallLowering::ArgInfo({ Dest.getReg(), StatePtrTy, 0})); +} + LegalizerHelper::LegalizeResult LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { - LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); - unsigned Size = LLTy.getSizeInBits(); auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); switch (MI.getOpcode()) { @@ -810,6 +935,8 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { case TargetOpcode::G_SREM: case TargetOpcode::G_UREM: case TargetOpcode::G_CTLZ_ZERO_UNDEF: { + LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); + unsigned Size = LLTy.getSizeInBits(); Type *HLTy = IntegerType::get(Ctx, Size); auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); if (Status != Legalized) @@ -831,6 +958,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { case TargetOpcode::G_FLDEXP: case TargetOpcode::G_FEXP: case TargetOpcode::G_FEXP2: + case TargetOpcode::G_FEXP10: case TargetOpcode::G_FCEIL: case TargetOpcode::G_FFLOOR: case TargetOpcode::G_FMINNUM: @@ -839,6 +967,8 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { case TargetOpcode::G_FRINT: case TargetOpcode::G_FNEARBYINT: case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { + LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); + unsigned Size = LLTy.getSizeInBits(); Type *HLTy = getFloatTypeForLLT(Ctx, LLTy); if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) { LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n"); @@ -901,6 +1031,24 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { MI.eraseFromParent(); return Result; } + case TargetOpcode::G_GET_FPMODE: { + LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI); + if (Result != Legalized) + return Result; + break; + } + case TargetOpcode::G_SET_FPMODE: { + LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI); + if (Result != Legalized) + return Result; + break; + } + case TargetOpcode::G_RESET_FPMODE: { + LegalizeResult Result = createResetStateLibcall(MIRBuilder, MI); + if (Result != Legalized) + return Result; + break; + } } MI.eraseFromParent(); @@ -1297,7 +1445,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, // So long as the new type has more bits than the bits we're extending we // don't need to break it apart. - if (NarrowTy.getScalarSizeInBits() >= SizeInBits) { + if (NarrowTy.getScalarSizeInBits() > SizeInBits) { Observer.changingInstr(MI); // We don't lose any non-extension bits by truncating the src and // sign-extending the dst. 
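On the G_SEXT_INREG hunk just above: tightening `>=` to `>` restricts the fast path to narrow types strictly wider than the width being extended, which is exactly the case where truncating the source and sign-extending the result is lossless. A minimal sketch of that rewrite, assuming a MachineIRBuilder in scope and illustrative types (the function and register names are hypothetical, not code from this patch):

    #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

    using namespace llvm;

    // Sketch: narrowing  %dst:_(s128) = G_SEXT_INREG %src, 32  with
    // NarrowTy = s64. Since 64 > 32, the low s64 of the source holds every
    // bit the extend can produce, so going narrow and re-extending is safe.
    static void narrowSExtInRegFastPath(MachineIRBuilder &B, Register Dst,
                                        Register Src) {
      const LLT S64 = LLT::scalar(64);
      auto Trunc = B.buildTrunc(S64, Src);           // keep the low 64 bits
      auto InReg = B.buildSExtInReg(S64, Trunc, 32); // sign-extend from bit 31
      B.buildSExt(Dst, InReg);                       // widen result to s128
    }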
@@ -1340,14 +1488,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Register AshrCstReg = MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1) .getReg(0); - Register FullExtensionReg = 0; - Register PartialExtensionReg = 0; + Register FullExtensionReg; + Register PartialExtensionReg; // Do the operation on each small part. for (int i = 0; i < NumParts; ++i) { - if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits) + if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) { DstRegs.push_back(SrcRegs[i]); - else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) { + PartialExtensionReg = DstRegs.back(); + } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) { assert(PartialExtensionReg && "Expected to visit partial extension before full"); if (FullExtensionReg) { @@ -1993,8 +2142,20 @@ LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx, auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS}); auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS}); - auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy}, - {LeftOperand, RightOperand}); + // Multiplication cannot overflow if the WideTy is >= 2 * original width, + // so we don't need to check the overflow result of larger type Mulo. + bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth; + + unsigned MulOpc = + WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL; + + MachineInstrBuilder Mulo; + if (WideMulCanOverflow) + Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy}, + {LeftOperand, RightOperand}); + else + Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand}); + auto Mul = Mulo->getOperand(0); MIRBuilder.buildTrunc(Result, Mul); @@ -2012,9 +2173,7 @@ LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx, ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth); } - // Multiplication cannot overflow if the WideTy is >= 2 * original width, - // so we don't need to check the overflow result of larger type Mulo. - if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) { + if (WideMulCanOverflow) { auto Overflow = MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult); // Finally check if the multiplication in the larger type itself overflowed. 
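A quick arithmetic check of the invariant the WideMulCanOverflow change relies on: when the wide type is at least twice the source width, an n-bit by n-bit multiply fits exactly, and the overflow bit reduces to the round-trip compare emitted above. A self-contained sketch of the unsigned s8-through-s16 case (plain C++; the function name is made up for illustration, not patch code):

    #include <cassert>
    #include <cstdint>

    // G_UMULO on s8 legalized via a plain s16 G_MUL: the wide product is
    // exact (255 * 255 = 65025 < 65536), so "overflowed" is exactly "the
    // product does not survive G_TRUNC to s8 and G_ZEXT back" -- the
    // ICMP_NE in the hunk above.
    static bool umulo8ViaWide16(uint8_t A, uint8_t B, uint8_t &Out) {
      const uint16_t Wide = uint16_t(A) * uint16_t(B); // never overflows s16
      Out = uint8_t(Wide);                             // G_TRUNC of product
      return uint16_t(Out) != Wide;                    // lost bits: overflow
    }

    int main() {
      uint8_t R;
      assert(!umulo8ViaWide16(15, 17, R) && R == 255); // 255 fits in s8
      assert(umulo8ViaWide16(16, 16, R));              // 256 wraps: overflow
      return 0;
    }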
@@ -2247,6 +2406,16 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; + case TargetOpcode::G_ROTR: + case TargetOpcode::G_ROTL: + if (TypeIdx != 1) + return UnableToLegalize; + + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_SDIV: case TargetOpcode::G_SREM: case TargetOpcode::G_SMIN: @@ -2325,6 +2494,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FPTOSI: case TargetOpcode::G_FPTOUI: + case TargetOpcode::G_IS_FPCLASS: Observer.changingInstr(MI); if (TypeIdx == 0) @@ -2494,6 +2664,17 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return Legalized; } case TargetOpcode::G_INSERT_VECTOR_ELT: { + if (TypeIdx == 0) { + Observer.changingInstr(MI); + const LLT WideEltTy = WideTy.getElementType(); + + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy, 0); + Observer.changedInstr(MI); + return Legalized; + } + if (TypeIdx == 1) { Observer.changingInstr(MI); @@ -2546,6 +2727,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FSQRT: case TargetOpcode::G_FEXP: case TargetOpcode::G_FEXP2: + case TargetOpcode::G_FEXP10: case TargetOpcode::G_FPOW: case TargetOpcode::G_INTRINSIC_TRUNC: case TargetOpcode::G_INTRINSIC_ROUND: @@ -2648,6 +2830,23 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_VECREDUCE_FADD: + case TargetOpcode::G_VECREDUCE_FMIN: + case TargetOpcode::G_VECREDUCE_FMAX: + case TargetOpcode::G_VECREDUCE_FMINIMUM: + case TargetOpcode::G_VECREDUCE_FMAXIMUM: + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + Register VecReg = MI.getOperand(1).getReg(); + LLT VecTy = MRI.getType(VecReg); + LLT WideVecTy = VecTy.isVector() + ? LLT::vector(VecTy.getElementCount(), WideTy) + : WideTy; + widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT); + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + Observer.changedInstr(MI); + return Legalized; } } @@ -3384,10 +3583,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return lowerFFloor(MI); case TargetOpcode::G_INTRINSIC_ROUND: return lowerIntrinsicRound(MI); - case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { + case TargetOpcode::G_FRINT: { // Since round even is the assumed rounding mode for unconstrained FP // operations, rint and roundeven are the same operation. - changeOpcode(MI, TargetOpcode::G_FRINT); + changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN); return Legalized; } case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { @@ -3421,12 +3620,25 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { } case G_UADDE: { auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs(); - LLT Ty = MRI.getType(Res); + const LLT CondTy = MRI.getType(CarryOut); + const LLT Ty = MRI.getType(Res); + // Initial add of the two operands. auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS); + + // Initial check for carry. + auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS); + + // Add the sum and the carry. 
auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn); MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn); - MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS); + + // Second check for carry. We can only carry if the initial sum is all 1s + // and the carry is set, resulting in a new sum of 0. + auto Zero = MIRBuilder.buildConstant(Ty, 0); + auto ResEqZero = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, Res, Zero); + auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn); + MIRBuilder.buildOr(CarryOut, Carry, Carry2); MI.eraseFromParent(); return Legalized; @@ -3445,13 +3657,23 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { const LLT CondTy = MRI.getType(BorrowOut); const LLT Ty = MRI.getType(Res); + // Initial subtract of the two operands. auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS); + + // Initial check for borrow. + auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS); + + // Subtract the borrow from the first subtract. auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn); MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn); - auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS); - auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS); - MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS); + // Second check for borrow. We can only borrow if the initial difference is + // 0 and the borrow is set, resulting in a new difference of all 1s. + auto Zero = MIRBuilder.buildConstant(Ty, 0); + auto TmpResEqZero = + MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero); + auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn); + MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2); MI.eraseFromParent(); return Legalized; @@ -3503,6 +3725,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return lowerShuffleVector(MI); case G_DYN_STACKALLOC: return lowerDynStackAlloc(MI); + case G_STACKSAVE: + return lowerStackSave(MI); + case G_STACKRESTORE: + return lowerStackRestore(MI); case G_EXTRACT: return lowerExtract(MI); case G_INSERT: @@ -3559,8 +3785,16 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return lowerMemCpyFamily(MI); case G_MEMCPY_INLINE: return lowerMemcpyInline(MI); + case G_ZEXT: + case G_SEXT: + case G_ANYEXT: + return lowerEXT(MI); + case G_TRUNC: + return lowerTRUNC(MI); GISEL_VECREDUCE_CASES_NONSEQ return lowerVectorReduction(MI); + case G_VAARG: + return lowerVAArg(MI); } } @@ -4168,6 +4402,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FPOW: case G_FEXP: case G_FEXP2: + case G_FEXP10: case G_FLOG: case G_FLOG2: case G_FLOG10: @@ -4425,73 +4660,22 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle( return Legalized; } -static unsigned getScalarOpcForReduction(unsigned Opc) { - unsigned ScalarOpc; - switch (Opc) { - case TargetOpcode::G_VECREDUCE_FADD: - ScalarOpc = TargetOpcode::G_FADD; - break; - case TargetOpcode::G_VECREDUCE_FMUL: - ScalarOpc = TargetOpcode::G_FMUL; - break; - case TargetOpcode::G_VECREDUCE_FMAX: - ScalarOpc = TargetOpcode::G_FMAXNUM; - break; - case TargetOpcode::G_VECREDUCE_FMIN: - ScalarOpc = TargetOpcode::G_FMINNUM; - break; - case TargetOpcode::G_VECREDUCE_ADD: - ScalarOpc = TargetOpcode::G_ADD; - break; - case TargetOpcode::G_VECREDUCE_MUL: - ScalarOpc = TargetOpcode::G_MUL; - break; - case TargetOpcode::G_VECREDUCE_AND: - ScalarOpc = TargetOpcode::G_AND; - break; - case 
TargetOpcode::G_VECREDUCE_OR: - ScalarOpc = TargetOpcode::G_OR; - break; - case TargetOpcode::G_VECREDUCE_XOR: - ScalarOpc = TargetOpcode::G_XOR; - break; - case TargetOpcode::G_VECREDUCE_SMAX: - ScalarOpc = TargetOpcode::G_SMAX; - break; - case TargetOpcode::G_VECREDUCE_SMIN: - ScalarOpc = TargetOpcode::G_SMIN; - break; - case TargetOpcode::G_VECREDUCE_UMAX: - ScalarOpc = TargetOpcode::G_UMAX; - break; - case TargetOpcode::G_VECREDUCE_UMIN: - ScalarOpc = TargetOpcode::G_UMIN; - break; - default: - llvm_unreachable("Unhandled reduction"); - } - return ScalarOpc; -} - LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions( MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) { - unsigned Opc = MI.getOpcode(); - assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD && - Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL && - "Sequential reductions not expected"); + auto &RdxMI = cast<GVecReduce>(MI); if (TypeIdx != 1) return UnableToLegalize; // The semantics of the normal non-sequential reductions allow us to freely // re-associate the operation. - auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); + auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs(); if (NarrowTy.isVector() && (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0)) return UnableToLegalize; - unsigned ScalarOpc = getScalarOpcForReduction(Opc); + unsigned ScalarOpc = RdxMI.getScalarOpcForReduction(); SmallVector<Register> SplitSrcs; // If NarrowTy is a scalar then we're being asked to scalarize. const unsigned NumParts = @@ -4536,10 +4720,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions( SmallVector<Register> PartialReductions; for (unsigned Part = 0; Part < NumParts; ++Part) { PartialReductions.push_back( - MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0)); + MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]}) + .getReg(0)); } - // If the types involved are powers of 2, we can generate intermediate vector // ops, before generating a final reduction operation. 
if (isPowerOf2_32(SrcTy.getNumElements()) && @@ -4836,7 +5020,9 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, case TargetOpcode::G_SUB: case TargetOpcode::G_MUL: case TargetOpcode::G_FADD: + case TargetOpcode::G_FSUB: case TargetOpcode::G_FMUL: + case TargetOpcode::G_FDIV: case TargetOpcode::G_UADDSAT: case TargetOpcode::G_USUBSAT: case TargetOpcode::G_SADDSAT: @@ -4886,6 +5072,14 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, case TargetOpcode::G_FREEZE: case TargetOpcode::G_FNEG: case TargetOpcode::G_FABS: + case TargetOpcode::G_FSQRT: + case TargetOpcode::G_FCEIL: + case TargetOpcode::G_FFLOOR: + case TargetOpcode::G_FNEARBYINT: + case TargetOpcode::G_FRINT: + case TargetOpcode::G_INTRINSIC_ROUND: + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: + case TargetOpcode::G_INTRINSIC_TRUNC: case TargetOpcode::G_BSWAP: case TargetOpcode::G_FCANONICALIZE: case TargetOpcode::G_SEXT_INREG: @@ -4943,15 +5137,13 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, MI.eraseFromParent(); return Legalized; } - case TargetOpcode::G_TRUNC: { - Observer.changingInstr(MI); - moreElementsVectorSrc(MI, MoreTy, 1); - moreElementsVectorDst(MI, MoreTy, 0); - Observer.changedInstr(MI); - return Legalized; - } + case TargetOpcode::G_TRUNC: case TargetOpcode::G_FPTRUNC: - case TargetOpcode::G_FPEXT: { + case TargetOpcode::G_FPEXT: + case TargetOpcode::G_FPTOSI: + case TargetOpcode::G_FPTOUI: + case TargetOpcode::G_SITOFP: + case TargetOpcode::G_UITOFP: { if (TypeIdx != 0) return UnableToLegalize; Observer.changingInstr(MI); @@ -5765,8 +5957,10 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { MI.eraseFromParent(); return Legalized; } + Observer.changingInstr(MI); MI.setDesc(TII.get(TargetOpcode::G_CTPOP)); MI.getOperand(1).setReg(MIBTmp.getReg(0)); + Observer.changedInstr(MI); return Legalized; } case TargetOpcode::G_CTPOP: { @@ -5956,6 +6150,105 @@ LegalizerHelper::lowerFunnelShift(MachineInstr &MI) { return Result; } +LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) { + auto [Dst, Src] = MI.getFirst2Regs(); + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); + + uint32_t DstTySize = DstTy.getSizeInBits(); + uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits(); + uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits(); + + if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) || + !isPowerOf2_32(SrcTyScalarSize)) + return UnableToLegalize; + + // The step between extend is too large, split it by creating an intermediate + // extend instruction + if (SrcTyScalarSize * 2 < DstTyScalarSize) { + LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2); + // If the destination type is illegal, split it into multiple statements + // zext x -> zext(merge(zext(unmerge), zext(unmerge))) + auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src}); + // Unmerge the vector + LLT EltTy = MidTy.changeElementCount( + MidTy.getElementCount().divideCoefficientBy(2)); + auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt); + + // ZExt the vectors + LLT ZExtResTy = DstTy.changeElementCount( + DstTy.getElementCount().divideCoefficientBy(2)); + auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy}, + {UnmergeSrc.getReg(0)}); + auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy}, + {UnmergeSrc.getReg(1)}); + + // Merge the ending vectors + MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2}); + + MI.eraseFromParent(); + return Legalized; + } + return 
UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+ // Similar to how operand splitting is done in SelectionDAG, we can handle
+ // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
+ // %inlo(<4 x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
+ // %lo16(<4 x s16>) = G_TRUNC %inlo
+ // %hi16(<4 x s16>) = G_TRUNC %inhi
+ // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
+ // %res(<8 x s8>) = G_TRUNC %in16
+
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
+ isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
+ isPowerOf2_32(SrcTy.getNumElements()) &&
+ isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
+ // Split input type.
+ LLT SplitSrcTy = SrcTy.changeElementCount(
+ SrcTy.getElementCount().divideCoefficientBy(2));
+
+ // First, split the source into two smaller vectors.
+ SmallVector<Register, 2> SplitSrcs;
+ extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs);
+
+ // Truncate the splits into intermediate narrower elements.
+ LLT InterTy;
+ if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
+ InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
+ else
+ InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
+ for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
+ SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
+ }
+
+ // Combine the new truncates into one vector.
+ auto Merge = MIRBuilder.buildMergeLikeInstr(
+ DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
+
+ // Truncate the new vector to the final result type.
+ if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
+ else
+ MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
+
+ MI.eraseFromParent();
+
+ return Legalized;
+ }
+ return UnableToLegalize;
+}
+
 LegalizerHelper::LegalizeResult
 LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
@@ -6523,23 +6816,25 @@ LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
 // round(x) =>
 // t = trunc(x);
 // d = fabs(x - t);
- // o = copysign(1.0f, x);
- // return t + (d >= 0.5 ? o : 0.0);
+ // o = copysign(d >= 0.5 ? 
1.0 : 0.0, x); + // return t + o; auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags); auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags); auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags); - auto Zero = MIRBuilder.buildFConstant(Ty, 0.0); - auto One = MIRBuilder.buildFConstant(Ty, 1.0); + auto Half = MIRBuilder.buildFConstant(Ty, 0.5); - auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X); + auto Cmp = + MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags); - auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, - Flags); - auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags); + // Could emit G_UITOFP instead + auto One = MIRBuilder.buildFConstant(Ty, 1.0); + auto Zero = MIRBuilder.buildFConstant(Ty, 0.0); + auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero); + auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X); - MIRBuilder.buildFAdd(DstReg, T, Sel, Flags); + MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags); MI.eraseFromParent(); return Legalized; @@ -6688,8 +6983,8 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) { Align EltAlign; MachinePointerInfo PtrInfo; - auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()), - VecAlign, PtrInfo); + auto StackTemp = createStackTemporary( + TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo); MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign); // Get the pointer to the element, and be sure not to hit undefined behavior @@ -6727,26 +7022,9 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { LLT IdxTy = LLT::scalar(32); ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); - - if (DstTy.isScalar()) { - if (Src0Ty.isVector()) - return UnableToLegalize; - - // This is just a SELECT. - assert(Mask.size() == 1 && "Expected a single mask element"); - Register Val; - if (Mask[0] < 0 || Mask[0] > 1) - Val = MIRBuilder.buildUndef(DstTy).getReg(0); - else - Val = Mask[0] == 0 ? 
Src0Reg : Src1Reg; - MIRBuilder.buildCopy(DstReg, Val); - MI.eraseFromParent(); - return Legalized; - } - Register Undef; SmallVector<Register, 32> BuildVec; - LLT EltTy = DstTy.getElementType(); + LLT EltTy = DstTy.getScalarType(); for (int Idx : Mask) { if (Idx < 0) { @@ -6768,26 +7046,20 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { } } - MIRBuilder.buildBuildVector(DstReg, BuildVec); + if (DstTy.isScalar()) + MIRBuilder.buildCopy(DstReg, BuildVec[0]); + else + MIRBuilder.buildBuildVector(DstReg, BuildVec); MI.eraseFromParent(); return Legalized; } -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { - const auto &MF = *MI.getMF(); - const auto &TFI = *MF.getSubtarget().getFrameLowering(); - if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp) - return UnableToLegalize; - - Register Dst = MI.getOperand(0).getReg(); - Register AllocSize = MI.getOperand(1).getReg(); - Align Alignment = assumeAligned(MI.getOperand(2).getImm()); - - LLT PtrTy = MRI.getType(Dst); +Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg, + Register AllocSize, + Align Alignment, + LLT PtrTy) { LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits()); - Register SPReg = TLI.getStackPointerRegisterToSaveRestore(); auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg); SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp); @@ -6802,7 +7074,25 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst); } - SPTmp = MIRBuilder.buildCast(PtrTy, Alloc); + return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0); +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { + const auto &MF = *MI.getMF(); + const auto &TFI = *MF.getSubtarget().getFrameLowering(); + if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp) + return UnableToLegalize; + + Register Dst = MI.getOperand(0).getReg(); + Register AllocSize = MI.getOperand(1).getReg(); + Align Alignment = assumeAligned(MI.getOperand(2).getImm()); + + LLT PtrTy = MRI.getType(Dst); + Register SPReg = TLI.getStackPointerRegisterToSaveRestore(); + Register SPTmp = + getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy); + MIRBuilder.buildCopy(SPReg, SPTmp); MIRBuilder.buildCopy(Dst, SPTmp); @@ -6811,6 +7101,28 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { } LegalizerHelper::LegalizeResult +LegalizerHelper::lowerStackSave(MachineInstr &MI) { + Register StackPtr = TLI.getStackPointerRegisterToSaveRestore(); + if (!StackPtr) + return UnableToLegalize; + + MIRBuilder.buildCopy(MI.getOperand(0), StackPtr); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerStackRestore(MachineInstr &MI) { + Register StackPtr = TLI.getStackPointerRegisterToSaveRestore(); + if (!StackPtr) + return UnableToLegalize; + + MIRBuilder.buildCopy(StackPtr, MI.getOperand(0)); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerExtract(MachineInstr &MI) { auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); unsigned Offset = MI.getOperand(2).getImm(); @@ -7577,6 +7889,56 @@ LegalizerHelper::lowerVectorReduction(MachineInstr &MI) { return UnableToLegalize; } +static Type *getTypeForLLT(LLT Ty, LLVMContext &C); + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) { + MachineFunction &MF = *MI.getMF(); + const DataLayout &DL = MIRBuilder.getDataLayout(); + LLVMContext &Ctx = 
MF.getFunction().getContext();
+ Register ListPtr = MI.getOperand(1).getReg();
+ LLT PtrTy = MRI.getType(ListPtr);
+
+ // ListPtr is a pointer to the head of the list. Get the address
+ // of the head of the list.
+ Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
+ MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
+ MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
+ auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
+
+ const Align A(MI.getOperand(2).getImm());
+ LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
+ if (A > TLI.getMinStackArgumentAlignment()) {
+ Register AlignAmt =
+ MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
+ auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
+ auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
+ VAList = AndDst.getReg(0);
+ }
+
+ // Increment the pointer, VAList, to the next vaarg.
+ // The list should be bumped by the size of the element at the current head
+ // of the list.
+ Register Dst = MI.getOperand(0).getReg();
+ LLT LLTTy = MRI.getType(Dst);
+ Type *Ty = getTypeForLLT(LLTTy, Ctx);
+ auto IncAmt =
+ MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
+ auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
+
+ // Store the incremented VAList to the legalized pointer.
+ MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
+ MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
+ MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
+ // Load the actual argument out of the pointer VAList.
+ Align EltAlignment = DL.getABITypeAlign(Ty);
+ MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
+ MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
+ MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
 // On Darwin, -Os means optimize for size without hurting performance, so
 // only really optimize for size when -Oz (MinSize) is used.
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 1f2e481c63e0..de9931d1c240 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -77,13 +77,11 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, LegalizeAction Action) {
 }
 
 raw_ostream &LegalityQuery::print(raw_ostream &OS) const {
- OS << Opcode << ", Tys={";
+ OS << "Opcode=" << Opcode << ", Tys={";
 for (const auto &Type : Types) {
 OS << Type << ", ";
 }
- OS << "}, Opcode=";
-
- OS << Opcode << ", MMOs={";
+ OS << "}, MMOs={";
 for (const auto &MMODescr : MMODescrs) {
 OS << MMODescr.MemoryTy << ", ";
 }
@@ -102,6 +100,7 @@ static bool hasNoSimpleLoops(const LegalizeRule &Rule, const LegalityQuery &Q,
 case Lower:
 case MoreElements:
 case FewerElements:
+ case Libcall:
 break;
 default:
 return Q.Types[Mutation.first] != Mutation.second;
@@ -118,6 +117,10 @@ static bool mutationIsSane(const LegalizeRule &Rule,
 if (Rule.getAction() == Custom || Rule.getAction() == Legal)
 return true;
 
+ // Skip null mutation.
+ if (!Mutation.second.isValid()) + return true; + const unsigned TypeIdx = Mutation.first; const LLT OldTy = Q.Types[TypeIdx]; const LLT NewTy = Mutation.second; diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp index 49f40495d6fc..246aa88b09ac 100644 --- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp @@ -934,9 +934,8 @@ void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) { BitVector LegalSizes(MaxStoreSizeToForm * 2); const auto &LI = *MF->getSubtarget().getLegalizerInfo(); const auto &DL = MF->getFunction().getParent()->getDataLayout(); - Type *IntPtrIRTy = - DL.getIntPtrType(MF->getFunction().getContext(), AddrSpace); - LLT PtrTy = getLLTForType(*IntPtrIRTy->getPointerTo(AddrSpace), DL); + Type *IRPtrTy = PointerType::get(MF->getFunction().getContext(), AddrSpace); + LLT PtrTy = getLLTForType(*IRPtrTy, DL); // We assume that we're not going to be generating any stores wider than // MaxStoreSizeToForm bits for now. for (unsigned Size = 2; Size <= MaxStoreSizeToForm; Size *= 2) { diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 962b54ec5d6b..80e9c08e850b 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -196,14 +196,14 @@ void MachineIRBuilder::validateShiftOp(const LLT Res, const LLT Op0, assert((Res == Op0) && "type mismatch"); } -MachineInstrBuilder MachineIRBuilder::buildPtrAdd(const DstOp &Res, - const SrcOp &Op0, - const SrcOp &Op1) { +MachineInstrBuilder +MachineIRBuilder::buildPtrAdd(const DstOp &Res, const SrcOp &Op0, + const SrcOp &Op1, std::optional<unsigned> Flags) { assert(Res.getLLTTy(*getMRI()).getScalarType().isPointer() && Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch"); assert(Op1.getLLTTy(*getMRI()).getScalarType().isScalar() && "invalid offset type"); - return buildInstr(TargetOpcode::G_PTR_ADD, {Res}, {Op0, Op1}); + return buildInstr(TargetOpcode::G_PTR_ADD, {Res}, {Op0, Op1}, Flags); } std::optional<MachineInstrBuilder> @@ -775,30 +775,55 @@ MachineInstrBuilder MachineIRBuilder::buildInsert(const DstOp &Res, return buildInstr(TargetOpcode::G_INSERT, Res, {Src, Op, uint64_t(Index)}); } -MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID, - ArrayRef<Register> ResultRegs, - bool HasSideEffects) { - auto MIB = - buildInstr(HasSideEffects ? 
TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS - : TargetOpcode::G_INTRINSIC); +static unsigned getIntrinsicOpcode(bool HasSideEffects, bool IsConvergent) { + if (HasSideEffects && IsConvergent) + return TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS; + if (HasSideEffects) + return TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS; + if (IsConvergent) + return TargetOpcode::G_INTRINSIC_CONVERGENT; + return TargetOpcode::G_INTRINSIC; +} + +MachineInstrBuilder +MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID, + ArrayRef<Register> ResultRegs, + bool HasSideEffects, bool isConvergent) { + auto MIB = buildInstr(getIntrinsicOpcode(HasSideEffects, isConvergent)); for (unsigned ResultReg : ResultRegs) MIB.addDef(ResultReg); MIB.addIntrinsicID(ID); return MIB; } +MachineInstrBuilder +MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID, + ArrayRef<Register> ResultRegs) { + auto Attrs = Intrinsic::getAttributes(getContext(), ID); + bool HasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory(); + bool isConvergent = Attrs.hasFnAttr(Attribute::Convergent); + return buildIntrinsic(ID, ResultRegs, HasSideEffects, isConvergent); +} + MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID, ArrayRef<DstOp> Results, - bool HasSideEffects) { - auto MIB = - buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS - : TargetOpcode::G_INTRINSIC); + bool HasSideEffects, + bool isConvergent) { + auto MIB = buildInstr(getIntrinsicOpcode(HasSideEffects, isConvergent)); for (DstOp Result : Results) Result.addDefToMIB(*getMRI(), MIB); MIB.addIntrinsicID(ID); return MIB; } +MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID, + ArrayRef<DstOp> Results) { + auto Attrs = Intrinsic::getAttributes(getContext(), ID); + bool HasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory(); + bool isConvergent = Attrs.hasFnAttr(Attribute::Convergent); + return buildIntrinsic(ID, Results, HasSideEffects, isConvergent); +} + MachineInstrBuilder MachineIRBuilder::buildTrunc(const DstOp &Res, const SrcOp &Op) { return buildInstr(TargetOpcode::G_TRUNC, Res, Op); @@ -1040,16 +1065,16 @@ void MachineIRBuilder::validateTruncExt(const LLT DstTy, const LLT SrcTy, #ifndef NDEBUG if (DstTy.isVector()) { assert(SrcTy.isVector() && "mismatched cast between vector and non-vector"); - assert(SrcTy.getNumElements() == DstTy.getNumElements() && + assert(SrcTy.getElementCount() == DstTy.getElementCount() && "different number of elements in a trunc/ext"); } else assert(DstTy.isScalar() && SrcTy.isScalar() && "invalid extend/trunc"); if (IsExtend) - assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() && + assert(TypeSize::isKnownGT(DstTy.getSizeInBits(), SrcTy.getSizeInBits()) && "invalid narrowing extend"); else - assert(DstTy.getSizeInBits() < SrcTy.getSizeInBits() && + assert(TypeSize::isKnownLT(DstTy.getSizeInBits(), SrcTy.getSizeInBits()) && "invalid widening trunc"); #endif } diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 885a1056b2ea..bb5363fb2527 100644 --- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -449,7 +449,8 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( return MappingCost::ImpossibleCost(); // If mapped with InstrMapping, MI will have the recorded cost. - MappingCost Cost(MBFI ? MBFI->getBlockFreq(MI.getParent()) : 1); + MappingCost Cost(MBFI ? 
MBFI->getBlockFreq(MI.getParent()) + : BlockFrequency(1)); bool Saturated = Cost.addLocalCost(InstrMapping.getCost()); assert(!Saturated && "Possible mapping saturated the cost"); LLVM_DEBUG(dbgs() << "Evaluating mapping cost for: " << MI); @@ -623,7 +624,7 @@ bool RegBankSelect::applyMapping( // Second, rewrite the instruction. LLVM_DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n'); - RBI->applyMapping(OpdMapper); + RBI->applyMapping(MIRBuilder, OpdMapper); return true; } @@ -971,7 +972,7 @@ bool RegBankSelect::EdgeInsertPoint::canMaterialize() const { return Src.canSplitCriticalEdge(DstOrSplit); } -RegBankSelect::MappingCost::MappingCost(const BlockFrequency &LocalFreq) +RegBankSelect::MappingCost::MappingCost(BlockFrequency LocalFreq) : LocalFreq(LocalFreq.getFrequency()) {} bool RegBankSelect::MappingCost::addLocalCost(uint64_t Cost) { diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 080600d3cc98..eaf829f562b2 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -205,8 +205,15 @@ bool llvm::canReplaceReg(Register DstReg, Register SrcReg, return false; // Replace if either DstReg has no constraints or the register // constraints match. - return !MRI.getRegClassOrRegBank(DstReg) || - MRI.getRegClassOrRegBank(DstReg) == MRI.getRegClassOrRegBank(SrcReg); + const auto &DstRBC = MRI.getRegClassOrRegBank(DstReg); + if (!DstRBC || DstRBC == MRI.getRegClassOrRegBank(SrcReg)) + return true; + + // Otherwise match if the Src is already a regclass that is covered by the Dst + // RegBank. + return DstRBC.is<const RegisterBank *>() && MRI.getRegClassOrNull(SrcReg) && + DstRBC.get<const RegisterBank *>()->covers( + *MRI.getRegClassOrNull(SrcReg)); } bool llvm::isTriviallyDead(const MachineInstr &MI, @@ -773,6 +780,29 @@ std::optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, return std::nullopt; } +std::optional<APInt> llvm::ConstantFoldCastOp(unsigned Opcode, LLT DstTy, + const Register Op0, + const MachineRegisterInfo &MRI) { + std::optional<APInt> Val = getIConstantVRegVal(Op0, MRI); + if (!Val) + return Val; + + const unsigned DstSize = DstTy.getScalarSizeInBits(); + + switch (Opcode) { + case TargetOpcode::G_SEXT: + return Val->sext(DstSize); + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_ANYEXT: + // TODO: DAG considers target preference when constant folding any_extend. 
+ return Val->zext(DstSize);
+ default:
+ break;
+ }
+
+ llvm_unreachable("unexpected cast opcode to constant fold");
+}
+
 std::optional<APFloat>
 llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy, Register Src,
 const MachineRegisterInfo &MRI) {
@@ -1086,9 +1116,9 @@ std::optional<APInt> llvm::getIConstantSplatVal(const Register Reg,
 if (auto SplatValAndReg =
 getAnyConstantSplat(Reg, MRI, /* AllowUndef */ false)) {
- std::optional<ValueAndVReg> ValAndVReg =
- getIConstantVRegValWithLookThrough(SplatValAndReg->VReg, MRI);
- return ValAndVReg->Value;
+ if (std::optional<ValueAndVReg> ValAndVReg =
+ getIConstantVRegValWithLookThrough(SplatValAndReg->VReg, MRI))
+ return ValAndVReg->Value;
 }
 
 return std::nullopt;
@@ -1143,7 +1173,7 @@ llvm::getVectorSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI) {
 if (auto Splat = getIConstantSplatSExtVal(MI, MRI))
 return RegOrConstant(*Splat);
 auto Reg = MI.getOperand(1).getReg();
- if (any_of(make_range(MI.operands_begin() + 2, MI.operands_end()),
+ if (any_of(drop_begin(MI.operands(), 2),
 [&Reg](const MachineOperand &Op) { return Op.getReg() != Reg; }))
 return std::nullopt;
 return RegOrConstant(Reg);
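As a closing illustration of what the new ConstantFoldCastOp computes: the fold is just APInt::sext/zext on the source constant, with G_ANYEXT folded like G_ZEXT for now (see the TODO in the hunk above). A small self-contained sketch, assuming only llvm/ADT/APInt.h; this is not code from the patch:

    #include "llvm/ADT/APInt.h"

    using llvm::APInt;

    int main() {
      const APInt V(/*numBits=*/8, /*val=*/0xFF); // the s8 constant -1 (255)
      const APInt S = V.sext(32);                 // G_SEXT   -> 0xFFFFFFFF
      const APInt Z = V.zext(32);                 // G_ZEXT   -> 0x000000FF
      const APInt A = V.zext(32);                 // G_ANYEXT -> folds as zext
      return (S.isAllOnes() && Z == 255 && A == Z) ? 0 : 1;
    }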