Diffstat (limited to 'llvm/lib/CodeGen/GlobalISel')
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp                |   6
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CallLowering.cpp           |  20
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/Combiner.cpp               |  63
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp         | 657
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp   |  15
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp         |  16
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp           | 111
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp      | 134
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp      |  14
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp    |   3
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/Legalizer.cpp              |   2
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp        | 622
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp          |  11
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp           |   5
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp       |  59
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp          |   7
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/Utils.cpp                  |  42
17 files changed, 1255 insertions, 532 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
index e047996f9aa8..ca4d0986b442 100644
--- a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -76,9 +76,9 @@ bool CSEConfigConstantOnly::shouldCSEOpc(unsigned Opc) {
}
std::unique_ptr<CSEConfigBase>
-llvm::getStandardCSEConfigForOpt(CodeGenOpt::Level Level) {
+llvm::getStandardCSEConfigForOpt(CodeGenOptLevel Level) {
std::unique_ptr<CSEConfigBase> Config;
- if (Level == CodeGenOpt::None)
+ if (Level == CodeGenOptLevel::None)
Config = std::make_unique<CSEConfigConstantOnly>();
else
Config = std::make_unique<CSEConfigFull>();
@@ -244,8 +244,6 @@ void GISelCSEInfo::changedInstr(MachineInstr &MI) { changingInstr(MI); }
void GISelCSEInfo::analyze(MachineFunction &MF) {
setMF(MF);
for (auto &MBB : MF) {
- if (MBB.empty())
- continue;
for (MachineInstr &MI : MBB) {
if (!shouldCSE(MI.getOpcode()))
continue;
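
With the rename above, callers now pass the scoped CodeGenOptLevel enum rather than CodeGenOpt::Level. A minimal caller-side sketch (the wrapper function is illustrative, not part of this patch):

#include "llvm/CodeGen/GlobalISel/CSEInfo.h" // getStandardCSEConfigForOpt, CSEConfigBase
#include "llvm/Support/CodeGen.h"            // CodeGenOptLevel
#include <memory>

static std::unique_ptr<llvm::CSEConfigBase> makeCSEConfig(llvm::CodeGenOptLevel Level) {
  // At CodeGenOptLevel::None only constant-like opcodes are CSE'd; every other
  // level gets the full configuration.
  return llvm::getStandardCSEConfigForOpt(Level);
}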
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 28c33e2038e4..2527b1431289 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -110,6 +110,8 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
getReturnInfo(CallConv, RetTy, CB.getAttributes(), SplitArgs, DL);
Info.CanLowerReturn = canLowerReturn(MF, CallConv, SplitArgs, IsVarArg);
+ Info.IsConvergent = CB.isConvergent();
+
if (!Info.CanLowerReturn) {
// Callee requires sret demotion.
insertSRetOutgoingArgument(MIRBuilder, CB, Info);
@@ -356,7 +358,7 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
if (PartLLT.isVector() == LLTy.isVector() &&
PartLLT.getScalarSizeInBits() > LLTy.getScalarSizeInBits() &&
(!PartLLT.isVector() ||
- PartLLT.getNumElements() == LLTy.getNumElements()) &&
+ PartLLT.getElementCount() == LLTy.getElementCount()) &&
OrigRegs.size() == 1 && Regs.size() == 1) {
Register SrcReg = Regs[0];
@@ -404,6 +406,7 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
// If PartLLT is a mismatched vector in both number of elements and element
// size, e.g. PartLLT == v2s64 and LLTy is v3s32, then first coerce it to
// have the same elt type, i.e. v4s32.
+ // TODO: Extend this coercion to element multiples other than just 2.
if (PartLLT.getSizeInBits() > LLTy.getSizeInBits() &&
PartLLT.getScalarSizeInBits() == LLTy.getScalarSizeInBits() * 2 &&
Regs.size() == 1) {
@@ -845,7 +848,8 @@ void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
unsigned NumValues = SplitVTs.size();
Align BaseAlign = DL.getPrefTypeAlign(RetTy);
- Type *RetPtrTy = RetTy->getPointerTo(DL.getAllocaAddrSpace());
+ Type *RetPtrTy =
+ PointerType::get(RetTy->getContext(), DL.getAllocaAddrSpace());
LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetPtrTy), DL);
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
@@ -1132,7 +1136,7 @@ void CallLowering::ValueHandler::copyArgumentMemory(
}
Register CallLowering::ValueHandler::extendRegister(Register ValReg,
- CCValAssign &VA,
+ const CCValAssign &VA,
unsigned MaxSizeBits) {
LLT LocTy{VA.getLocVT()};
LLT ValTy{VA.getValVT()};
@@ -1181,9 +1185,8 @@ Register CallLowering::ValueHandler::extendRegister(Register ValReg,
void CallLowering::ValueAssigner::anchor() {}
-Register CallLowering::IncomingValueHandler::buildExtensionHint(CCValAssign &VA,
- Register SrcReg,
- LLT NarrowTy) {
+Register CallLowering::IncomingValueHandler::buildExtensionHint(
+ const CCValAssign &VA, Register SrcReg, LLT NarrowTy) {
switch (VA.getLocInfo()) {
case CCValAssign::LocInfo::ZExt: {
return MIRBuilder
@@ -1223,9 +1226,8 @@ static bool isCopyCompatibleType(LLT SrcTy, LLT DstTy) {
(DstTy.isPointer() && SrcTy.isScalar());
}
-void CallLowering::IncomingValueHandler::assignValueToReg(Register ValVReg,
- Register PhysReg,
- CCValAssign VA) {
+void CallLowering::IncomingValueHandler::assignValueToReg(
+ Register ValVReg, Register PhysReg, const CCValAssign &VA) {
const MVT LocVT = VA.getLocVT();
const LLT LocTy(LocVT);
const LLT RegTy = MRI.getType(ValVReg);
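
The RetPtrTy change above is the opaque-pointer form: there is no pointee type to thread through any more, so the pointer type is built from just the context and the alloca address space. A small standalone sketch of the same construction (the helper name is illustrative):

#include "llvm/IR/DerivedTypes.h"

static llvm::PointerType *allocaPtrTy(llvm::LLVMContext &Ctx, unsigned AllocaAS) {
  // Equivalent to the old RetTy->getPointerTo(AllocaAS), minus the pointee type.
  return llvm::PointerType::get(Ctx, AllocaAS);
}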
diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index 748fa273d499..d18e65a83484 100644
--- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -39,7 +39,6 @@ cl::OptionCategory GICombinerOptionCategory(
);
} // end namespace llvm
-namespace {
/// This class acts as the glue the joins the CombinerHelper to the overall
/// Combine algorithm. The CombinerHelper is intended to report the
/// modifications it makes to the MIR to the GISelChangeObserver and the
@@ -48,7 +47,7 @@ namespace {
/// instruction creation will schedule that instruction for a future visit.
/// Other Combiner implementations may require more complex behaviour from
/// their GISelChangeObserver subclass.
-class WorkListMaintainer : public GISelChangeObserver {
+class Combiner::WorkListMaintainer : public GISelChangeObserver {
using WorkListTy = GISelWorkList<512>;
WorkListTy &WorkList;
/// The instructions that have been created but we want to report once they
@@ -88,27 +87,46 @@ public:
LLVM_DEBUG(CreatedInstrs.clear());
}
};
-}
-Combiner::Combiner(CombinerInfo &Info, const TargetPassConfig *TPC)
- : CInfo(Info), TPC(TPC) {
+Combiner::Combiner(MachineFunction &MF, CombinerInfo &CInfo,
+ const TargetPassConfig *TPC, GISelKnownBits *KB,
+ GISelCSEInfo *CSEInfo)
+ : Builder(CSEInfo ? std::make_unique<CSEMIRBuilder>()
+ : std::make_unique<MachineIRBuilder>()),
+ WLObserver(std::make_unique<WorkListMaintainer>(WorkList)),
+ ObserverWrapper(std::make_unique<GISelObserverWrapper>()), CInfo(CInfo),
+ Observer(*ObserverWrapper), B(*Builder), MF(MF), MRI(MF.getRegInfo()),
+ KB(KB), TPC(TPC), CSEInfo(CSEInfo) {
(void)this->TPC; // FIXME: Remove when used.
+
+ // Setup builder.
+ B.setMF(MF);
+ if (CSEInfo)
+ B.setCSEInfo(CSEInfo);
+
+ // Setup observer.
+ ObserverWrapper->addObserver(WLObserver.get());
+ if (CSEInfo)
+ ObserverWrapper->addObserver(CSEInfo);
+
+ B.setChangeObserver(*ObserverWrapper);
}
-bool Combiner::combineMachineInstrs(MachineFunction &MF,
- GISelCSEInfo *CSEInfo) {
+Combiner::~Combiner() = default;
+
+bool Combiner::combineMachineInstrs() {
// If the ISel pipeline failed, do not bother running this pass.
// FIXME: Should this be here or in individual combiner passes.
if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel))
return false;
- Builder =
- CSEInfo ? std::make_unique<CSEMIRBuilder>() : std::make_unique<MachineIRBuilder>();
- MRI = &MF.getRegInfo();
- Builder->setMF(MF);
- if (CSEInfo)
- Builder->setCSEInfo(CSEInfo);
+ // We can't call this in the constructor because the derived class is
+ // uninitialized at that time.
+ if (!HasSetupMF) {
+ HasSetupMF = true;
+ setupMF(MF, KB);
+ }
LLVM_DEBUG(dbgs() << "Generic MI Combiner for: " << MF.getName() << '\n');
@@ -116,26 +134,23 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
bool MFChanged = false;
bool Changed;
- MachineIRBuilder &B = *Builder;
do {
+ WorkList.clear();
+
// Collect all instructions. Do a post order traversal for basic blocks and
// insert with list bottom up, so while we pop_back_val, we'll traverse top
// down RPOT.
Changed = false;
- GISelWorkList<512> WorkList;
- WorkListMaintainer Observer(WorkList);
- GISelObserverWrapper WrapperObserver(&Observer);
- if (CSEInfo)
- WrapperObserver.addObserver(CSEInfo);
- RAIIDelegateInstaller DelInstall(MF, &WrapperObserver);
+
+ RAIIDelegateInstaller DelInstall(MF, ObserverWrapper.get());
for (MachineBasicBlock *MBB : post_order(&MF)) {
for (MachineInstr &CurMI :
llvm::make_early_inc_range(llvm::reverse(*MBB))) {
// Erase dead insts before even adding to the list.
- if (isTriviallyDead(CurMI, *MRI)) {
+ if (isTriviallyDead(CurMI, MRI)) {
LLVM_DEBUG(dbgs() << CurMI << "Is dead; erasing.\n");
- llvm::salvageDebugInfo(*MRI, CurMI);
+ llvm::salvageDebugInfo(MRI, CurMI);
CurMI.eraseFromParent();
continue;
}
@@ -147,8 +162,8 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
while (!WorkList.empty()) {
MachineInstr *CurrInst = WorkList.pop_back_val();
LLVM_DEBUG(dbgs() << "\nTry combining " << *CurrInst;);
- Changed |= CInfo.combine(WrapperObserver, *CurrInst, B);
- Observer.reportFullyCreatedInstrs();
+ Changed |= tryCombineAll(*CurrInst);
+ WLObserver->reportFullyCreatedInstrs();
}
MFChanged |= Changed;
} while (Changed);
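
After this refactor the Combiner owns its state: the MachineIRBuilder, worklist, and observer wrapper are created once in the constructor, and combineMachineInstrs() takes no arguments and hands each worklist entry to the derived class's tryCombineAll(). A driver-side sketch of the new usage, where MyTargetCombinerImpl stands in for a hypothetical derived combiner (e.g. a tablegen-generated *CombinerImpl) and the usual GlobalISel headers are assumed:

bool runMyCombiner(llvm::MachineFunction &MF, llvm::CombinerInfo &CInfo,
                   const llvm::TargetPassConfig *TPC, llvm::GISelKnownBits *KB,
                   llvm::GISelCSEInfo *CSEInfo) {
  // All per-run state now lives in the Combiner instance instead of being
  // rebuilt on every call, and setupMF() runs lazily on the first combine.
  MyTargetCombinerImpl Impl(MF, CInfo, TPC, KB, CSEInfo);
  return Impl.combineMachineInstrs();
}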
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index cc7fb3ee1109..91a64d59e154 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6,6 +6,8 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
@@ -395,6 +397,39 @@ void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
replaceRegWith(MRI, DstReg, NewDstReg);
}
+bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
+ "Invalid instruction kind");
+
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+ return Mask.size() == 1;
+}
+
+void CombinerHelper::applyShuffleToExtract(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.setInsertPt(*MI.getParent(), MI);
+
+ int I = MI.getOperand(3).getShuffleMask()[0];
+ Register Src1 = MI.getOperand(1).getReg();
+ LLT Src1Ty = MRI.getType(Src1);
+ int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
+ Register SrcReg;
+ if (I >= Src1NumElts) {
+ SrcReg = MI.getOperand(2).getReg();
+ I -= Src1NumElts;
+ } else if (I >= 0)
+ SrcReg = Src1;
+
+ if (I < 0)
+ Builder.buildUndef(DstReg);
+ else if (!MRI.getType(SrcReg).isVector())
+ Builder.buildCopy(DstReg, SrcReg);
+ else
+ Builder.buildExtractVectorElementConstant(DstReg, SrcReg, I);
+
+ MI.eraseFromParent();
+}
+
namespace {
/// Select a preference between two uses. CurrentUse is the current preference
@@ -910,160 +945,332 @@ void CombinerHelper::applySextInRegOfLoad(
MI.eraseFromParent();
}
-bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
- Register &Base, Register &Offset) {
- auto &MF = *MI.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
+static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
+ if (Ty.isVector())
+ return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
+ Ty.getNumElements());
+ return IntegerType::get(C, Ty.getSizeInBits());
+}
-#ifndef NDEBUG
- unsigned Opcode = MI.getOpcode();
- assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
- Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
-#endif
+/// Return true if 'MI' is a load or a store whose address operand may be
+/// folded into the load / store addressing mode.
+static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
+ MachineRegisterInfo &MRI) {
+ TargetLowering::AddrMode AM;
+ auto *MF = MI->getMF();
+ auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
+ if (!Addr)
+ return false;
+
+ AM.HasBaseReg = true;
+ if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
+ AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
+ else
+ AM.Scale = 1; // [reg +/- reg]
- Base = MI.getOperand(1).getReg();
- MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
- if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
+ return TLI.isLegalAddressingMode(
+ MF->getDataLayout(), AM,
+ getTypeForLLT(MI->getMMO().getMemoryType(),
+ MF->getFunction().getContext()),
+ MI->getMMO().getAddrSpace());
+}
+
+static unsigned getIndexedOpc(unsigned LdStOpc) {
+ switch (LdStOpc) {
+ case TargetOpcode::G_LOAD:
+ return TargetOpcode::G_INDEXED_LOAD;
+ case TargetOpcode::G_STORE:
+ return TargetOpcode::G_INDEXED_STORE;
+ case TargetOpcode::G_ZEXTLOAD:
+ return TargetOpcode::G_INDEXED_ZEXTLOAD;
+ case TargetOpcode::G_SEXTLOAD:
+ return TargetOpcode::G_INDEXED_SEXTLOAD;
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+}
+
+bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
+ // Check for legality.
+ LLT PtrTy = MRI.getType(LdSt.getPointerReg());
+ LLT Ty = MRI.getType(LdSt.getReg(0));
+ LLT MemTy = LdSt.getMMO().getMemoryType();
+ SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
+ {{MemTy, MemTy.getSizeInBits(), AtomicOrdering::NotAtomic}});
+ unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
+ SmallVector<LLT> OpTys;
+ if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
+ OpTys = {PtrTy, Ty, Ty};
+ else
+ OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
+
+ LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
+ return isLegal(Q);
+}
+
+static cl::opt<unsigned> PostIndexUseThreshold(
+ "post-index-use-threshold", cl::Hidden, cl::init(32),
+ cl::desc("Number of uses of a base pointer to check before it is no longer "
+ "considered for post-indexing."));
+
+bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
+ Register &Base, Register &Offset,
+ bool &RematOffset) {
+ // We're looking for the following pattern, for either load or store:
+ // %baseptr:_(p0) = ...
+ // G_STORE %val(s64), %baseptr(p0)
+ // %offset:_(s64) = G_CONSTANT i64 -256
+ // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
+ const auto &TLI = getTargetLowering();
+
+ Register Ptr = LdSt.getPointerReg();
+ // If the store is the only use, don't bother.
+ if (MRI.hasOneNonDBGUse(Ptr))
return false;
- LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
- // FIXME: The following use traversal needs a bail out for patholigical cases.
- for (auto &Use : MRI.use_nodbg_instructions(Base)) {
- if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
+ if (!isIndexedLoadStoreLegal(LdSt))
+ return false;
+
+ if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
+ return false;
+
+ MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
+ auto *PtrDef = MRI.getVRegDef(Ptr);
+
+ unsigned NumUsesChecked = 0;
+ for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
+ if (++NumUsesChecked > PostIndexUseThreshold)
+ return false; // Try to avoid exploding compile time.
+
+ auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
+ // The use itself might be dead. This can happen during combines if DCE
+ // hasn't had a chance to run yet. Don't allow it to form an indexed op.
+ if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
+ continue;
+
+ // Check that the user of this isn't the store; otherwise we'd be generating
+ // an indexed store defining its own use.
+ if (StoredValDef == &Use)
continue;
- Offset = Use.getOperand(2).getReg();
+ Offset = PtrAdd->getOffsetReg();
if (!ForceLegalIndexing &&
- !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
- LLVM_DEBUG(dbgs() << " Ignoring candidate with illegal addrmode: "
- << Use);
+ !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
+ /*IsPre*/ false, MRI))
continue;
- }
// Make sure the offset calculation is before the potentially indexed op.
- // FIXME: we really care about dependency here. The offset calculation might
- // be movable.
- MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
- if (!OffsetDef || !dominates(*OffsetDef, MI)) {
- LLVM_DEBUG(dbgs() << " Ignoring candidate with offset after mem-op: "
- << Use);
- continue;
+ MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
+ RematOffset = false;
+ if (!dominates(*OffsetDef, LdSt)) {
+ // If the offset however is just a G_CONSTANT, we can always just
+ // rematerialize it where we need it.
+ if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
+ continue;
+ RematOffset = true;
}
- // FIXME: check whether all uses of Base are load/store with foldable
- // addressing modes. If so, using the normal addr-modes is better than
- // forming an indexed one.
+ for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
+ if (&BasePtrUse == PtrDef)
+ continue;
- bool MemOpDominatesAddrUses = true;
- for (auto &PtrAddUse :
- MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
- if (!dominates(MI, PtrAddUse)) {
- MemOpDominatesAddrUses = false;
- break;
- }
- }
+ // If the user is a later load/store that can be post-indexed, then don't
+ // combine this one.
+ auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
+ if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
+ dominates(LdSt, *BasePtrLdSt) &&
+ isIndexedLoadStoreLegal(*BasePtrLdSt))
+ return false;
- if (!MemOpDominatesAddrUses) {
- LLVM_DEBUG(
- dbgs() << " Ignoring candidate as memop does not dominate uses: "
- << Use);
- continue;
+ // Now we're looking for the key G_PTR_ADD instruction, which contains
+ // the offset add that we want to fold.
+ if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
+ Register PtrAddDefReg = BasePtrUseDef->getReg(0);
+ for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
+ // If the use is in a different block, then we may produce worse code
+ // due to the extra register pressure.
+ if (BaseUseUse.getParent() != LdSt.getParent())
+ return false;
+
+ if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
+ if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
+ return false;
+ }
+ if (!dominates(LdSt, BasePtrUse))
+ return false; // All uses must be dominated by the load/store.
+ }
}
- LLVM_DEBUG(dbgs() << " Found match: " << Use);
- Addr = Use.getOperand(0).getReg();
+ Addr = PtrAdd->getReg(0);
+ Base = PtrAdd->getBaseReg();
return true;
}
return false;
}
-bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
+bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
Register &Base, Register &Offset) {
- auto &MF = *MI.getParent()->getParent();
+ auto &MF = *LdSt.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
-#ifndef NDEBUG
- unsigned Opcode = MI.getOpcode();
- assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
- Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
-#endif
-
- Addr = MI.getOperand(1).getReg();
- MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
- if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
+ Addr = LdSt.getPointerReg();
+ if (!mi_match(Addr, MRI, m_GPtrAdd(m_Reg(Base), m_Reg(Offset))) ||
+ MRI.hasOneNonDBGUse(Addr))
return false;
- Base = AddrDef->getOperand(1).getReg();
- Offset = AddrDef->getOperand(2).getReg();
-
- LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
-
if (!ForceLegalIndexing &&
- !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
- LLVM_DEBUG(dbgs() << " Skipping, not legal for target");
+ !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
+ return false;
+
+ if (!isIndexedLoadStoreLegal(LdSt))
return false;
- }
MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
- if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
- LLVM_DEBUG(dbgs() << " Skipping, frame index would need copy anyway.");
+ if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
return false;
- }
- if (MI.getOpcode() == TargetOpcode::G_STORE) {
+ if (auto *St = dyn_cast<GStore>(&LdSt)) {
// Would require a copy.
- if (Base == MI.getOperand(0).getReg()) {
- LLVM_DEBUG(dbgs() << " Skipping, storing base so need copy anyway.");
+ if (Base == St->getValueReg())
return false;
- }
// We're expecting one use of Addr in MI, but it could also be the
// value stored, which isn't actually dominated by the instruction.
- if (MI.getOperand(0).getReg() == Addr) {
- LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses");
+ if (St->getValueReg() == Addr)
return false;
- }
}
+ // Avoid increasing cross-block register pressure.
+ for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
+ if (AddrUse.getParent() != LdSt.getParent())
+ return false;
+
// FIXME: check whether all uses of the base pointer are constant PtrAdds.
// That might allow us to end base's liveness here by adjusting the constant.
+ bool RealUse = false;
+ for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
+ if (!dominates(LdSt, AddrUse))
+ return false; // All uses must be dominated by the load/store.
- for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
- if (!dominates(MI, UseMI)) {
- LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses.");
- return false;
+ // If Ptr may be folded into the addressing mode of another use, then it's
+ // not profitable to do this transformation.
+ if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
+ if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
+ RealUse = true;
+ } else {
+ RealUse = true;
}
}
-
- return true;
+ return RealUse;
}
-bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
- IndexedLoadStoreMatchInfo MatchInfo;
- if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
- applyCombineIndexedLoadStore(MI, MatchInfo);
- return true;
+bool CombinerHelper::matchCombineExtractedVectorLoad(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
+
+ // Check if there is a load that defines the vector being extracted from.
+ auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
+ if (!LoadMI)
+ return false;
+
+ Register Vector = MI.getOperand(1).getReg();
+ LLT VecEltTy = MRI.getType(Vector).getElementType();
+
+ assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
+
+ // Checking whether we should reduce the load width.
+ if (!MRI.hasOneNonDBGUse(Vector))
+ return false;
+
+ // Check if the defining load is simple.
+ if (!LoadMI->isSimple())
+ return false;
+
+ // If the vector element type is not a multiple of a byte then we are unable
+ // to correctly compute an address to load only the extracted element as a
+ // scalar.
+ if (!VecEltTy.isByteSized())
+ return false;
+
+ // Check if the new load that we are going to create is legal
+ // if we are in the post-legalization phase.
+ MachineMemOperand MMO = LoadMI->getMMO();
+ Align Alignment = MMO.getAlign();
+ MachinePointerInfo PtrInfo;
+ uint64_t Offset;
+
+ // Finding the appropriate PtrInfo if offset is a known constant.
+ // This is required to create the memory operand for the narrowed load.
+ // This machine memory operand object helps us infer about legality
+ // before we proceed to combine the instruction.
+ if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
+ int Elt = CVal->getZExtValue();
+ // FIXME: should be (ABI size)*Elt.
+ Offset = VecEltTy.getSizeInBits() * Elt / 8;
+ PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
+ } else {
+ // Discard the pointer info except the address space because the memory
+ // operand can't represent this new access since the offset is variable.
+ Offset = VecEltTy.getSizeInBits() / 8;
+ PtrInfo = MachinePointerInfo(MMO.getPointerInfo().getAddrSpace());
}
- return false;
-}
-bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
- unsigned Opcode = MI.getOpcode();
- if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
- Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
+ Alignment = commonAlignment(Alignment, Offset);
+
+ Register VecPtr = LoadMI->getPointerReg();
+ LLT PtrTy = MRI.getType(VecPtr);
+
+ MachineFunction &MF = *MI.getMF();
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
+
+ LegalityQuery::MemDesc MMDesc(*NewMMO);
+
+ LegalityQuery Q = {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}};
+
+ if (!isLegalOrBeforeLegalizer(Q))
return false;
- // For now, no targets actually support these opcodes so don't waste time
- // running these unless we're forced to for testing.
- if (!ForceLegalIndexing)
+ // Load must be allowed and fast on the target.
+ LLVMContext &C = MF.getFunction().getContext();
+ auto &DL = MF.getDataLayout();
+ unsigned Fast = 0;
+ if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
+ &Fast) ||
+ !Fast)
return false;
- MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
+ Register Result = MI.getOperand(0).getReg();
+ Register Index = MI.getOperand(2).getReg();
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ GISelObserverWrapper DummyObserver;
+ LegalizerHelper Helper(B.getMF(), DummyObserver, B);
+ // Get pointer to the vector element.
+ Register finalPtr = Helper.getVectorElementPointer(
+ LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
+ Index);
+ // New G_LOAD instruction.
+ B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
+ // Remove original GLOAD instruction.
+ LoadMI->eraseFromParent();
+ };
+
+ return true;
+}
+
+bool CombinerHelper::matchCombineIndexedLoadStore(
+ MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
+ auto &LdSt = cast<GLoadStore>(MI);
+
+ if (LdSt.isAtomic())
+ return false;
+
+ MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
MatchInfo.Offset);
if (!MatchInfo.IsPre &&
- !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
- MatchInfo.Offset))
+ !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
+ MatchInfo.Offset, MatchInfo.RematOffset))
return false;
return true;
@@ -1072,28 +1279,21 @@ bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadS
void CombinerHelper::applyCombineIndexedLoadStore(
MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
- MachineIRBuilder MIRBuilder(MI);
+ Builder.setInstrAndDebugLoc(MI);
unsigned Opcode = MI.getOpcode();
bool IsStore = Opcode == TargetOpcode::G_STORE;
- unsigned NewOpcode;
- switch (Opcode) {
- case TargetOpcode::G_LOAD:
- NewOpcode = TargetOpcode::G_INDEXED_LOAD;
- break;
- case TargetOpcode::G_SEXTLOAD:
- NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
- break;
- case TargetOpcode::G_ZEXTLOAD:
- NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
- break;
- case TargetOpcode::G_STORE:
- NewOpcode = TargetOpcode::G_INDEXED_STORE;
- break;
- default:
- llvm_unreachable("Unknown load/store opcode");
+ unsigned NewOpcode = getIndexedOpc(Opcode);
+
+ // If the offset constant didn't happen to dominate the load/store, we can
+ // just clone it as needed.
+ if (MatchInfo.RematOffset) {
+ auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
+ auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
+ *OldCst->getOperand(1).getCImm());
+ MatchInfo.Offset = NewCst.getReg(0);
}
- auto MIB = MIRBuilder.buildInstr(NewOpcode);
+ auto MIB = Builder.buildInstr(NewOpcode);
if (IsStore) {
MIB.addDef(MatchInfo.Addr);
MIB.addUse(MI.getOperand(0).getReg());
@@ -1105,6 +1305,7 @@ void CombinerHelper::applyCombineIndexedLoadStore(
MIB.addUse(MatchInfo.Base);
MIB.addUse(MatchInfo.Offset);
MIB.addImm(MatchInfo.IsPre);
+ MIB->cloneMemRefs(*MI.getMF(), MI);
MI.eraseFromParent();
AddrDef.eraseFromParent();
@@ -1271,13 +1472,7 @@ void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
Observer.changedInstr(*BrCond);
}
-static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
- if (Ty.isVector())
- return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
- Ty.getNumElements());
- return IntegerType::get(C, Ty.getSizeInBits());
-}
-
+
bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
MachineIRBuilder HelperBuilder(MI);
GISelObserverWrapper DummyObserver;
@@ -1394,7 +1589,7 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
if (AccessTy) {
AMNew.HasBaseReg = true;
TargetLoweringBase::AddrMode AMOld;
- AMOld.BaseOffs = MaybeImm2Val->Value.getSExtValue();
+ AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
AMOld.HasBaseReg = true;
unsigned AS = MRI.getType(Add2).getAddressSpace();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
@@ -1456,7 +1651,7 @@ bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
// Pass the combined immediate to the apply function.
MatchInfo.Imm =
- (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue();
+ (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
MatchInfo.Reg = Base;
// There is no simple replacement for a saturating unsigned left shift that
@@ -1535,7 +1730,7 @@ bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
// Find a matching one-use shift by constant.
const Register C1 = MI.getOperand(2).getReg();
auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
- if (!MaybeImmVal)
+ if (!MaybeImmVal || MaybeImmVal->Value == 0)
return false;
const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
@@ -1685,6 +1880,8 @@ void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
RegisterImmPair &MatchData) {
assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
+ if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
+ return false;
Register LHS = MI.getOperand(1).getReg();
@@ -2248,35 +2445,6 @@ void CombinerHelper::applyCombineExtOfExt(
}
}
-void CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
-
- Builder.setInstrAndDebugLoc(MI);
- Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg,
- MI.getFlags());
- MI.eraseFromParent();
-}
-
-bool CombinerHelper::matchCombineFAbsOfFNeg(MachineInstr &MI,
- BuildFnTy &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
- Register Src = MI.getOperand(1).getReg();
- Register NegSrc;
-
- if (!mi_match(Src, MRI, m_GFNeg(m_Reg(NegSrc))))
- return false;
-
- MatchInfo = [=, &MI](MachineIRBuilder &B) {
- Observer.changingInstr(MI);
- MI.getOperand(1).setReg(NegSrc);
- Observer.changedInstr(MI);
- };
- return true;
-}
-
bool CombinerHelper::matchCombineTruncOfExt(
MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
@@ -2580,6 +2748,16 @@ bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
MaybeCst->getSExtValue() == C;
}
+bool CombinerHelper::matchConstantFPOp(const MachineOperand &MOP, double C) {
+ if (!MOP.isReg())
+ return false;
+ std::optional<FPValueAndVReg> MaybeCst;
+ if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
+ return false;
+
+ return MaybeCst->Value.isExactlyValue(C);
+}
+
void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
unsigned OpIdx) {
assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
@@ -2599,6 +2777,45 @@ void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
replaceRegWith(MRI, OldReg, Replacement);
}
+bool CombinerHelper::matchConstantLargerBitWidth(MachineInstr &MI,
+ unsigned ConstIdx) {
+ Register ConstReg = MI.getOperand(ConstIdx).getReg();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ // Get the shift amount
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
+ if (!VRegAndVal)
+ return false;
+
+ // Return true if the shift amount >= bit width
+ return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
+}
+
+void CombinerHelper::applyFunnelShiftConstantModulo(MachineInstr &MI) {
+ assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
+ MI.getOpcode() == TargetOpcode::G_FSHR) &&
+ "This is not a funnel shift operation");
+
+ Register ConstReg = MI.getOperand(3).getReg();
+ LLT ConstTy = MRI.getType(ConstReg);
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
+ assert((VRegAndVal) && "Value is not a constant");
+
+ // Calculate the new Shift Amount = Old Shift Amount % BitWidth
+ APInt NewConst = VRegAndVal->Value.urem(
+ APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
+
+ Builder.setInstrAndDebugLoc(MI);
+ auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
+ Builder.buildInstr(
+ MI.getOpcode(), {MI.getOperand(0)},
+ {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
+
+ MI.eraseFromParent();
+}
+
bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_SELECT);
// Match (cond ? x : x)
@@ -2652,6 +2869,13 @@ void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
MI.eraseFromParent();
}
+void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, ConstantFP *CFP) {
+ assert(MI.getNumDefs() == 1 && "Expected only one def?");
+ Builder.setInstr(MI);
+ Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
+ MI.eraseFromParent();
+}
+
void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
@@ -3246,7 +3470,7 @@ bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
unsigned BinOpcode = MI.getOpcode();
- // We know know one of the operands is a select of constants. Now verify that
+ // We know that one of the operands is a select of constants. Now verify that
// the other binary operator operand is either a constant, or we can handle a
// variable.
bool CanFoldNonConst =
@@ -4141,8 +4365,7 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
Register Dst = MI.getOperand(0).getReg();
LLT Ty = MRI.getType(Dst);
LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
- if (!getTargetLowering().isConstantUnsignedBitfieldExtractLegal(
- TargetOpcode::G_UBFX, Ty, ExtractTy))
+ if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
return false;
int64_t AndImm, LSBImm;
@@ -4228,8 +4451,7 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd(
const Register Dst = MI.getOperand(0).getReg();
LLT Ty = MRI.getType(Dst);
LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
- if (!getTargetLowering().isConstantUnsignedBitfieldExtractLegal(
- TargetOpcode::G_UBFX, Ty, ExtractTy))
+ if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
return false;
// Try to match shr (and x, c1), c2
@@ -4279,20 +4501,20 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd(
}
bool CombinerHelper::reassociationCanBreakAddressingModePattern(
- MachineInstr &PtrAdd) {
- assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);
+ MachineInstr &MI) {
+ auto &PtrAdd = cast<GPtrAdd>(MI);
- Register Src1Reg = PtrAdd.getOperand(1).getReg();
- MachineInstr *Src1Def = getOpcodeDef(TargetOpcode::G_PTR_ADD, Src1Reg, MRI);
+ Register Src1Reg = PtrAdd.getBaseReg();
+ auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
if (!Src1Def)
return false;
- Register Src2Reg = PtrAdd.getOperand(2).getReg();
+ Register Src2Reg = PtrAdd.getOffsetReg();
if (MRI.hasOneNonDBGUse(Src1Reg))
return false;
- auto C1 = getIConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
+ auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
if (!C1)
return false;
auto C2 = getIConstantVRegVal(Src2Reg, MRI);
@@ -4303,7 +4525,7 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern(
const APInt &C2APIntVal = *C2;
const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
- for (auto &UseMI : MRI.use_nodbg_instructions(Src1Reg)) {
+ for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
// This combine may end up running before ptrtoint/inttoptr combines
// manage to eliminate redundant conversions, so try to look through them.
MachineInstr *ConvUseMI = &UseMI;
@@ -4316,9 +4538,8 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern(
ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
ConvUseOpc = ConvUseMI->getOpcode();
}
- auto LoadStore = ConvUseOpc == TargetOpcode::G_LOAD ||
- ConvUseOpc == TargetOpcode::G_STORE;
- if (!LoadStore)
+ auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
+ if (!LdStMI)
continue;
// Is x[offset2] already not a legal addressing mode? If so then
// reassociating the constants breaks nothing (we test offset2 because
@@ -4326,11 +4547,9 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern(
TargetLoweringBase::AddrMode AM;
AM.HasBaseReg = true;
AM.BaseOffs = C2APIntVal.getSExtValue();
- unsigned AS =
- MRI.getType(ConvUseMI->getOperand(1).getReg()).getAddressSpace();
- Type *AccessTy =
- getTypeForLLT(MRI.getType(ConvUseMI->getOperand(0).getReg()),
- PtrAdd.getMF()->getFunction().getContext());
+ unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
+ Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
+ PtrAdd.getMF()->getFunction().getContext());
const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
AccessTy, AS))
@@ -4519,7 +4738,19 @@ bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI,
return false;
}
-bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
+bool CombinerHelper::matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo) {
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Register SrcOp = MI.getOperand(1).getReg();
+
+ if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
+ MatchInfo = *MaybeCst;
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo) {
Register Op1 = MI.getOperand(1).getReg();
Register Op2 = MI.getOperand(2).getReg();
auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
@@ -4529,6 +4760,42 @@ bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
return true;
}
+bool CombinerHelper::matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP* &MatchInfo) {
+ Register Op1 = MI.getOperand(1).getReg();
+ Register Op2 = MI.getOperand(2).getReg();
+ auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
+ if (!MaybeCst)
+ return false;
+ MatchInfo =
+ ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
+ return true;
+}
+
+bool CombinerHelper::matchConstantFoldFMA(MachineInstr &MI,
+ ConstantFP *&MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FMA ||
+ MI.getOpcode() == TargetOpcode::G_FMAD);
+ auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
+
+ const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
+ if (!Op3Cst)
+ return false;
+
+ const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
+ if (!Op2Cst)
+ return false;
+
+ const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
+ if (!Op1Cst)
+ return false;
+
+ APFloat Op1F = Op1Cst->getValueAPF();
+ Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
+ APFloat::rmNearestTiesToEven);
+ MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
+ return true;
+}
+
bool CombinerHelper::matchNarrowBinopFeedingAnd(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
// Look for a binop feeding into an AND with a mask:
@@ -6018,12 +6285,36 @@ bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
}
-bool CombinerHelper::tryCombine(MachineInstr &MI) {
- if (tryCombineCopy(MI))
- return true;
- if (tryCombineExtendingLoads(MI))
- return true;
- if (tryCombineIndexedLoadStore(MI))
+bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) {
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ auto *LHSDef = MRI.getVRegDef(LHS);
+ if (getIConstantVRegVal(LHS, MRI).has_value())
return true;
- return false;
+
+ // LHS may be a G_CONSTANT_FOLD_BARRIER. If so we commute
+ // as long as we don't already have a constant on the RHS.
+ if (LHSDef->getOpcode() != TargetOpcode::G_CONSTANT_FOLD_BARRIER)
+ return false;
+ return MRI.getVRegDef(RHS)->getOpcode() !=
+ TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
+ !getIConstantVRegVal(RHS, MRI);
+}
+
+bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) {
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ std::optional<FPValueAndVReg> ValAndVReg;
+ if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
+ return false;
+ return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
+}
+
+void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
+ Observer.changingInstr(MI);
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+ MI.getOperand(1).setReg(RHSReg);
+ MI.getOperand(2).setReg(LHSReg);
+ Observer.changedInstr(MI);
}
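
Among the additions above, matchConstantLargerBitWidth / applyFunnelShiftConstantModulo rewrite a funnel shift whose constant shift amount is at least the bit width, reducing the amount modulo that width. The arithmetic in isolation, as a worked example (values chosen purely for illustration):

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  // A G_FSHL/G_FSHR on s64 with a constant shift amount of 70 is rebuilt with
  // 70 urem 64 == 6 as the new shift amount.
  llvm::APInt ShAmt(/*numBits=*/64, /*val=*/70);
  llvm::APInt NewShAmt = ShAmt.urem(llvm::APInt(64, 64));
  assert(NewShAmt == 6 && "70 % 64 == 6");
  return 0;
}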
diff --git a/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp b/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
index d747cbf5aadc..26752369a771 100644
--- a/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
@@ -26,12 +26,19 @@ GIMatchTableExecutor::MatcherState::MatcherState(unsigned MaxRenderers)
GIMatchTableExecutor::GIMatchTableExecutor() = default;
-bool GIMatchTableExecutor::isOperandImmEqual(
- const MachineOperand &MO, int64_t Value,
- const MachineRegisterInfo &MRI) const {
- if (MO.isReg() && MO.getReg())
+bool GIMatchTableExecutor::isOperandImmEqual(const MachineOperand &MO,
+ int64_t Value,
+ const MachineRegisterInfo &MRI,
+ bool Splat) const {
+ if (MO.isReg() && MO.getReg()) {
if (auto VRegVal = getIConstantVRegValWithLookThrough(MO.getReg(), MRI))
return VRegVal->Value.getSExtValue() == Value;
+
+ if (Splat) {
+ if (auto VRegVal = getIConstantSplatVal(MO.getReg(), MRI))
+ return VRegVal->getSExtValue() == Value;
+ }
+ }
return false;
}
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 363ffbfa90b5..ea8c20cdcd45 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Module.h"
+#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "gisel-known-bits"
@@ -48,6 +49,8 @@ Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
}
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
default:
return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
}
@@ -72,7 +75,7 @@ KnownBits GISelKnownBits::getKnownBits(Register R, const APInt &DemandedElts,
assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared");
KnownBits Known;
- computeKnownBitsImpl(R, Known, DemandedElts);
+ computeKnownBitsImpl(R, Known, DemandedElts, Depth);
ComputeKnownBitsCache.clear();
return Known;
}
@@ -726,6 +729,8 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
}
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
default: {
unsigned NumBits =
TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);
@@ -769,3 +774,12 @@ void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) {
return false;
}
+
+GISelKnownBits &GISelKnownBitsAnalysis::get(MachineFunction &MF) {
+ if (!Info) {
+ unsigned MaxDepth =
+ MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6;
+ Info = std::make_unique<GISelKnownBits>(MF, MaxDepth);
+ }
+ return *Info.get();
+}
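
GISelKnownBitsAnalysis::get() now constructs the GISelKnownBits object lazily and caps its recursion depth at 2 under CodeGenOptLevel::None versus 6 otherwise. A sketch of the usual call site inside a pass's runOnMachineFunction (illustrative fragment; it assumes the pass already added GISelKnownBitsAnalysis in getAnalysisUsage, and SomeReg names a virtual register):

// Inside runOnMachineFunction(MachineFunction &MF):
GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
// Queries are bounded by the MaxDepth picked in get() above.
KnownBits Known = KB->getKnownBits(SomeReg);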
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 9a67a8d05a4d..14a4e72152e7 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -62,6 +62,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
@@ -80,6 +81,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/MemoryOpRemark.h"
#include <algorithm>
#include <cassert>
@@ -127,7 +129,7 @@ static void reportTranslationError(MachineFunction &MF,
ORE.emit(R);
}
-IRTranslator::IRTranslator(CodeGenOpt::Level optlevel)
+IRTranslator::IRTranslator(CodeGenOptLevel optlevel)
: MachineFunctionPass(ID), OptLevel(optlevel) {}
#ifndef NDEBUG
@@ -173,7 +175,7 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
AU.addRequired<GISelCSEAnalysisWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
- if (OptLevel != CodeGenOpt::None) {
+ if (OptLevel != CodeGenOptLevel::None) {
AU.addRequired<BranchProbabilityInfoWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
}
@@ -358,7 +360,7 @@ bool IRTranslator::translateCompare(const User &U,
bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
const ReturnInst &RI = cast<ReturnInst>(U);
const Value *Ret = RI.getReturnValue();
- if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0)
+ if (Ret && DL->getTypeStoreSize(Ret->getType()).isZero())
Ret = nullptr;
ArrayRef<Register> VRegs;
@@ -578,7 +580,8 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
if (BrInst.isUnconditional()) {
// If the unconditional target is the layout successor, fallthrough.
- if (OptLevel == CodeGenOpt::None || !CurMBB.isLayoutSuccessor(Succ0MBB))
+ if (OptLevel == CodeGenOptLevel::None ||
+ !CurMBB.isLayoutSuccessor(Succ0MBB))
MIRBuilder.buildBr(*Succ0MBB);
// Link successors.
@@ -720,7 +723,7 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
return true;
}
- SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr);
+ SL->findJumpTables(Clusters, &SI, std::nullopt, DefaultMBB, nullptr, nullptr);
SL->findBitTestClusters(Clusters, &SI);
LLVM_DEBUG({
@@ -766,7 +769,7 @@ void IRTranslator::emitJumpTable(SwitchCG::JumpTable &JT,
MIB.setMBB(*MBB);
MIB.setDebugLoc(CurBuilder->getDebugLoc());
- Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
+ Type *PtrIRTy = PointerType::getUnqual(MF->getFunction().getContext());
const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
auto Table = MIB.buildJumpTable(PtrTy, JT.JTI);
@@ -789,7 +792,7 @@ bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT,
// This value may be smaller or larger than the target's pointer type, and
// therefore require extension or truncating.
- Type *PtrIRTy = SValue.getType()->getPointerTo();
+ auto *PtrIRTy = PointerType::getUnqual(SValue.getContext());
const LLT PtrScalarTy = LLT::scalar(DL->getTypeSizeInBits(PtrIRTy));
Sub = MIB.buildZExtOrTrunc(PtrScalarTy, Sub);
@@ -1014,7 +1017,7 @@ void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0);
auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg);
- Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
+ Type *PtrIRTy = PointerType::getUnqual(MF->getFunction().getContext());
const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
LLT MaskTy = SwitchOpTy;
@@ -1483,6 +1486,9 @@ bool IRTranslator::translateBitCast(const User &U,
bool IRTranslator::translateCast(unsigned Opcode, const User &U,
MachineIRBuilder &MIRBuilder) {
+ if (U.getType()->getScalarType()->isBFloatTy() ||
+ U.getOperand(0)->getType()->getScalarType()->isBFloatTy())
+ return false;
Register Op = getOrCreateVReg(*U.getOperand(0));
Register Res = getOrCreateVReg(U);
MIRBuilder.buildInstr(Opcode, {Res}, {Op});
@@ -1498,6 +1504,12 @@ bool IRTranslator::translateGetElementPtr(const User &U,
Type *OffsetIRTy = DL->getIndexType(PtrIRTy);
LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
+ uint32_t Flags = 0;
+ if (isa<Instruction>(U)) {
+ const Instruction &I = cast<Instruction>(U);
+ Flags = MachineInstr::copyFlagsFromInstruction(I);
+ }
+
// Normalize Vector GEP - all scalar operands should be converted to the
// splat vector.
unsigned VectorWidth = 0;
@@ -1578,7 +1590,12 @@ bool IRTranslator::translateGetElementPtr(const User &U,
if (Offset != 0) {
auto OffsetMIB =
MIRBuilder.buildConstant(OffsetTy, Offset);
- MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0));
+
+ if (int64_t(Offset) >= 0 && cast<GEPOperator>(U).isInBounds())
+ Flags |= MachineInstr::MIFlag::NoUWrap;
+
+ MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0),
+ Flags);
return true;
}
@@ -1742,6 +1759,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_FEXP;
case Intrinsic::exp2:
return TargetOpcode::G_FEXP2;
+ case Intrinsic::exp10:
+ return TargetOpcode::G_FEXP10;
case Intrinsic::fabs:
return TargetOpcode::G_FABS;
case Intrinsic::copysign:
@@ -1797,6 +1816,10 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_VECREDUCE_FMIN;
case Intrinsic::vector_reduce_fmax:
return TargetOpcode::G_VECREDUCE_FMAX;
+ case Intrinsic::vector_reduce_fminimum:
+ return TargetOpcode::G_VECREDUCE_FMINIMUM;
+ case Intrinsic::vector_reduce_fmaximum:
+ return TargetOpcode::G_VECREDUCE_FMAXIMUM;
case Intrinsic::vector_reduce_add:
return TargetOpcode::G_VECREDUCE_ADD;
case Intrinsic::vector_reduce_mul:
@@ -1819,6 +1842,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_LROUND;
case Intrinsic::llround:
return TargetOpcode::G_LLROUND;
+ case Intrinsic::get_fpmode:
+ return TargetOpcode::G_GET_FPMODE;
}
return Intrinsic::not_intrinsic;
}
@@ -1939,6 +1964,8 @@ bool IRTranslator::translateIfEntryValueArgument(
if (!PhysReg)
return false;
+ // Append an op deref to account for the fact that this is a dbg_declare.
+ Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
MF->setVariableDbgInfo(DebugInst.getVariable(), Expr, *PhysReg,
DebugInst.getDebugLoc());
return true;
@@ -1966,7 +1993,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end: {
// No stack colouring in O0, discard region information.
- if (MF->getTarget().getOptLevel() == CodeGenOpt::None)
+ if (MF->getTarget().getOptLevel() == CodeGenOptLevel::None)
return true;
unsigned Op = ID == Intrinsic::lifetime_start ? TargetOpcode::LIFETIME_START
@@ -2041,12 +2068,12 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
auto &TLI = *MF->getSubtarget().getTargetLowering();
Value *Ptr = CI.getArgOperand(0);
unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8;
+ Align Alignment = getKnownAlignment(Ptr, *DL);
- // FIXME: Get alignment
MIRBuilder.buildInstr(TargetOpcode::G_VASTART, {}, {getOrCreateVReg(*Ptr)})
.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Ptr),
MachineMemOperand::MOStore,
- ListSize, Align(1)));
+ ListSize, Alignment));
return true;
}
case Intrinsic::dbg_value: {
@@ -2229,31 +2256,12 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return true;
}
case Intrinsic::stacksave: {
- // Save the stack pointer to the location provided by the intrinsic.
- Register Reg = getOrCreateVReg(CI);
- Register StackPtr = MF->getSubtarget()
- .getTargetLowering()
- ->getStackPointerRegisterToSaveRestore();
-
- // If the target doesn't specify a stack pointer, then fall back.
- if (!StackPtr)
- return false;
-
- MIRBuilder.buildCopy(Reg, StackPtr);
+ MIRBuilder.buildInstr(TargetOpcode::G_STACKSAVE, {getOrCreateVReg(CI)}, {});
return true;
}
case Intrinsic::stackrestore: {
- // Restore the stack pointer from the location provided by the intrinsic.
- Register Reg = getOrCreateVReg(*CI.getArgOperand(0));
- Register StackPtr = MF->getSubtarget()
- .getTargetLowering()
- ->getStackPointerRegisterToSaveRestore();
-
- // If the target doesn't specify a stack pointer, then fall back.
- if (!StackPtr)
- return false;
-
- MIRBuilder.buildCopy(StackPtr, Reg);
+ MIRBuilder.buildInstr(TargetOpcode::G_STACKRESTORE, {},
+ {getOrCreateVReg(*CI.getArgOperand(0))});
return true;
}
case Intrinsic::cttz:
@@ -2387,6 +2395,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0};
return CLI->lowerCall(MIRBuilder, Info);
}
+ case Intrinsic::amdgcn_cs_chain:
+ return translateCallBase(CI, MIRBuilder);
case Intrinsic::fptrunc_round: {
uint32_t Flags = MachineInstr::copyFlagsFromInstruction(CI);
@@ -2415,6 +2425,16 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return true;
}
+ case Intrinsic::set_fpmode: {
+ Value *FPState = CI.getOperand(0);
+ MIRBuilder.buildInstr(TargetOpcode::G_SET_FPMODE, {},
+ { getOrCreateVReg(*FPState) });
+ return true;
+ }
+ case Intrinsic::reset_fpmode: {
+ MIRBuilder.buildInstr(TargetOpcode::G_RESET_FPMODE, {}, {});
+ return true;
+ }
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
@@ -2493,7 +2513,8 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
auto TII = MF->getTarget().getIntrinsicInfo();
const Function *F = CI.getCalledFunction();
- // FIXME: support Windows dllimport function calls.
+ // FIXME: support Windows dllimport function calls and calls through
+ // weak symbols.
if (F && (F->hasDLLImportStorageClass() ||
(MF->getTarget().getTargetTriple().isOSWindows() &&
F->hasExternalWeakLinkage())))
@@ -2533,8 +2554,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
// Ignore the callsite attributes. Backend code is most likely not expecting
// an intrinsic to sometimes have side effects and sometimes not.
- MachineInstrBuilder MIB =
- MIRBuilder.buildIntrinsic(ID, ResultRegs, !F->doesNotAccessMemory());
+ MachineInstrBuilder MIB = MIRBuilder.buildIntrinsic(ID, ResultRegs);
if (isa<FPMathOperator>(CI))
MIB->copyIRFlags(CI);
@@ -2676,6 +2696,13 @@ bool IRTranslator::translateInvoke(const User &U,
if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI()))
return false;
+ // FIXME: support Windows dllimport function calls and calls through
+ // weak symbols.
+ if (Fn && (Fn->hasDLLImportStorageClass() ||
+ (MF->getTarget().getTargetTriple().isOSWindows() &&
+ Fn->hasExternalWeakLinkage())))
+ return false;
+
bool LowerInlineAsm = I.isInlineAsm();
bool NeedEHLabel = true;
@@ -2868,7 +2895,7 @@ bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) {
}
bool IRTranslator::translateUnreachable(const User &U, MachineIRBuilder &MIRBuilder) {
- if (!MF->getTarget().Options.TrapUnreachable)
+ if (!MF->getTarget().Options.TrapUnreachable)
return true;
auto &UI = cast<UnreachableInst>(U);
@@ -2885,7 +2912,7 @@ bool IRTranslator::translateUnreachable(const User &U, MachineIRBuilder &MIRBuil
}
}
- MIRBuilder.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>(), true);
+ MIRBuilder.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>());
return true;
}
@@ -3321,7 +3348,7 @@ bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD,
CurBuilder->setInsertPt(*ParentBB, ParentBB->end());
// First create the loads to the guard/stack slot for the comparison.
const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
- Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
+ Type *PtrIRTy = PointerType::getUnqual(MF->getFunction().getContext());
const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
LLT PtrMemTy = getLLTForMVT(TLI.getPointerMemTy(*DL));
@@ -3331,7 +3358,7 @@ bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD,
Register Guard;
Register StackSlotPtr = CurBuilder->buildFrameIndex(PtrTy, FI).getReg(0);
const Module &M = *ParentBB->getParent()->getFunction().getParent();
- Align Align = DL->getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));
+ Align Align = DL->getPrefTypeAlign(PointerType::getUnqual(M.getContext()));
// Generate code to load the content of the guard slot.
Register GuardVal =
@@ -3500,7 +3527,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
ORE = std::make_unique<OptimizationRemarkEmitter>(&F);
const TargetMachine &TM = MF->getTarget();
TM.resetTargetOptions(F);
- EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F);
+ EnableOpts = OptLevel != CodeGenOptLevel::None && !skipFunction(F);
FuncInfo.MF = MF;
if (EnableOpts) {
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index 3925611f1485..4089a5e941b0 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -133,71 +133,6 @@ static void getRegistersForValue(MachineFunction &MF,
}
}
-/// Return an integer indicating how general CT is.
-static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
- switch (CT) {
- case TargetLowering::C_Immediate:
- case TargetLowering::C_Other:
- case TargetLowering::C_Unknown:
- return 0;
- case TargetLowering::C_Register:
- return 1;
- case TargetLowering::C_RegisterClass:
- return 2;
- case TargetLowering::C_Memory:
- case TargetLowering::C_Address:
- return 3;
- }
- llvm_unreachable("Invalid constraint type");
-}
-
-static void chooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
- const TargetLowering *TLI) {
- assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
- unsigned BestIdx = 0;
- TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
- int BestGenerality = -1;
-
- // Loop over the options, keeping track of the most general one.
- for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
- TargetLowering::ConstraintType CType =
- TLI->getConstraintType(OpInfo.Codes[i]);
-
- // Indirect 'other' or 'immediate' constraints are not allowed.
- if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
- CType == TargetLowering::C_Register ||
- CType == TargetLowering::C_RegisterClass))
- continue;
-
- // If this is an 'other' or 'immediate' constraint, see if the operand is
- // valid for it. For example, on X86 we might have an 'rI' constraint. If
- // the operand is an integer in the range [0..31] we want to use I (saving a
- // load of a register), otherwise we must use 'r'.
- if (CType == TargetLowering::C_Other ||
- CType == TargetLowering::C_Immediate) {
- assert(OpInfo.Codes[i].size() == 1 &&
- "Unhandled multi-letter 'other' constraint");
- // FIXME: prefer immediate constraints if the target allows it
- }
-
- // Things with matching constraints can only be registers, per gcc
- // documentation. This mainly affects "g" constraints.
- if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
- continue;
-
- // This constraint letter is more general than the previous one, use it.
- int Generality = getConstraintGenerality(CType);
- if (Generality > BestGenerality) {
- BestType = CType;
- BestIdx = i;
- BestGenerality = Generality;
- }
- }
-
- OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
- OpInfo.ConstraintType = BestType;
-}
-
static void computeConstraintToUse(const TargetLowering *TLI,
TargetLowering::AsmOperandInfo &OpInfo) {
assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
@@ -207,7 +142,18 @@ static void computeConstraintToUse(const TargetLowering *TLI,
OpInfo.ConstraintCode = OpInfo.Codes[0];
OpInfo.ConstraintType = TLI->getConstraintType(OpInfo.ConstraintCode);
} else {
- chooseConstraint(OpInfo, TLI);
+ TargetLowering::ConstraintGroup G = TLI->getConstraintPreferences(OpInfo);
+ if (G.empty())
+ return;
+ // FIXME: prefer immediate constraints if the target allows it
+ unsigned BestIdx = 0;
+ for (const unsigned E = G.size();
+ BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
+ G[BestIdx].second == TargetLowering::C_Immediate);
+ ++BestIdx)
+ ;
+ OpInfo.ConstraintCode = G[BestIdx].first;
+ OpInfo.ConstraintType = G[BestIdx].second;
}
// 'X' matches anything.
@@ -229,8 +175,8 @@ static void computeConstraintToUse(const TargetLowering *TLI,
}
static unsigned getNumOpRegs(const MachineInstr &I, unsigned OpIdx) {
- unsigned Flag = I.getOperand(OpIdx).getImm();
- return InlineAsm::getNumOperandRegisters(Flag);
+ const InlineAsm::Flag F(I.getOperand(OpIdx).getImm());
+ return F.getNumOperandRegisters();
}
static bool buildAnyextOrCopy(Register Dst, Register Src,
@@ -373,16 +319,16 @@ bool InlineAsmLowering::lowerInlineAsm(
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
- unsigned ConstraintID =
+ const InlineAsm::ConstraintCode ConstraintID =
TLI->getInlineAsmMemConstraint(OpInfo.ConstraintCode);
- assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ assert(ConstraintID != InlineAsm::ConstraintCode::Unknown &&
"Failed to convert memory constraint code to constraint id.");
// Add information to the INLINEASM instruction to know about this
// output.
- unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
- OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
- Inst.addImm(OpFlags);
+ InlineAsm::Flag Flag(InlineAsm::Kind::Mem, 1);
+ Flag.setMemConstraint(ConstraintID);
+ Inst.addImm(Flag);
ArrayRef<Register> SourceRegs =
GetOrCreateVRegs(*OpInfo.CallOperandVal);
assert(
@@ -405,17 +351,17 @@ bool InlineAsmLowering::lowerInlineAsm(
// Add information to the INLINEASM instruction to know that this
// register is set.
- unsigned Flag = InlineAsm::getFlagWord(
- OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber
- : InlineAsm::Kind_RegDef,
- OpInfo.Regs.size());
+ InlineAsm::Flag Flag(OpInfo.isEarlyClobber
+ ? InlineAsm::Kind::RegDefEarlyClobber
+ : InlineAsm::Kind::RegDef,
+ OpInfo.Regs.size());
if (OpInfo.Regs.front().isVirtual()) {
// Put the register class of the virtual registers in the flag word.
// That way, later passes can recompute register class constraints for
// inline assembly as well as normal instructions. Don't do this for
// tied operands that can use the regclass information from the def.
const TargetRegisterClass *RC = MRI->getRegClass(OpInfo.Regs.front());
- Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
+ Flag.setRegClass(RC->getID());
}
Inst.addImm(Flag);
@@ -441,14 +387,13 @@ bool InlineAsmLowering::lowerInlineAsm(
InstFlagIdx += getNumOpRegs(*Inst, InstFlagIdx) + 1;
assert(getNumOpRegs(*Inst, InstFlagIdx) == 1 && "Wrong flag");
- unsigned MatchedOperandFlag = Inst->getOperand(InstFlagIdx).getImm();
- if (InlineAsm::isMemKind(MatchedOperandFlag)) {
+ const InlineAsm::Flag MatchedOperandFlag(Inst->getOperand(InstFlagIdx).getImm());
+ if (MatchedOperandFlag.isMemKind()) {
LLVM_DEBUG(dbgs() << "Matching input constraint to mem operand not "
"supported. This should be target specific.\n");
return false;
}
- if (!InlineAsm::isRegDefKind(MatchedOperandFlag) &&
- !InlineAsm::isRegDefEarlyClobberKind(MatchedOperandFlag)) {
+ if (!MatchedOperandFlag.isRegDefKind() && !MatchedOperandFlag.isRegDefEarlyClobberKind()) {
LLVM_DEBUG(dbgs() << "Unknown matching constraint\n");
return false;
}
@@ -470,9 +415,9 @@ bool InlineAsmLowering::lowerInlineAsm(
}
// Add Flag and input register operand (In) to Inst. Tie In to Def.
- unsigned UseFlag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, 1);
- unsigned Flag = InlineAsm::getFlagWordForMatchingOp(UseFlag, DefIdx);
- Inst.addImm(Flag);
+ InlineAsm::Flag UseFlag(InlineAsm::Kind::RegUse, 1);
+ UseFlag.setMatchingOp(DefIdx);
+ Inst.addImm(UseFlag);
Inst.addReg(In);
Inst->tieOperands(DefRegIdx, Inst->getNumOperands() - 1);
break;
@@ -501,8 +446,8 @@ bool InlineAsmLowering::lowerInlineAsm(
"Expected constraint to be lowered to at least one operand");
// Add information to the INLINEASM node to know about this input.
- unsigned OpFlags =
- InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
+ const unsigned OpFlags =
+ InlineAsm::Flag(InlineAsm::Kind::Imm, Ops.size());
Inst.addImm(OpFlags);
Inst.add(Ops);
break;
@@ -518,10 +463,10 @@ bool InlineAsmLowering::lowerInlineAsm(
assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
- unsigned ConstraintID =
+ const InlineAsm::ConstraintCode ConstraintID =
TLI->getInlineAsmMemConstraint(OpInfo.ConstraintCode);
- unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
- OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
+ InlineAsm::Flag OpFlags(InlineAsm::Kind::Mem, 1);
+ OpFlags.setMemConstraint(ConstraintID);
Inst.addImm(OpFlags);
ArrayRef<Register> SourceRegs =
GetOrCreateVRegs(*OpInfo.CallOperandVal);
@@ -563,11 +508,11 @@ bool InlineAsmLowering::lowerInlineAsm(
return false;
}
- unsigned Flag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, NumRegs);
+ InlineAsm::Flag Flag(InlineAsm::Kind::RegUse, NumRegs);
if (OpInfo.Regs.front().isVirtual()) {
// Put the register class of the virtual registers in the flag word.
const TargetRegisterClass *RC = MRI->getRegClass(OpInfo.Regs.front());
- Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
+ Flag.setRegClass(RC->getID());
}
Inst.addImm(Flag);
if (!buildAnyextOrCopy(OpInfo.Regs[0], SourceRegs[0], MIRBuilder))
@@ -578,10 +523,9 @@ bool InlineAsmLowering::lowerInlineAsm(
case InlineAsm::isClobber: {
- unsigned NumRegs = OpInfo.Regs.size();
+ const unsigned NumRegs = OpInfo.Regs.size();
if (NumRegs > 0) {
- unsigned Flag =
- InlineAsm::getFlagWord(InlineAsm::Kind_Clobber, NumRegs);
+ unsigned Flag = InlineAsm::Flag(InlineAsm::Kind::Clobber, NumRegs);
Inst.addImm(Flag);
for (Register Reg : OpInfo.Regs) {
diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 9bbef11067ae..baea773cf528 100644
--- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -58,21 +58,21 @@ INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE,
"Select target instructions out of generic instructions",
false, false)
-InstructionSelect::InstructionSelect(CodeGenOpt::Level OL)
+InstructionSelect::InstructionSelect(CodeGenOptLevel OL)
: MachineFunctionPass(ID), OptLevel(OL) {}
// In order not to crash when calling getAnalysis during testing with -run-pass
// we use the default opt level here instead of None, so that the addRequired()
// calls are made in getAnalysisUsage().
InstructionSelect::InstructionSelect()
- : MachineFunctionPass(ID), OptLevel(CodeGenOpt::Default) {}
+ : MachineFunctionPass(ID), OptLevel(CodeGenOptLevel::Default) {}
void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
AU.addRequired<GISelKnownBitsAnalysis>();
AU.addPreserved<GISelKnownBitsAnalysis>();
- if (OptLevel != CodeGenOpt::None) {
+ if (OptLevel != CodeGenOptLevel::None) {
AU.addRequired<ProfileSummaryInfoWrapperPass>();
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
}
@@ -90,14 +90,15 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector();
+ ISel->setTargetPassConfig(&TPC);
- CodeGenOpt::Level OldOptLevel = OptLevel;
+ CodeGenOptLevel OldOptLevel = OptLevel;
auto RestoreOptLevel = make_scope_exit([=]() { OptLevel = OldOptLevel; });
- OptLevel = MF.getFunction().hasOptNone() ? CodeGenOpt::None
+ OptLevel = MF.getFunction().hasOptNone() ? CodeGenOptLevel::None
: MF.getTarget().getOptLevel();
GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
- if (OptLevel != CodeGenOpt::None) {
+ if (OptLevel != CodeGenOptLevel::None) {
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
if (PSI && PSI->hasProfileSummary())
BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
@@ -109,6 +110,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
// An optimization remark emitter. Used to report failures.
MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
+ ISel->setRemarkEmitter(&MORE);
// FIXME: There are many other MF/MFI fields we need to initialize.
diff --git a/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
index 8cfb1b786c24..45b403bdd076 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
@@ -76,6 +76,9 @@ LegacyLegalizerInfo::LegacyLegalizerInfo() {
setScalarAction(TargetOpcode::G_INTRINSIC, 0, {{1, Legal}});
setScalarAction(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS, 0, {{1, Legal}});
+ setScalarAction(TargetOpcode::G_INTRINSIC_CONVERGENT, 0, {{1, Legal}});
+ setScalarAction(TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS, 0,
+ {{1, Legal}});
setLegalizeScalarToDifferentSizeStrategy(
TargetOpcode::G_IMPLICIT_DEF, 0, narrowToSmallerAndUnsupportedIfTooSmall);
diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index aecbe0b7604c..6d75258c1041 100644
--- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -218,7 +218,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
// This will keep all the observers notified about new insertions/deletions.
RAIIMFObsDelInstaller Installer(MF, WrapperObserver);
LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder, KB);
- LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI);
+ LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI, KB);
bool Changed = false;
SmallVector<MachineInstr *, 128> RetryList;
do {
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index f0da0d88140f..37e7153be572 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -119,8 +119,7 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
MIRBuilder.setInstrAndDebugLoc(MI);
- if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
- MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
+ if (isa<GIntrinsic>(MI))
return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
auto Step = LI.getAction(MI, MRI);
switch (Step.Action) {
@@ -526,6 +525,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(EXP_F);
case TargetOpcode::G_FEXP2:
RTLIBCASE(EXP2_F);
+ case TargetOpcode::G_FEXP10:
+ RTLIBCASE(EXP10_F);
case TargetOpcode::G_FREM:
RTLIBCASE(REM_F);
case TargetOpcode::G_FPOW:
@@ -690,7 +691,7 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
LLT OpLLT = MRI.getType(Reg);
Type *OpTy = nullptr;
if (OpLLT.isPointer())
- OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
+ OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
else
OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
Args.push_back({Reg, OpTy, 0});
@@ -795,10 +796,134 @@ conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
{{MI.getOperand(1).getReg(), FromType, 0}});
}
+static RTLIB::Libcall
+getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
+ RTLIB::Libcall RTLibcall;
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_GET_FPMODE:
+ RTLibcall = RTLIB::FEGETMODE;
+ break;
+ case TargetOpcode::G_SET_FPMODE:
+ case TargetOpcode::G_RESET_FPMODE:
+ RTLibcall = RTLIB::FESETMODE;
+ break;
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+ return RTLibcall;
+}
+
+// Some library functions that read FP state (fegetmode, fegetenv) write the
+// state into a region in memory. IR intrinsics that do the same operations
+// (get_fpmode, get_fpenv) return the state as an integer value. To implement
+// these intrinsics via the library functions, we need to use a temporary
+// variable, for example:
+//
+// %0:_(s32) = G_GET_FPMODE
+//
+// is transformed to:
+//
+// %1:_(p0) = G_FRAME_INDEX %stack.0
+// BL &fegetmode
+// %0:_(s32) = G_LOAD %1
+//
+LegalizerHelper::LegalizeResult
+LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
+ MachineInstr &MI) {
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ auto &MF = MIRBuilder.getMF();
+ auto &MRI = *MIRBuilder.getMRI();
+ auto &Ctx = MF.getFunction().getContext();
+
+  // Create a temporary where the library function will put the read state.
+ Register Dst = MI.getOperand(0).getReg();
+ LLT StateTy = MRI.getType(Dst);
+ TypeSize StateSize = StateTy.getSizeInBytes();
+ Align TempAlign = getStackTemporaryAlignment(StateTy);
+ MachinePointerInfo TempPtrInfo;
+ auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
+
+  // Create a call to the library function, with the temporary as an argument.
+ unsigned TempAddrSpace = DL.getAllocaAddrSpace();
+ Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
+ RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
+ auto Res =
+ createLibcall(MIRBuilder, RTLibcall,
+ CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
+ CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}));
+ if (Res != LegalizerHelper::Legalized)
+ return Res;
+
+ // Create a load from the temporary.
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
+ MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
+
+ return LegalizerHelper::Legalized;
+}
+
+// Similar to `createGetStateLibcall`, the function calls a library function
+// using transient space on the stack. In this case the library function reads
+// the content of the memory region.
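+//
+// For example:
+//
+// G_SET_FPMODE %0:_(s32)
+//
+// is transformed to:
+//
+// %1:_(p0) = G_FRAME_INDEX %stack.0
+// G_STORE %0(s32), %1(p0)
+// BL &fesetmode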
+LegalizerHelper::LegalizeResult
+LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
+ MachineInstr &MI) {
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ auto &MF = MIRBuilder.getMF();
+ auto &MRI = *MIRBuilder.getMRI();
+ auto &Ctx = MF.getFunction().getContext();
+
+  // Create a temporary where the library function will get the new state.
+ Register Src = MI.getOperand(0).getReg();
+ LLT StateTy = MRI.getType(Src);
+ TypeSize StateSize = StateTy.getSizeInBytes();
+ Align TempAlign = getStackTemporaryAlignment(StateTy);
+ MachinePointerInfo TempPtrInfo;
+ auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
+
+ // Put the new state into the temporary.
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
+ MIRBuilder.buildStore(Src, Temp, *MMO);
+
+  // Create a call to the library function, with the temporary as an argument.
+ unsigned TempAddrSpace = DL.getAllocaAddrSpace();
+ Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
+ RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
+ return createLibcall(MIRBuilder, RTLibcall,
+ CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
+ CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}));
+}
+
+// The function is used to legalize operations that set the default
+// environment state. In the C library a call like `fesetmode(FE_DFL_MODE)`
+// is used for that. On most targets supported by glibc, FE_DFL_MODE is
+// defined as `((const femode_t *) -1)`; this assumption is used here. If it
+// does not hold for some target, that target must provide custom lowering.
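+//
+// For example, assuming 64-bit pointers:
+//
+// %0:_(s64) = G_CONSTANT i64 -1
+// %1:_(p0) = G_INTTOPTR %0(s64)
+// BL &fesetmode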
+LegalizerHelper::LegalizeResult
+LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
+ MachineInstr &MI) {
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ auto &MF = MIRBuilder.getMF();
+ auto &Ctx = MF.getFunction().getContext();
+
+ // Create an argument for the library function.
+ unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
+ Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
+ unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
+ LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
+ auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
+ DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
+ MIRBuilder.buildIntToPtr(Dest, DefValue);
+
+ RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
+ return createLibcall(MIRBuilder, RTLibcall,
+ CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
+                       CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}));
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
- LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
- unsigned Size = LLTy.getSizeInBits();
auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
switch (MI.getOpcode()) {
@@ -810,6 +935,8 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
case TargetOpcode::G_SREM:
case TargetOpcode::G_UREM:
case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
+ LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
+ unsigned Size = LLTy.getSizeInBits();
Type *HLTy = IntegerType::get(Ctx, Size);
auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
if (Status != Legalized)
@@ -831,6 +958,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
case TargetOpcode::G_FLDEXP:
case TargetOpcode::G_FEXP:
case TargetOpcode::G_FEXP2:
+ case TargetOpcode::G_FEXP10:
case TargetOpcode::G_FCEIL:
case TargetOpcode::G_FFLOOR:
case TargetOpcode::G_FMINNUM:
@@ -839,6 +967,8 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
case TargetOpcode::G_FRINT:
case TargetOpcode::G_FNEARBYINT:
case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
+ LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
+ unsigned Size = LLTy.getSizeInBits();
Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
@@ -901,6 +1031,24 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
MI.eraseFromParent();
return Result;
}
+ case TargetOpcode::G_GET_FPMODE: {
+ LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI);
+ if (Result != Legalized)
+ return Result;
+ break;
+ }
+ case TargetOpcode::G_SET_FPMODE: {
+ LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI);
+ if (Result != Legalized)
+ return Result;
+ break;
+ }
+ case TargetOpcode::G_RESET_FPMODE: {
+ LegalizeResult Result = createResetStateLibcall(MIRBuilder, MI);
+ if (Result != Legalized)
+ return Result;
+ break;
+ }
}
MI.eraseFromParent();
@@ -1297,7 +1445,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
// So long as the new type has more bits than the bits we're extending we
// don't need to break it apart.
- if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
+ if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
Observer.changingInstr(MI);
// We don't lose any non-extension bits by truncating the src and
// sign-extending the dst.
@@ -1340,14 +1488,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Register AshrCstReg =
MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
.getReg(0);
- Register FullExtensionReg = 0;
- Register PartialExtensionReg = 0;
+ Register FullExtensionReg;
+ Register PartialExtensionReg;
// Do the operation on each small part.
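+  // For example, when narrowing a G_SEXT_INREG from 32 bits of an s64 value
+  // with NarrowTy = s32, part 0 is simply copied (it already holds the 32
+  // extended-from bits, including the sign bit) and part 1 becomes an ashr of
+  // part 0 by 31.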
for (int i = 0; i < NumParts; ++i) {
- if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
+ if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
DstRegs.push_back(SrcRegs[i]);
- else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
+ PartialExtensionReg = DstRegs.back();
+ } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
assert(PartialExtensionReg &&
"Expected to visit partial extension before full");
if (FullExtensionReg) {
@@ -1993,8 +2142,20 @@ LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
- auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
- {LeftOperand, RightOperand});
+ // Multiplication cannot overflow if the WideTy is >= 2 * original width,
+ // so we don't need to check the overflow result of larger type Mulo.
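+  // For example, widening an s8 G_UMULO to s16 cannot overflow:
+  // 0xFF * 0xFF = 0xFE01 still fits in 16 bits.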
+ bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
+
+ unsigned MulOpc =
+ WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
+
+ MachineInstrBuilder Mulo;
+ if (WideMulCanOverflow)
+ Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
+ {LeftOperand, RightOperand});
+ else
+ Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
+
auto Mul = Mulo->getOperand(0);
MIRBuilder.buildTrunc(Result, Mul);
@@ -2012,9 +2173,7 @@ LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
}
- // Multiplication cannot overflow if the WideTy is >= 2 * original width,
- // so we don't need to check the overflow result of larger type Mulo.
- if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) {
+ if (WideMulCanOverflow) {
auto Overflow =
MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
// Finally check if the multiplication in the larger type itself overflowed.
@@ -2247,6 +2406,16 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
+ case TargetOpcode::G_ROTR:
+ case TargetOpcode::G_ROTL:
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+
case TargetOpcode::G_SDIV:
case TargetOpcode::G_SREM:
case TargetOpcode::G_SMIN:
@@ -2325,6 +2494,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_IS_FPCLASS:
Observer.changingInstr(MI);
if (TypeIdx == 0)
@@ -2494,6 +2664,17 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return Legalized;
}
case TargetOpcode::G_INSERT_VECTOR_ELT: {
+ if (TypeIdx == 0) {
+ Observer.changingInstr(MI);
+ const LLT WideEltTy = WideTy.getElementType();
+
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
if (TypeIdx == 1) {
Observer.changingInstr(MI);
@@ -2546,6 +2727,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FSQRT:
case TargetOpcode::G_FEXP:
case TargetOpcode::G_FEXP2:
+ case TargetOpcode::G_FEXP10:
case TargetOpcode::G_FPOW:
case TargetOpcode::G_INTRINSIC_TRUNC:
case TargetOpcode::G_INTRINSIC_ROUND:
@@ -2648,6 +2830,23 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_VECREDUCE_FADD:
+ case TargetOpcode::G_VECREDUCE_FMIN:
+ case TargetOpcode::G_VECREDUCE_FMAX:
+ case TargetOpcode::G_VECREDUCE_FMINIMUM:
+ case TargetOpcode::G_VECREDUCE_FMAXIMUM:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ Register VecReg = MI.getOperand(1).getReg();
+ LLT VecTy = MRI.getType(VecReg);
+ LLT WideVecTy = VecTy.isVector()
+ ? LLT::vector(VecTy.getElementCount(), WideTy)
+ : WideTy;
+ widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ Observer.changedInstr(MI);
+ return Legalized;
}
}
@@ -3384,10 +3583,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerFFloor(MI);
case TargetOpcode::G_INTRINSIC_ROUND:
return lowerIntrinsicRound(MI);
- case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
+ case TargetOpcode::G_FRINT: {
// Since round even is the assumed rounding mode for unconstrained FP
// operations, rint and roundeven are the same operation.
- changeOpcode(MI, TargetOpcode::G_FRINT);
+ changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
return Legalized;
}
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
@@ -3421,12 +3620,25 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
}
case G_UADDE: {
auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
- LLT Ty = MRI.getType(Res);
+ const LLT CondTy = MRI.getType(CarryOut);
+ const LLT Ty = MRI.getType(Res);
+ // Initial add of the two operands.
auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
+
+ // Initial check for carry.
+ auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
+
+ // Add the sum and the carry.
auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
- MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
+
+ // Second check for carry. We can only carry if the initial sum is all 1s
+ // and the carry is set, resulting in a new sum of 0.
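+    // For example, with s8 operands 0xFF + 0x00 with carry-in 1: TmpRes is
+    // 0xFF (no carry yet), Res wraps to 0x00, so the second check fires and
+    // CarryOut is set.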
+ auto Zero = MIRBuilder.buildConstant(Ty, 0);
+ auto ResEqZero = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, Res, Zero);
+ auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
+ MIRBuilder.buildOr(CarryOut, Carry, Carry2);
MI.eraseFromParent();
return Legalized;
@@ -3445,13 +3657,23 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
const LLT CondTy = MRI.getType(BorrowOut);
const LLT Ty = MRI.getType(Res);
+ // Initial subtract of the two operands.
auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
+
+ // Initial check for borrow.
+ auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
+
+ // Subtract the borrow from the first subtract.
auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
- auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
- auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
- MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
+ // Second check for borrow. We can only borrow if the initial difference is
+ // 0 and the borrow is set, resulting in a new difference of all 1s.
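+    // For example, with s8 operands 0x05 - 0x05 with borrow-in 1: TmpRes is
+    // 0x00 (no borrow yet), Res wraps to 0xFF, so the second check fires and
+    // BorrowOut is set.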
+ auto Zero = MIRBuilder.buildConstant(Ty, 0);
+ auto TmpResEqZero =
+ MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
+ auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
+ MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
MI.eraseFromParent();
return Legalized;
@@ -3503,6 +3725,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerShuffleVector(MI);
case G_DYN_STACKALLOC:
return lowerDynStackAlloc(MI);
+ case G_STACKSAVE:
+ return lowerStackSave(MI);
+ case G_STACKRESTORE:
+ return lowerStackRestore(MI);
case G_EXTRACT:
return lowerExtract(MI);
case G_INSERT:
@@ -3559,8 +3785,16 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerMemCpyFamily(MI);
case G_MEMCPY_INLINE:
return lowerMemcpyInline(MI);
+ case G_ZEXT:
+ case G_SEXT:
+ case G_ANYEXT:
+ return lowerEXT(MI);
+ case G_TRUNC:
+ return lowerTRUNC(MI);
GISEL_VECREDUCE_CASES_NONSEQ
return lowerVectorReduction(MI);
+ case G_VAARG:
+ return lowerVAArg(MI);
}
}
@@ -4168,6 +4402,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FPOW:
case G_FEXP:
case G_FEXP2:
+ case G_FEXP10:
case G_FLOG:
case G_FLOG2:
case G_FLOG10:
@@ -4425,73 +4660,22 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
return Legalized;
}
-static unsigned getScalarOpcForReduction(unsigned Opc) {
- unsigned ScalarOpc;
- switch (Opc) {
- case TargetOpcode::G_VECREDUCE_FADD:
- ScalarOpc = TargetOpcode::G_FADD;
- break;
- case TargetOpcode::G_VECREDUCE_FMUL:
- ScalarOpc = TargetOpcode::G_FMUL;
- break;
- case TargetOpcode::G_VECREDUCE_FMAX:
- ScalarOpc = TargetOpcode::G_FMAXNUM;
- break;
- case TargetOpcode::G_VECREDUCE_FMIN:
- ScalarOpc = TargetOpcode::G_FMINNUM;
- break;
- case TargetOpcode::G_VECREDUCE_ADD:
- ScalarOpc = TargetOpcode::G_ADD;
- break;
- case TargetOpcode::G_VECREDUCE_MUL:
- ScalarOpc = TargetOpcode::G_MUL;
- break;
- case TargetOpcode::G_VECREDUCE_AND:
- ScalarOpc = TargetOpcode::G_AND;
- break;
- case TargetOpcode::G_VECREDUCE_OR:
- ScalarOpc = TargetOpcode::G_OR;
- break;
- case TargetOpcode::G_VECREDUCE_XOR:
- ScalarOpc = TargetOpcode::G_XOR;
- break;
- case TargetOpcode::G_VECREDUCE_SMAX:
- ScalarOpc = TargetOpcode::G_SMAX;
- break;
- case TargetOpcode::G_VECREDUCE_SMIN:
- ScalarOpc = TargetOpcode::G_SMIN;
- break;
- case TargetOpcode::G_VECREDUCE_UMAX:
- ScalarOpc = TargetOpcode::G_UMAX;
- break;
- case TargetOpcode::G_VECREDUCE_UMIN:
- ScalarOpc = TargetOpcode::G_UMIN;
- break;
- default:
- llvm_unreachable("Unhandled reduction");
- }
- return ScalarOpc;
-}
-
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
- unsigned Opc = MI.getOpcode();
- assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
- Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
- "Sequential reductions not expected");
+ auto &RdxMI = cast<GVecReduce>(MI);
if (TypeIdx != 1)
return UnableToLegalize;
// The semantics of the normal non-sequential reductions allow us to freely
// re-associate the operation.
- auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+ auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
if (NarrowTy.isVector() &&
(SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
return UnableToLegalize;
- unsigned ScalarOpc = getScalarOpcForReduction(Opc);
+ unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
SmallVector<Register> SplitSrcs;
// If NarrowTy is a scalar then we're being asked to scalarize.
const unsigned NumParts =
@@ -4536,10 +4720,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
SmallVector<Register> PartialReductions;
for (unsigned Part = 0; Part < NumParts; ++Part) {
PartialReductions.push_back(
- MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
+ MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
+ .getReg(0));
}
-
// If the types involved are powers of 2, we can generate intermediate vector
// ops, before generating a final reduction operation.
if (isPowerOf2_32(SrcTy.getNumElements()) &&
@@ -4836,7 +5020,9 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
case TargetOpcode::G_SUB:
case TargetOpcode::G_MUL:
case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
case TargetOpcode::G_UADDSAT:
case TargetOpcode::G_USUBSAT:
case TargetOpcode::G_SADDSAT:
@@ -4886,6 +5072,14 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
case TargetOpcode::G_FREEZE:
case TargetOpcode::G_FNEG:
case TargetOpcode::G_FABS:
+ case TargetOpcode::G_FSQRT:
+ case TargetOpcode::G_FCEIL:
+ case TargetOpcode::G_FFLOOR:
+ case TargetOpcode::G_FNEARBYINT:
+ case TargetOpcode::G_FRINT:
+ case TargetOpcode::G_INTRINSIC_ROUND:
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
+ case TargetOpcode::G_INTRINSIC_TRUNC:
case TargetOpcode::G_BSWAP:
case TargetOpcode::G_FCANONICALIZE:
case TargetOpcode::G_SEXT_INREG:
@@ -4943,15 +5137,13 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
MI.eraseFromParent();
return Legalized;
}
- case TargetOpcode::G_TRUNC: {
- Observer.changingInstr(MI);
- moreElementsVectorSrc(MI, MoreTy, 1);
- moreElementsVectorDst(MI, MoreTy, 0);
- Observer.changedInstr(MI);
- return Legalized;
- }
+ case TargetOpcode::G_TRUNC:
case TargetOpcode::G_FPTRUNC:
- case TargetOpcode::G_FPEXT: {
+ case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_UITOFP: {
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
@@ -5765,8 +5957,10 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
MI.eraseFromParent();
return Legalized;
}
+ Observer.changingInstr(MI);
MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
MI.getOperand(1).setReg(MIBTmp.getReg(0));
+ Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_CTPOP: {
@@ -5956,6 +6150,105 @@ LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
return Result;
}
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
+ auto [Dst, Src] = MI.getFirst2Regs();
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+
+ uint32_t DstTySize = DstTy.getSizeInBits();
+ uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
+ uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
+
+ if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
+ !isPowerOf2_32(SrcTyScalarSize))
+ return UnableToLegalize;
+
+  // If the step between the source and destination types is too large, split
+  // it by creating an intermediate extend instruction.
+ if (SrcTyScalarSize * 2 < DstTyScalarSize) {
+ LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
+ // If the destination type is illegal, split it into multiple statements
+ // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
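+    // For example, with an illegal destination type:
+    //   %d:_(<4 x s32>) = G_ZEXT %s:_(<4 x s8>)
+    // becomes:
+    //   %mid:_(<4 x s16>) = G_ZEXT %s:_(<4 x s8>)
+    //   %lo:_(<2 x s16>), %hi:_(<2 x s16>) = G_UNMERGE_VALUES %mid
+    //   %lo32:_(<2 x s32>) = G_ZEXT %lo
+    //   %hi32:_(<2 x s32>) = G_ZEXT %hi
+    //   %d:_(<4 x s32>) = G_CONCAT_VECTORS %lo32, %hi32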
+ auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
+ // Unmerge the vector
+ LLT EltTy = MidTy.changeElementCount(
+ MidTy.getElementCount().divideCoefficientBy(2));
+ auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
+
+ // ZExt the vectors
+ LLT ZExtResTy = DstTy.changeElementCount(
+ DstTy.getElementCount().divideCoefficientBy(2));
+ auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
+ {UnmergeSrc.getReg(0)});
+ auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
+ {UnmergeSrc.getReg(1)});
+
+    // Merge the resulting vectors
+ MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
+ // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+  // Similar to how operand splitting is done in SelectionDAG, we can handle
+ // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
+  //  %inlo(<4 x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
+ // %lo16(<4 x s16>) = G_TRUNC %inlo
+ // %hi16(<4 x s16>) = G_TRUNC %inhi
+ // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
+ // %res(<8 x s8>) = G_TRUNC %in16
+
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
+ isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
+ isPowerOf2_32(SrcTy.getNumElements()) &&
+ isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
+ // Split input type.
+ LLT SplitSrcTy = SrcTy.changeElementCount(
+ SrcTy.getElementCount().divideCoefficientBy(2));
+
+ // First, split the source into two smaller vectors.
+ SmallVector<Register, 2> SplitSrcs;
+ extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs);
+
+ // Truncate the splits into intermediate narrower elements.
+ LLT InterTy;
+ if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
+ InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
+ else
+ InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
+ for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
+ SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
+ }
+
+ // Combine the new truncates into one vector
+ auto Merge = MIRBuilder.buildMergeLikeInstr(
+ DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
+
+ // Truncate the new vector to the final result type
+ if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
+ else
+ MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
+
+ MI.eraseFromParent();
+
+ return Legalized;
+ }
+ return UnableToLegalize;
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
@@ -6523,23 +6816,25 @@ LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
// round(x) =>
// t = trunc(x);
// d = fabs(x - t);
- // o = copysign(1.0f, x);
- // return t + (d >= 0.5 ? o : 0.0);
+ // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
+ // return t + o;
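+  // For example, round(-2.5): t = -2.0, d = 0.5, o = copysign(1.0, -2.5), so
+  // the result is -2.0 + -1.0 = -3.0 (ties round away from zero).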
auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
- auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
- auto One = MIRBuilder.buildFConstant(Ty, 1.0);
+
auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
- auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X);
+ auto Cmp =
+ MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
- auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half,
- Flags);
- auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags);
+ // Could emit G_UITOFP instead
+ auto One = MIRBuilder.buildFConstant(Ty, 1.0);
+ auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
+ auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
+ auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
- MIRBuilder.buildFAdd(DstReg, T, Sel, Flags);
+ MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
MI.eraseFromParent();
return Legalized;
@@ -6688,8 +6983,8 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
Align EltAlign;
MachinePointerInfo PtrInfo;
- auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()),
- VecAlign, PtrInfo);
+ auto StackTemp = createStackTemporary(
+ TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
// Get the pointer to the element, and be sure not to hit undefined behavior
@@ -6727,26 +7022,9 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
LLT IdxTy = LLT::scalar(32);
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
-
- if (DstTy.isScalar()) {
- if (Src0Ty.isVector())
- return UnableToLegalize;
-
- // This is just a SELECT.
- assert(Mask.size() == 1 && "Expected a single mask element");
- Register Val;
- if (Mask[0] < 0 || Mask[0] > 1)
- Val = MIRBuilder.buildUndef(DstTy).getReg(0);
- else
- Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
- MIRBuilder.buildCopy(DstReg, Val);
- MI.eraseFromParent();
- return Legalized;
- }
-
Register Undef;
SmallVector<Register, 32> BuildVec;
- LLT EltTy = DstTy.getElementType();
+ LLT EltTy = DstTy.getScalarType();
for (int Idx : Mask) {
if (Idx < 0) {
@@ -6768,26 +7046,20 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
}
}
- MIRBuilder.buildBuildVector(DstReg, BuildVec);
+ if (DstTy.isScalar())
+ MIRBuilder.buildCopy(DstReg, BuildVec[0]);
+ else
+ MIRBuilder.buildBuildVector(DstReg, BuildVec);
MI.eraseFromParent();
return Legalized;
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
- const auto &MF = *MI.getMF();
- const auto &TFI = *MF.getSubtarget().getFrameLowering();
- if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
- return UnableToLegalize;
-
- Register Dst = MI.getOperand(0).getReg();
- Register AllocSize = MI.getOperand(1).getReg();
- Align Alignment = assumeAligned(MI.getOperand(2).getImm());
-
- LLT PtrTy = MRI.getType(Dst);
+Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
+ Register AllocSize,
+ Align Alignment,
+ LLT PtrTy) {
LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
- Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
@@ -6802,7 +7074,25 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
}
- SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
+ return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
+ const auto &MF = *MI.getMF();
+ const auto &TFI = *MF.getSubtarget().getFrameLowering();
+ if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
+ return UnableToLegalize;
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register AllocSize = MI.getOperand(1).getReg();
+ Align Alignment = assumeAligned(MI.getOperand(2).getImm());
+
+ LLT PtrTy = MRI.getType(Dst);
+ Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ Register SPTmp =
+ getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
+
MIRBuilder.buildCopy(SPReg, SPTmp);
MIRBuilder.buildCopy(Dst, SPTmp);
@@ -6811,6 +7101,28 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerStackSave(MachineInstr &MI) {
+ Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
+ if (!StackPtr)
+ return UnableToLegalize;
+
+ MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
+ Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
+ if (!StackPtr)
+ return UnableToLegalize;
+
+ MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtract(MachineInstr &MI) {
auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned Offset = MI.getOperand(2).getImm();
@@ -7577,6 +7889,56 @@ LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
return UnableToLegalize;
}
+static Type *getTypeForLLT(LLT Ty, LLVMContext &C);
+
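+// Lower G_VAARG by manipulating the va_list in memory directly: load the
+// current head of the list, realign it if the argument needs more than the
+// minimum stack argument alignment, store the head bumped past the argument
+// back into the list, and load the argument itself from the aligned head.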
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
+ MachineFunction &MF = *MI.getMF();
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ Register ListPtr = MI.getOperand(1).getReg();
+ LLT PtrTy = MRI.getType(ListPtr);
+
+  // ListPtr is a pointer to the head of the list. Load the current head of
+  // the list from it.
+ Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
+ MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
+ MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
+ auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
+
+ const Align A(MI.getOperand(2).getImm());
+ LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
+ if (A > TLI.getMinStackArgumentAlignment()) {
+ Register AlignAmt =
+ MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
+ auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
+ auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
+ VAList = AndDst.getReg(0);
+ }
+
+  // Increment the pointer, VAList, to the next vaarg.
+  // The list should be bumped by the size of the element in the current head
+  // of the list.
+ Register Dst = MI.getOperand(0).getReg();
+ LLT LLTTy = MRI.getType(Dst);
+ Type *Ty = getTypeForLLT(LLTTy, Ctx);
+ auto IncAmt =
+ MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
+ auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
+
+  // Store the incremented VAList to the legalized pointer.
+ MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
+ MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
+ MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
+ // Load the actual argument out of the pointer VAList
+ Align EltAlignment = DL.getABITypeAlign(Ty);
+ MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
+ MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
+ MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
// On Darwin, -Os means optimize for size without hurting performance, so
// only really optimize for size when -Oz (MinSize) is used.
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 1f2e481c63e0..de9931d1c240 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -77,13 +77,11 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, LegalizeAction Action) {
}
raw_ostream &LegalityQuery::print(raw_ostream &OS) const {
- OS << Opcode << ", Tys={";
+ OS << "Opcode=" << Opcode << ", Tys={";
for (const auto &Type : Types) {
OS << Type << ", ";
}
- OS << "}, Opcode=";
-
- OS << Opcode << ", MMOs={";
+ OS << "}, MMOs={";
for (const auto &MMODescr : MMODescrs) {
OS << MMODescr.MemoryTy << ", ";
}
@@ -102,6 +100,7 @@ static bool hasNoSimpleLoops(const LegalizeRule &Rule, const LegalityQuery &Q,
case Lower:
case MoreElements:
case FewerElements:
+ case Libcall:
break;
default:
return Q.Types[Mutation.first] != Mutation.second;
@@ -118,6 +117,10 @@ static bool mutationIsSane(const LegalizeRule &Rule,
if (Rule.getAction() == Custom || Rule.getAction() == Legal)
return true;
+ // Skip null mutation.
+ if (!Mutation.second.isValid())
+ return true;
+
const unsigned TypeIdx = Mutation.first;
const LLT OldTy = Q.Types[TypeIdx];
const LLT NewTy = Mutation.second;
diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
index 49f40495d6fc..246aa88b09ac 100644
--- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -934,9 +934,8 @@ void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) {
BitVector LegalSizes(MaxStoreSizeToForm * 2);
const auto &LI = *MF->getSubtarget().getLegalizerInfo();
const auto &DL = MF->getFunction().getParent()->getDataLayout();
- Type *IntPtrIRTy =
- DL.getIntPtrType(MF->getFunction().getContext(), AddrSpace);
- LLT PtrTy = getLLTForType(*IntPtrIRTy->getPointerTo(AddrSpace), DL);
+ Type *IRPtrTy = PointerType::get(MF->getFunction().getContext(), AddrSpace);
+ LLT PtrTy = getLLTForType(*IRPtrTy, DL);
// We assume that we're not going to be generating any stores wider than
// MaxStoreSizeToForm bits for now.
for (unsigned Size = 2; Size <= MaxStoreSizeToForm; Size *= 2) {
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 962b54ec5d6b..80e9c08e850b 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -196,14 +196,14 @@ void MachineIRBuilder::validateShiftOp(const LLT Res, const LLT Op0,
assert((Res == Op0) && "type mismatch");
}
-MachineInstrBuilder MachineIRBuilder::buildPtrAdd(const DstOp &Res,
- const SrcOp &Op0,
- const SrcOp &Op1) {
+MachineInstrBuilder
+MachineIRBuilder::buildPtrAdd(const DstOp &Res, const SrcOp &Op0,
+ const SrcOp &Op1, std::optional<unsigned> Flags) {
assert(Res.getLLTTy(*getMRI()).getScalarType().isPointer() &&
Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch");
assert(Op1.getLLTTy(*getMRI()).getScalarType().isScalar() && "invalid offset type");
- return buildInstr(TargetOpcode::G_PTR_ADD, {Res}, {Op0, Op1});
+ return buildInstr(TargetOpcode::G_PTR_ADD, {Res}, {Op0, Op1}, Flags);
}
std::optional<MachineInstrBuilder>
@@ -775,30 +775,55 @@ MachineInstrBuilder MachineIRBuilder::buildInsert(const DstOp &Res,
return buildInstr(TargetOpcode::G_INSERT, Res, {Src, Op, uint64_t(Index)});
}
-MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
- ArrayRef<Register> ResultRegs,
- bool HasSideEffects) {
- auto MIB =
- buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
- : TargetOpcode::G_INTRINSIC);
+static unsigned getIntrinsicOpcode(bool HasSideEffects, bool IsConvergent) {
+ if (HasSideEffects && IsConvergent)
+ return TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS;
+ if (HasSideEffects)
+ return TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS;
+ if (IsConvergent)
+ return TargetOpcode::G_INTRINSIC_CONVERGENT;
+ return TargetOpcode::G_INTRINSIC;
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
+ ArrayRef<Register> ResultRegs,
+ bool HasSideEffects, bool isConvergent) {
+ auto MIB = buildInstr(getIntrinsicOpcode(HasSideEffects, isConvergent));
for (unsigned ResultReg : ResultRegs)
MIB.addDef(ResultReg);
MIB.addIntrinsicID(ID);
return MIB;
}
+MachineInstrBuilder
+MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
+ ArrayRef<Register> ResultRegs) {
+ auto Attrs = Intrinsic::getAttributes(getContext(), ID);
+ bool HasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory();
+ bool isConvergent = Attrs.hasFnAttr(Attribute::Convergent);
+ return buildIntrinsic(ID, ResultRegs, HasSideEffects, isConvergent);
+}
+
MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
ArrayRef<DstOp> Results,
- bool HasSideEffects) {
- auto MIB =
- buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
- : TargetOpcode::G_INTRINSIC);
+ bool HasSideEffects,
+ bool isConvergent) {
+ auto MIB = buildInstr(getIntrinsicOpcode(HasSideEffects, isConvergent));
for (DstOp Result : Results)
Result.addDefToMIB(*getMRI(), MIB);
MIB.addIntrinsicID(ID);
return MIB;
}
+MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
+ ArrayRef<DstOp> Results) {
+ auto Attrs = Intrinsic::getAttributes(getContext(), ID);
+ bool HasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory();
+ bool isConvergent = Attrs.hasFnAttr(Attribute::Convergent);
+ return buildIntrinsic(ID, Results, HasSideEffects, isConvergent);
+}
+
MachineInstrBuilder MachineIRBuilder::buildTrunc(const DstOp &Res,
const SrcOp &Op) {
return buildInstr(TargetOpcode::G_TRUNC, Res, Op);
@@ -1040,16 +1065,16 @@ void MachineIRBuilder::validateTruncExt(const LLT DstTy, const LLT SrcTy,
#ifndef NDEBUG
if (DstTy.isVector()) {
assert(SrcTy.isVector() && "mismatched cast between vector and non-vector");
- assert(SrcTy.getNumElements() == DstTy.getNumElements() &&
+ assert(SrcTy.getElementCount() == DstTy.getElementCount() &&
"different number of elements in a trunc/ext");
} else
assert(DstTy.isScalar() && SrcTy.isScalar() && "invalid extend/trunc");
if (IsExtend)
- assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() &&
+ assert(TypeSize::isKnownGT(DstTy.getSizeInBits(), SrcTy.getSizeInBits()) &&
"invalid narrowing extend");
else
- assert(DstTy.getSizeInBits() < SrcTy.getSizeInBits() &&
+ assert(TypeSize::isKnownLT(DstTy.getSizeInBits(), SrcTy.getSizeInBits()) &&
"invalid widening trunc");
#endif
}
diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 885a1056b2ea..bb5363fb2527 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -449,7 +449,8 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
return MappingCost::ImpossibleCost();
// If mapped with InstrMapping, MI will have the recorded cost.
- MappingCost Cost(MBFI ? MBFI->getBlockFreq(MI.getParent()) : 1);
+ MappingCost Cost(MBFI ? MBFI->getBlockFreq(MI.getParent())
+ : BlockFrequency(1));
bool Saturated = Cost.addLocalCost(InstrMapping.getCost());
assert(!Saturated && "Possible mapping saturated the cost");
LLVM_DEBUG(dbgs() << "Evaluating mapping cost for: " << MI);
@@ -623,7 +624,7 @@ bool RegBankSelect::applyMapping(
// Second, rewrite the instruction.
LLVM_DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n');
- RBI->applyMapping(OpdMapper);
+ RBI->applyMapping(MIRBuilder, OpdMapper);
return true;
}
@@ -971,7 +972,7 @@ bool RegBankSelect::EdgeInsertPoint::canMaterialize() const {
return Src.canSplitCriticalEdge(DstOrSplit);
}
-RegBankSelect::MappingCost::MappingCost(const BlockFrequency &LocalFreq)
+RegBankSelect::MappingCost::MappingCost(BlockFrequency LocalFreq)
: LocalFreq(LocalFreq.getFrequency()) {}
bool RegBankSelect::MappingCost::addLocalCost(uint64_t Cost) {
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 080600d3cc98..eaf829f562b2 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -205,8 +205,15 @@ bool llvm::canReplaceReg(Register DstReg, Register SrcReg,
return false;
// Replace if either DstReg has no constraints or the register
// constraints match.
- return !MRI.getRegClassOrRegBank(DstReg) ||
- MRI.getRegClassOrRegBank(DstReg) == MRI.getRegClassOrRegBank(SrcReg);
+ const auto &DstRBC = MRI.getRegClassOrRegBank(DstReg);
+ if (!DstRBC || DstRBC == MRI.getRegClassOrRegBank(SrcReg))
+ return true;
+
+  // Otherwise match if Src already has a register class that is covered by
+  // the Dst RegBank.
+ return DstRBC.is<const RegisterBank *>() && MRI.getRegClassOrNull(SrcReg) &&
+ DstRBC.get<const RegisterBank *>()->covers(
+ *MRI.getRegClassOrNull(SrcReg));
}
bool llvm::isTriviallyDead(const MachineInstr &MI,
@@ -773,6 +780,29 @@ std::optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode,
return std::nullopt;
}
+std::optional<APInt> llvm::ConstantFoldCastOp(unsigned Opcode, LLT DstTy,
+ const Register Op0,
+ const MachineRegisterInfo &MRI) {
+ std::optional<APInt> Val = getIConstantVRegVal(Op0, MRI);
+ if (!Val)
+ return Val;
+
+ const unsigned DstSize = DstTy.getScalarSizeInBits();
+
+ switch (Opcode) {
+ case TargetOpcode::G_SEXT:
+ return Val->sext(DstSize);
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ // TODO: DAG considers target preference when constant folding any_extend.
+ return Val->zext(DstSize);
+ default:
+ break;
+ }
+
+ llvm_unreachable("unexpected cast opcode to constant fold");
+}
+
std::optional<APFloat>
llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy, Register Src,
const MachineRegisterInfo &MRI) {
@@ -1086,9 +1116,9 @@ std::optional<APInt>
llvm::getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI) {
if (auto SplatValAndReg =
getAnyConstantSplat(Reg, MRI, /* AllowUndef */ false)) {
- std::optional<ValueAndVReg> ValAndVReg =
- getIConstantVRegValWithLookThrough(SplatValAndReg->VReg, MRI);
- return ValAndVReg->Value;
+ if (std::optional<ValueAndVReg> ValAndVReg =
+ getIConstantVRegValWithLookThrough(SplatValAndReg->VReg, MRI))
+ return ValAndVReg->Value;
}
return std::nullopt;
@@ -1143,7 +1173,7 @@ llvm::getVectorSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI) {
if (auto Splat = getIConstantSplatSExtVal(MI, MRI))
return RegOrConstant(*Splat);
auto Reg = MI.getOperand(1).getReg();
- if (any_of(make_range(MI.operands_begin() + 2, MI.operands_end()),
+ if (any_of(drop_begin(MI.operands(), 2),
[&Reg](const MachineOperand &Op) { return Op.getReg() != Reg; }))
return std::nullopt;
return RegOrConstant(Reg);