author     Dimitry Andric <dim@FreeBSD.org>   2021-06-13 19:31:46 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2021-06-13 19:37:19 +0000
commit     e8d8bef961a50d4dc22501cde4fb9fb0be1b2532 (patch)
tree       94f04805f47bb7c59ae29690d8952b6074fff602 /contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
parent     bb130ff39747b94592cb26d71b7cb097b9a4ea6b (diff)
parent     b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff)
download   src-e8d8bef961a50d4dc22501cde4fb9fb0be1b2532.tar.gz
           src-e8d8bef961a50d4dc22501cde4fb9fb0be1b2532.zip
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp  1974
1 file changed, 1907 insertions, 67 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 194961ae3b21..df0219fcfa64 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -16,6 +16,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -34,7 +35,6 @@ static cl::opt<bool>
cl::desc("Force all indexed operations to be "
"legal for the GlobalISel combiner"));
-
CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
MachineIRBuilder &B, GISelKnownBits *KB,
MachineDominatorTree *MDT,
@@ -44,6 +44,75 @@ CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
(void)this->KB;
}
+const TargetLowering &CombinerHelper::getTargetLowering() const {
+ return *Builder.getMF().getSubtarget().getTargetLowering();
+}
+
+/// \returns The little endian in-memory byte position of byte \p I in a
+/// \p ByteWidth bytes wide type.
+///
+/// E.g. Given a 4-byte type x, x[0] -> byte 0
+static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
+ assert(I < ByteWidth && "I must be in [0, ByteWidth)");
+ return I;
+}
+
+/// \returns The big endian in-memory byte position of byte \p I in a
+/// \p ByteWidth bytes wide type.
+///
+/// E.g. Given a 4-byte type x, x[0] -> byte 3
+static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
+ assert(I < ByteWidth && "I must be in [0, ByteWidth)");
+ return ByteWidth - I - 1;
+}
+
+/// Given a map from byte offsets in memory to indices in a load/store,
+/// determine if that map corresponds to a little or big endian byte pattern.
+///
+/// \param MemOffset2Idx maps memory offsets to address offsets.
+/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
+///
+/// \returns true if the map corresponds to a big endian byte pattern, false
+/// if it corresponds to a little endian byte pattern, and None otherwise.
+///
+/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
+/// are as follows:
+///
+/// AddrOffset Little endian Big endian
+/// 0 0 3
+/// 1 1 2
+/// 2 2 1
+/// 3 3 0
+static Optional<bool>
+isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
+ int64_t LowestIdx) {
+ // Need at least two byte positions to decide on endianness.
+ unsigned Width = MemOffset2Idx.size();
+ if (Width < 2)
+ return None;
+ bool BigEndian = true, LittleEndian = true;
+ for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
+ auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
+ if (MemOffsetAndIdx == MemOffset2Idx.end())
+ return None;
+ const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
+ assert(Idx >= 0 && "Expected non-negative byte offset?");
+ LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
+ BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
+ if (!BigEndian && !LittleEndian)
+ return None;
+ }
+
+ assert((BigEndian != LittleEndian) &&
+ "Pattern cannot be both big and little endian!");
+ return BigEndian;
+}
+
+bool CombinerHelper::isLegalOrBeforeLegalizer(
+ const LegalityQuery &Query) const {
+ return !LI || LI->getAction(Query).Action == LegalizeActions::Legal;
+}
+
void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
Register ToReg) const {
Observer.changingAllUsesOfReg(MRI, FromReg);
@@ -555,13 +624,13 @@ bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
assert(DefMI.getParent() == UseMI.getParent());
if (&DefMI == &UseMI)
return false;
-
- // Loop through the basic block until we find one of the instructions.
- MachineBasicBlock::const_iterator I = DefMI.getParent()->begin();
- for (; &*I != &DefMI && &*I != &UseMI; ++I)
- return &*I == &DefMI;
-
- llvm_unreachable("Block must contain instructions");
+ const MachineBasicBlock &MBB = *DefMI.getParent();
+ auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
+ return &MI == &DefMI || &MI == &UseMI;
+ });
+ if (DefOrUse == MBB.end())
+ llvm_unreachable("Block must contain both DefMI and UseMI!");
+ return &*DefOrUse == &DefMI;
}
bool CombinerHelper::dominates(const MachineInstr &DefMI,
@@ -576,20 +645,97 @@ bool CombinerHelper::dominates(const MachineInstr &DefMI,
return isPredecessor(DefMI, UseMI);
}
-bool CombinerHelper::matchSextAlreadyExtended(MachineInstr &MI) {
+bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
Register SrcReg = MI.getOperand(1).getReg();
- unsigned SrcSignBits = KB->computeNumSignBits(SrcReg);
- unsigned NumSextBits =
- MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() -
- MI.getOperand(2).getImm();
- return SrcSignBits >= NumSextBits;
+ Register LoadUser = SrcReg;
+
+ if (MRI.getType(SrcReg).isVector())
+ return false;
+
+ Register TruncSrc;
+ if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
+ LoadUser = TruncSrc;
+
+ uint64_t SizeInBits = MI.getOperand(2).getImm();
+ // If the source is a G_SEXTLOAD from the same bit width, then we don't
+ // need any extend at all, just a truncate.
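+ // E.g. (illustrative virtual register names):
+ //   %ld:_(s32) = G_SEXTLOAD %ptr(p0) :: (load 2)
+ //   %ext:_(s32) = G_SEXT_INREG %ld, 16
+ // The load already sign-extended from 16 bits, so %ext can be replaced by a
+ // plain COPY of %ld.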
+ if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) {
+ const auto &MMO = **LoadMI->memoperands_begin();
+ // If truncating more than the original extended value, abort.
+ if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits())
+ return false;
+ if (MMO.getSizeInBits() == SizeInBits)
+ return true;
+ }
+ return false;
}
-bool CombinerHelper::applySextAlreadyExtended(MachineInstr &MI) {
+bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
- MachineIRBuilder MIB(MI);
- MIB.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchSextInRegOfLoad(
+ MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+
+ // Only supports scalars for now.
+ if (MRI.getType(MI.getOperand(0).getReg()).isVector())
+ return false;
+
+ Register SrcReg = MI.getOperand(1).getReg();
+ MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
+ if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()))
+ return false;
+
+ // If the sign extend extends from a narrower width than the load's width,
+ // then we can narrow the load width when we combine to a G_SEXTLOAD.
+ auto &MMO = **LoadDef->memoperands_begin();
+ // Don't do this for non-simple loads.
+ if (MMO.isAtomic() || MMO.isVolatile())
+ return false;
+
+ // Avoid widening the load at all.
+ unsigned NewSizeBits =
+ std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits());
+
+ // Don't generate G_SEXTLOADs with a < 1 byte width.
+ if (NewSizeBits < 8)
+ return false;
+ // Don't bother creating a non-power-2 sextload, it will likely be broken up
+ // anyway for most targets.
+ if (!isPowerOf2_32(NewSizeBits))
+ return false;
+ MatchInfo = std::make_tuple(LoadDef->getOperand(0).getReg(), NewSizeBits);
+ return true;
+}
+
+bool CombinerHelper::applySextInRegOfLoad(
+ MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+ Register LoadReg;
+ unsigned ScalarSizeBits;
+ std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
+ auto *LoadDef = MRI.getVRegDef(LoadReg);
+ assert(LoadDef && "Expected a load reg");
+
+ // If we have the following:
+ // %ld = G_LOAD %ptr, (load 2)
+ // %ext = G_SEXT_INREG %ld, 8
+ // ==>
+ // %ld = G_SEXTLOAD %ptr (load 1)
+
+ auto &MMO = **LoadDef->memoperands_begin();
+ Builder.setInstrAndDebugLoc(MI);
+ auto &MF = Builder.getMF();
+ auto PtrInfo = MMO.getPointerInfo();
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
+ Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
+ LoadDef->getOperand(1).getReg(), *NewMMO);
MI.eraseFromParent();
return true;
}
@@ -611,7 +757,7 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
return false;
LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
-
+ // FIXME: The following use traversal needs a bail out for pathological cases.
for (auto &Use : MRI.use_nodbg_instructions(Base)) {
if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
continue;
@@ -738,6 +884,11 @@ bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadS
Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
return false;
+ // For now, no targets actually support these opcodes so don't waste time
+ // running these unless we're forced to for testing.
+ if (!ForceLegalIndexing)
+ return false;
+
MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
MatchInfo.Offset);
if (!MatchInfo.IsPre &&
@@ -790,14 +941,12 @@ void CombinerHelper::applyCombineIndexedLoadStore(
LLVM_DEBUG(dbgs() << " Combined to indexed operation");
}
-bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
+bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) {
if (MI.getOpcode() != TargetOpcode::G_BR)
return false;
// Try to match the following:
// bb1:
- // %c(s32) = G_ICMP pred, %a, %b
- // %c1(s1) = G_TRUNC %c(s32)
// G_BRCOND %c1, %bb2
// G_BR %bb3
// bb2:
@@ -807,7 +956,7 @@ bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
// The above pattern does not have a fall through to the successor bb2, always
// resulting in a branch no matter which path is taken. Here we try to find
// and replace that pattern with a conditional branch to bb3 and otherwise
- // fallthrough to bb2.
+ // fallthrough to bb2. This is generally better for branch predictors.
MachineBasicBlock *MBB = MI.getParent();
MachineBasicBlock::iterator BrIt(MI);
@@ -822,40 +971,34 @@ bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
// Check that the next block is the conditional branch target.
if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
return false;
-
- MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
- if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
- !MRI.hasOneNonDBGUse(CmpMI->getOperand(0).getReg()))
- return false;
return true;
}
-bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) {
- if (!matchElideBrByInvertingCond(MI))
- return false;
- applyElideBrByInvertingCond(MI);
- return true;
-}
-
-void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) {
+void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) {
MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
MachineBasicBlock::iterator BrIt(MI);
MachineInstr *BrCond = &*std::prev(BrIt);
- MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
- CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
- (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
+ Builder.setInstrAndDebugLoc(*BrCond);
+ LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
+ // FIXME: Does int/fp matter for this? If so, we might need to restrict
+ // this to i1 only since we might not know for sure what kind of
+ // compare generated the condition value.
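+ // Build the "true" value for this type and XOR it with the branch condition
+ // to invert it; the conditional branch is then retargeted at the old
+ // unconditional target below.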
+ auto True = Builder.buildConstant(
+ Ty, getICmpTrueVal(getTargetLowering(), false, false));
+ auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
- // Invert the G_ICMP condition.
- Observer.changingInstr(*CmpMI);
- CmpMI->getOperand(1).setPredicate(InversePred);
- Observer.changedInstr(*CmpMI);
+ auto *FallthroughBB = BrCond->getOperand(1).getMBB();
+ Observer.changingInstr(MI);
+ MI.getOperand(0).setMBB(FallthroughBB);
+ Observer.changedInstr(MI);
- // Change the conditional branch target.
+ // Change the conditional branch to use the inverted condition and
+ // new target block.
Observer.changingInstr(*BrCond);
+ BrCond->getOperand(0).setReg(Xor.getReg(0));
BrCond->getOperand(1).setMBB(BrTarget);
Observer.changedInstr(*BrCond);
- MI.eraseFromParent();
}
static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
@@ -946,8 +1089,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
unsigned NumBits = Ty.getScalarSizeInBits();
auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
if (!Ty.isVector() && ValVRegAndVal) {
- unsigned KnownVal = ValVRegAndVal->Value;
- APInt Scalar = APInt(8, KnownVal);
+ APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8);
APInt SplatVal = APInt::getSplat(NumBits, Scalar);
return MIB.buildConstant(Ty, SplatVal).getReg(0);
}
@@ -1299,13 +1441,11 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
}
bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
+ const unsigned Opc = MI.getOpcode();
// This combine is fairly complex so it's not written with a separate
// matcher function.
- assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
- Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID();
- assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove ||
- ID == Intrinsic::memset) &&
- "Expected a memcpy like intrinsic");
+ assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
+ Opc == TargetOpcode::G_MEMSET) && "Expected memcpy like instruction");
auto MMOIt = MI.memoperands_begin();
const MachineMemOperand *MemOp = *MMOIt;
@@ -1316,11 +1456,11 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
Align DstAlign = MemOp->getBaseAlign();
Align SrcAlign;
- Register Dst = MI.getOperand(1).getReg();
- Register Src = MI.getOperand(2).getReg();
- Register Len = MI.getOperand(3).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ Register Len = MI.getOperand(2).getReg();
- if (ID != Intrinsic::memset) {
+ if (Opc != TargetOpcode::G_MEMSET) {
assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
MemOp = *(++MMOIt);
SrcAlign = MemOp->getBaseAlign();
@@ -1330,7 +1470,7 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
if (!LenVRegAndVal)
return false; // Leave it to the legalizer to lower it to a libcall.
- unsigned KnownLen = LenVRegAndVal->Value;
+ unsigned KnownLen = LenVRegAndVal->Value.getZExtValue();
if (KnownLen == 0) {
MI.eraseFromParent();
@@ -1340,15 +1480,78 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
if (MaxLen && KnownLen > MaxLen)
return false;
- if (ID == Intrinsic::memcpy)
+ if (Opc == TargetOpcode::G_MEMCPY)
return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
- if (ID == Intrinsic::memmove)
+ if (Opc == TargetOpcode::G_MEMMOVE)
return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
- if (ID == Intrinsic::memset)
+ if (Opc == TargetOpcode::G_MEMSET)
return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
return false;
}
+static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy,
+ const Register Op,
+ const MachineRegisterInfo &MRI) {
+ const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
+ if (!MaybeCst)
+ return None;
+
+ APFloat V = MaybeCst->getValueAPF();
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case TargetOpcode::G_FNEG: {
+ V.changeSign();
+ return V;
+ }
+ case TargetOpcode::G_FABS: {
+ V.clearSign();
+ return V;
+ }
+ case TargetOpcode::G_FPTRUNC:
+ break;
+ case TargetOpcode::G_FSQRT: {
+ bool Unused;
+ V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
+ V = APFloat(sqrt(V.convertToDouble()));
+ break;
+ }
+ case TargetOpcode::G_FLOG2: {
+ bool Unused;
+ V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
+ V = APFloat(log2(V.convertToDouble()));
+ break;
+ }
+ }
+ // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
+ // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`,
+ // and `G_FLOG2` reach here.
+ bool Unused;
+ V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
+ return V;
+}
+
+bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI,
+ Optional<APFloat> &Cst) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
+ return Cst.hasValue();
+}
+
+bool CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
+ Optional<APFloat> &Cst) {
+ assert(Cst.hasValue() && "Optional is unexpectedly empty!");
+ Builder.setInstrAndDebugLoc(MI);
+ MachineFunction &MF = Builder.getMF();
+ auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.buildFConstant(DstReg, *FPVal);
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
PtrAddChain &MatchInfo) {
// We're trying to match the following pattern:
@@ -1377,7 +1580,7 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
return false;
// Pass the combined immediate to the apply function.
- MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value;
+ MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue();
MatchInfo.Base = Base;
return true;
}
@@ -1395,15 +1598,211 @@ bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
return true;
}
+bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
+ RegisterImmPair &MatchInfo) {
+ // We're trying to match the following pattern with any of
+ // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
+ // %t1 = SHIFT %base, G_CONSTANT imm1
+ // %root = SHIFT %t1, G_CONSTANT imm2
+ // -->
+ // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
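+ // E.g. %t1 = G_SHL %base, 2 ; %root = G_SHL %t1, 3 --> %root = G_SHL %base, 5
+ // (illustrative immediates; when the combined amount reaches the scalar
+ // width, the apply step folds logical shifts to zero and clamps the rest).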
+
+ unsigned Opcode = MI.getOpcode();
+ assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
+ Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
+ Opcode == TargetOpcode::G_USHLSAT) &&
+ "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
+
+ Register Shl2 = MI.getOperand(1).getReg();
+ Register Imm1 = MI.getOperand(2).getReg();
+ auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
+ if (Shl2Def->getOpcode() != Opcode)
+ return false;
+
+ Register Base = Shl2Def->getOperand(1).getReg();
+ Register Imm2 = Shl2Def->getOperand(2).getReg();
+ auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
+ if (!MaybeImm2Val)
+ return false;
+
+ // Pass the combined immediate to the apply function.
+ MatchInfo.Imm =
+ (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue();
+ MatchInfo.Reg = Base;
+
+ // There is no simple replacement for a saturating unsigned left shift that
+ // exceeds the scalar size.
+ if (Opcode == TargetOpcode::G_USHLSAT &&
+ MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
+ return false;
+
+ return true;
+}
+
+bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
+ RegisterImmPair &MatchInfo) {
+ unsigned Opcode = MI.getOpcode();
+ assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
+ Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
+ Opcode == TargetOpcode::G_USHLSAT) &&
+ "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
+
+ Builder.setInstrAndDebugLoc(MI);
+ LLT Ty = MRI.getType(MI.getOperand(1).getReg());
+ unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
+ auto Imm = MatchInfo.Imm;
+
+ if (Imm >= ScalarSizeInBits) {
+ // Any logical shift that exceeds scalar size will produce zero.
+ if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
+ Builder.buildConstant(MI.getOperand(0), 0);
+ MI.eraseFromParent();
+ return true;
+ }
+ // Arithmetic shift and saturating signed left shift have no effect beyond
+ // scalar size.
+ Imm = ScalarSizeInBits - 1;
+ }
+
+ LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
+ Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(MatchInfo.Reg);
+ MI.getOperand(2).setReg(NewImm);
+ Observer.changedInstr(MI);
+ return true;
+}
+
+bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
+ ShiftOfShiftedLogic &MatchInfo) {
+ // We're trying to match the following pattern with any of
+ // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
+ // with any of G_AND/G_OR/G_XOR logic instructions.
+ // %t1 = SHIFT %X, G_CONSTANT C0
+ // %t2 = LOGIC %t1, %Y
+ // %root = SHIFT %t2, G_CONSTANT C1
+ // -->
+ // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
+ // %t4 = SHIFT %Y, G_CONSTANT C1
+ // %root = LOGIC %t3, %t4
+ unsigned ShiftOpcode = MI.getOpcode();
+ assert((ShiftOpcode == TargetOpcode::G_SHL ||
+ ShiftOpcode == TargetOpcode::G_ASHR ||
+ ShiftOpcode == TargetOpcode::G_LSHR ||
+ ShiftOpcode == TargetOpcode::G_USHLSAT ||
+ ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
+ "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
+
+ // Match a one-use bitwise logic op.
+ Register LogicDest = MI.getOperand(1).getReg();
+ if (!MRI.hasOneNonDBGUse(LogicDest))
+ return false;
+
+ MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
+ unsigned LogicOpcode = LogicMI->getOpcode();
+ if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
+ LogicOpcode != TargetOpcode::G_XOR)
+ return false;
+
+ // Find a matching one-use shift by constant.
+ const Register C1 = MI.getOperand(2).getReg();
+ auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
+
+ auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
+ // The shift should match the previous one and should have a single use.
+ if (MI->getOpcode() != ShiftOpcode ||
+ !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
+ return false;
+
+ // Must be a constant.
+ auto MaybeImmVal =
+ getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ ShiftVal = MaybeImmVal->Value.getSExtValue();
+ return true;
+ };
+
+ // Logic ops are commutative, so check each operand for a match.
+ Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
+ MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
+ Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
+ MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
+ uint64_t C0Val;
+
+ if (matchFirstShift(LogicMIOp1, C0Val)) {
+ MatchInfo.LogicNonShiftReg = LogicMIReg2;
+ MatchInfo.Shift2 = LogicMIOp1;
+ } else if (matchFirstShift(LogicMIOp2, C0Val)) {
+ MatchInfo.LogicNonShiftReg = LogicMIReg1;
+ MatchInfo.Shift2 = LogicMIOp2;
+ } else
+ return false;
+
+ MatchInfo.ValSum = C0Val + C1Val;
+
+ // The fold is not valid if the sum of the shift values exceeds bitwidth.
+ if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
+ return false;
+
+ MatchInfo.Logic = LogicMI;
+ return true;
+}
+
+bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
+ ShiftOfShiftedLogic &MatchInfo) {
+ unsigned Opcode = MI.getOpcode();
+ assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
+ Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
+ Opcode == TargetOpcode::G_SSHLSAT) &&
+ "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
+
+ LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
+ LLT DestType = MRI.getType(MI.getOperand(0).getReg());
+ Builder.setInstrAndDebugLoc(MI);
+
+ Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
+
+ Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
+ Register Shift1 =
+ Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
+
+ Register Shift2Const = MI.getOperand(2).getReg();
+ Register Shift2 = Builder
+ .buildInstr(Opcode, {DestType},
+ {MatchInfo.LogicNonShiftReg, Shift2Const})
+ .getReg(0);
+
+ Register Dest = MI.getOperand(0).getReg();
+ Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
+
+ // These were one use so it's safe to remove them.
+ MatchInfo.Shift2->eraseFromParent();
+ MatchInfo.Logic->eraseFromParent();
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
unsigned &ShiftVal) {
assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
auto MaybeImmVal =
getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
- if (!MaybeImmVal || !isPowerOf2_64(MaybeImmVal->Value))
+ if (!MaybeImmVal)
return false;
- ShiftVal = Log2_64(MaybeImmVal->Value);
- return true;
+
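+ // A multiply by a power-of-two constant becomes a shift, e.g.
+ // %r = G_MUL %x, 8 --> %r = G_SHL %x, 3 (illustrative); exactLogBase2()
+ // returns -1 when the constant is not a power of two, rejecting the combine.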
+ ShiftVal = MaybeImmVal->Value.exactLogBase2();
+ return (static_cast<int32_t>(ShiftVal) != -1);
}
bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
@@ -1419,6 +1818,254 @@ bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
return true;
}
+// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
+bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
+ RegisterImmPair &MatchData) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
+
+ Register LHS = MI.getOperand(1).getReg();
+
+ Register ExtSrc;
+ if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
+ !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
+ !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
+ return false;
+
+ // TODO: Should handle vector splat.
+ Register RHS = MI.getOperand(2).getReg();
+ auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ if (!MaybeShiftAmtVal)
+ return false;
+
+ if (LI) {
+ LLT SrcTy = MRI.getType(ExtSrc);
+
+ // We only really care about the legality with the shifted value. We can
+ // pick any type for the constant shift amount, so ask the target what to
+ // use. Otherwise we would have to guess and hope it is reported as legal.
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
+ return false;
+ }
+
+ int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue();
+ MatchData.Reg = ExtSrc;
+ MatchData.Imm = ShiftAmt;
+
+ unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
+ return MinLeadingZeros >= ShiftAmt;
+}
+
+bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
+ const RegisterImmPair &MatchData) {
+ Register ExtSrcReg = MatchData.Reg;
+ int64_t ShiftAmtVal = MatchData.Imm;
+
+ LLT ExtSrcTy = MRI.getType(ExtSrcReg);
+ Builder.setInstrAndDebugLoc(MI);
+ auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
+ auto NarrowShift =
+ Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
+ Builder.buildZExt(MI.getOperand(0), NarrowShift);
+ MI.eraseFromParent();
+ return true;
+}
+
+static Register peekThroughBitcast(Register Reg,
+ const MachineRegisterInfo &MRI) {
+ while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
+ ;
+
+ return Reg;
+}
+
+bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
+ MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+ Register SrcReg =
+ peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI);
+
+ MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
+ if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES &&
+ SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
+ SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS)
+ return false;
+
+ // Check the source type of the merge.
+ LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg());
+ LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
+ bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
+ if (SrcMergeTy != Dst0Ty && !SameSize)
+ return false;
+ // They are the same now (modulo a bitcast).
+ // We can collect all the src registers.
+ for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx;
+ ++Idx)
+ Operands.push_back(SrcInstr->getOperand(Idx).getReg());
+ return true;
+}
+
+bool CombinerHelper::applyCombineUnmergeMergeToPlainValues(
+ MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+ assert((MI.getNumOperands() - 1 == Operands.size()) &&
+ "Not enough operands to replace all defs");
+ unsigned NumElems = MI.getNumOperands() - 1;
+
+ LLT SrcTy = MRI.getType(Operands[0]);
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ bool CanReuseInputDirectly = DstTy == SrcTy;
+ Builder.setInstrAndDebugLoc(MI);
+ for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
+ Register DstReg = MI.getOperand(Idx).getReg();
+ Register SrcReg = Operands[Idx];
+ if (CanReuseInputDirectly)
+ replaceRegWith(MRI, DstReg, SrcReg);
+ else
+ Builder.buildCast(DstReg, SrcReg);
+ }
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
+ SmallVectorImpl<APInt> &Csts) {
+ unsigned SrcIdx = MI.getNumOperands() - 1;
+ Register SrcReg = MI.getOperand(SrcIdx).getReg();
+ MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
+ if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
+ SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
+ return false;
+ // Break down the big constant into smaller ones.
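+ // E.g. (little-endian slicing of the value, illustrative):
+ //   %cst:_(s32) = G_CONSTANT i32 0x11223344
+ //   %d0:_(s8), %d1:_(s8), %d2:_(s8), %d3:_(s8) = G_UNMERGE_VALUES %cst
+ // gives %d0 = 0x44, %d1 = 0x33, %d2 = 0x22, %d3 = 0x11.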
+ const MachineOperand &CstVal = SrcInstr->getOperand(1);
+ APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
+ ? CstVal.getCImm()->getValue()
+ : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
+
+ LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
+ unsigned ShiftAmt = Dst0Ty.getSizeInBits();
+ // Unmerge a constant.
+ for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
+ Csts.emplace_back(Val.trunc(ShiftAmt));
+ Val = Val.lshr(ShiftAmt);
+ }
+
+ return true;
+}
+
+bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
+ SmallVectorImpl<APInt> &Csts) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+ assert((MI.getNumOperands() - 1 == Csts.size()) &&
+ "Not enough operands to replace all defs");
+ unsigned NumElems = MI.getNumOperands() - 1;
+ Builder.setInstrAndDebugLoc(MI);
+ for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
+ Register DstReg = MI.getOperand(Idx).getReg();
+ Builder.buildConstant(DstReg, Csts[Idx]);
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+ // Check that all the lanes are dead except the first one.
+ for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
+ if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
+ return false;
+ }
+ return true;
+}
+
+bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
+ Builder.setInstrAndDebugLoc(MI);
+ Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
+ // Truncating a vector is going to truncate every single lane,
+ // whereas we want the full lowbits.
+ // Do the operation on a scalar instead.
+ LLT SrcTy = MRI.getType(SrcReg);
+ if (SrcTy.isVector())
+ SrcReg =
+ Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);
+
+ Register Dst0Reg = MI.getOperand(0).getReg();
+ LLT Dst0Ty = MRI.getType(Dst0Reg);
+ if (Dst0Ty.isVector()) {
+ auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
+ Builder.buildCast(Dst0Reg, MIB);
+ } else
+ Builder.buildTrunc(Dst0Reg, SrcReg);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+ Register Dst0Reg = MI.getOperand(0).getReg();
+ LLT Dst0Ty = MRI.getType(Dst0Reg);
+ // G_ZEXT on vector applies to each lane, so it will
+ // affect all destinations. Therefore we won't be able
+ // to simplify the unmerge to just the first definition.
+ if (Dst0Ty.isVector())
+ return false;
+ Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ if (SrcTy.isVector())
+ return false;
+
+ Register ZExtSrcReg;
+ if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
+ return false;
+
+ // Finally we can replace the first definition with
+ // a zext of the source if the definition is big enough to hold
+ // all of ZExtSrc bits.
+ LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
+ return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
+}
+
+bool CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+
+ Register Dst0Reg = MI.getOperand(0).getReg();
+
+ MachineInstr *ZExtInstr =
+ MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
+ assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
+ "Expecting a G_ZEXT");
+
+ Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
+ LLT Dst0Ty = MRI.getType(Dst0Reg);
+ LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
+
+ Builder.setInstrAndDebugLoc(MI);
+
+ if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
+ Builder.buildZExt(Dst0Reg, ZExtSrcReg);
+ } else {
+ assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
+ "ZExt src doesn't fit in destination");
+ replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
+ }
+
+ Register ZeroReg;
+ for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
+ if (!ZeroReg)
+ ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
+ replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
+ }
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
unsigned TargetShiftSize,
unsigned &ShiftVal) {
@@ -1440,7 +2087,7 @@ bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
if (!MaybeImmVal)
return false;
- ShiftVal = MaybeImmVal->Value;
+ ShiftVal = MaybeImmVal->Value.getSExtValue();
return ShiftVal >= Size / 2 && ShiftVal < Size;
}
@@ -1529,6 +2176,296 @@ bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
return false;
}
+bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
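+ // G_INTTOPTR (G_PTRTOINT %x) --> COPY %x, provided %x already has the
+ // destination pointer type.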
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ Register SrcReg = MI.getOperand(1).getReg();
+ return mi_match(SrcReg, MRI,
+ m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
+}
+
+bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.setInstr(MI);
+ Builder.buildCopy(DstReg, Reg);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
+ Register SrcReg = MI.getOperand(1).getReg();
+ return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg)));
+}
+
+bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.setInstr(MI);
+ Builder.buildZExtOrTrunc(DstReg, Reg);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchCombineAddP2IToPtrAdd(
+ MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
+ assert(MI.getOpcode() == TargetOpcode::G_ADD);
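+ // Reassociate the ptrtoint back into the pointer domain (illustrative):
+ //   %i:_(s64) = G_PTRTOINT %p:_(p0)
+ //   %sum:_(s64) = G_ADD %i, %off
+ // -->
+ //   %a:_(p0) = G_PTR_ADD %p, %off
+ //   %sum:_(s64) = G_PTRTOINT %a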
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ LLT IntTy = MRI.getType(LHS);
+
+ // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
+ // instruction.
+ PtrReg.second = false;
+ for (Register SrcReg : {LHS, RHS}) {
+ if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
+ // Don't handle cases where the integer is implicitly converted to the
+ // pointer width.
+ LLT PtrTy = MRI.getType(PtrReg.first);
+ if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
+ return true;
+ }
+
+ PtrReg.second = true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::applyCombineAddP2IToPtrAdd(
+ MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+
+ const bool DoCommute = PtrReg.second;
+ if (DoCommute)
+ std::swap(LHS, RHS);
+ LHS = PtrReg.first;
+
+ LLT PtrTy = MRI.getType(LHS);
+
+ Builder.setInstrAndDebugLoc(MI);
+ auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
+ Builder.buildPtrToInt(Dst, PtrAdd);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
+ int64_t &NewCst) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
+
+ if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) {
+ int64_t Cst;
+ if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
+ NewCst = Cst + *RHSCst;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
+ int64_t &NewCst) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
+ Register Dst = MI.getOperand(0).getReg();
+
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildConstant(Dst, NewCst);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ return mi_match(SrcReg, MRI,
+ m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
+}
+
+bool CombinerHelper::applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
+ Register DstReg = MI.getOperand(0).getReg();
+ MI.eraseFromParent();
+ replaceRegWith(MRI, DstReg, Reg);
+ return true;
+}
+
+bool CombinerHelper::matchCombineExtOfExt(
+ MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+ assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
+ MI.getOpcode() == TargetOpcode::G_SEXT ||
+ MI.getOpcode() == TargetOpcode::G_ZEXT) &&
+ "Expected a G_[ASZ]EXT");
+ Register SrcReg = MI.getOperand(1).getReg();
+ MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+ // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
+ unsigned Opc = MI.getOpcode();
+ unsigned SrcOpc = SrcMI->getOpcode();
+ if (Opc == SrcOpc ||
+ (Opc == TargetOpcode::G_ANYEXT &&
+ (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
+ (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
+ MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
+ return true;
+ }
+ return false;
+}
+
+bool CombinerHelper::applyCombineExtOfExt(
+ MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+ assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
+ MI.getOpcode() == TargetOpcode::G_SEXT ||
+ MI.getOpcode() == TargetOpcode::G_ZEXT) &&
+ "Expected a G_[ASZ]EXT");
+
+ Register Reg = std::get<0>(MatchInfo);
+ unsigned SrcExtOp = std::get<1>(MatchInfo);
+
+ // Combine exts with the same opcode.
+ if (MI.getOpcode() == SrcExtOp) {
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(Reg);
+ Observer.changedInstr(MI);
+ return true;
+ }
+
+ // Combine:
+ // - anyext([sz]ext x) to [sz]ext x
+ // - sext(zext x) to zext x
+ if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
+ (MI.getOpcode() == TargetOpcode::G_SEXT &&
+ SrcExtOp == TargetOpcode::G_ZEXT)) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
+ MI.eraseFromParent();
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg,
+ MI.getFlags());
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG");
+ Register SrcReg = MI.getOperand(1).getReg();
+ return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg)));
+}
+
+bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
+ assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
+ Src = MI.getOperand(1).getReg();
+ Register AbsSrc;
+ return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc)));
+}
+
+bool CombinerHelper::applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
+ assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
+ Register Dst = MI.getOperand(0).getReg();
+ MI.eraseFromParent();
+ replaceRegWith(MRI, Dst, Src);
+ return true;
+}
+
+bool CombinerHelper::matchCombineTruncOfExt(
+ MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+ Register SrcReg = MI.getOperand(1).getReg();
+ MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+ unsigned SrcOpc = SrcMI->getOpcode();
+ if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
+ SrcOpc == TargetOpcode::G_ZEXT) {
+ MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
+ return true;
+ }
+ return false;
+}
+
+bool CombinerHelper::applyCombineTruncOfExt(
+ MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+ Register SrcReg = MatchInfo.first;
+ unsigned SrcExtOp = MatchInfo.second;
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ LLT DstTy = MRI.getType(DstReg);
+ if (SrcTy == DstTy) {
+ MI.eraseFromParent();
+ replaceRegWith(MRI, DstReg, SrcReg);
+ return true;
+ }
+ Builder.setInstrAndDebugLoc(MI);
+ if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
+ Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
+ else
+ Builder.buildTrunc(DstReg, SrcReg);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchCombineTruncOfShl(
+ MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
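+ // trunc (shl %x, %amt) --> shl (trunc %x), %amt when the shift amount is
+ // known to stay below the narrow destination width.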
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ Register ShiftSrc;
+ Register ShiftAmt;
+
+ if (MRI.hasOneNonDBGUse(SrcReg) &&
+ mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) &&
+ isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_SHL,
+ {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) {
+ KnownBits Known = KB->getKnownBits(ShiftAmt);
+ unsigned Size = DstTy.getSizeInBits();
+ if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
+ MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool CombinerHelper::applyCombineTruncOfShl(
+ MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+
+ Register ShiftSrc = MatchInfo.first;
+ Register ShiftAmt = MatchInfo.second;
+ Builder.setInstrAndDebugLoc(MI);
+ auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc);
+ Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags());
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
return MO.isReg() &&
@@ -1555,6 +2492,22 @@ bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
MRI);
}
+bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SELECT);
+ return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
+ MRI);
+}
+
+bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
+ assert(MI.getOpcode() == TargetOpcode::G_SELECT);
+ if (auto MaybeCstCmp =
+ getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) {
+ OpIdx = MaybeCstCmp->Value.isNullValue() ? 3 : 2;
+ return true;
+ }
+ return false;
+}
+
bool CombinerHelper::eraseInst(MachineInstr &MI) {
MI.eraseFromParent();
return true;
@@ -1651,6 +2604,16 @@ bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
return true;
}
+bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
+ Register Replacement) {
+ assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
+ Register OldReg = MI.getOperand(0).getReg();
+ assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
+ MI.eraseFromParent();
+ replaceRegWith(MRI, OldReg, Replacement);
+ return true;
+}
+
bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_SELECT);
// Match (cond ? x : x)
@@ -1671,6 +2634,18 @@ bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
MRI);
}
+bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ return MO.isReg() &&
+ getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
+}
+
+bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
+ unsigned OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
+}
+
bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
@@ -1706,9 +2681,7 @@ bool CombinerHelper::matchSimplifyAddToSub(
// ((0-A) + B) -> B - A
// (A + (0-B)) -> A - B
auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
- int64_t Cst;
- if (!mi_match(MaybeSub, MRI, m_GSub(m_ICst(Cst), m_Reg(NewRHS))) ||
- Cst != 0)
+ if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
return false;
NewLHS = MaybeNewLHS;
return true;
@@ -1717,6 +2690,67 @@ bool CombinerHelper::matchSimplifyAddToSub(
return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
}
+bool CombinerHelper::matchCombineInsertVecElts(
+ MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
+ "Invalid opcode");
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
+ unsigned NumElts = DstTy.getNumElements();
+ // If this MI is part of a sequence of insert_vec_elts, then
+ // don't do the combine in the middle of the sequence.
+ if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
+ TargetOpcode::G_INSERT_VECTOR_ELT)
+ return false;
+ MachineInstr *CurrInst = &MI;
+ MachineInstr *TmpInst;
+ int64_t IntImm;
+ Register TmpReg;
+ MatchInfo.resize(NumElts);
+ while (mi_match(
+ CurrInst->getOperand(0).getReg(), MRI,
+ m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
+ if (IntImm >= NumElts)
+ return false;
+ if (!MatchInfo[IntImm])
+ MatchInfo[IntImm] = TmpReg;
+ CurrInst = TmpInst;
+ }
+ // Variable index.
+ if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
+ return false;
+ if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
+ for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
+ if (!MatchInfo[I - 1].isValid())
+ MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
+ }
+ return true;
+ }
+ // If we didn't end in a G_IMPLICIT_DEF, bail out.
+ return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
+}
+
+bool CombinerHelper::applyCombineInsertVecElts(
+ MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
+ Builder.setInstr(MI);
+ Register UndefReg;
+ auto GetUndef = [&]() {
+ if (UndefReg)
+ return UndefReg;
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
+ return UndefReg;
+ };
+ for (unsigned I = 0; I < MatchInfo.size(); ++I) {
+ if (!MatchInfo[I])
+ MatchInfo[I] = GetUndef();
+ }
+ Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::applySimplifyAddToSub(
MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
Builder.setInstr(MI);
@@ -1727,6 +2761,812 @@ bool CombinerHelper::applySimplifyAddToSub(
return true;
}
+bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
+ MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
+ // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
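+ // E.g. (and (lshr x, z), (lshr y, z)) --> (lshr (and x, y), z).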
+ //
+ // Creates the new hand + logic instructions (but does not insert them).
+ //
+ // On success, MatchInfo is populated with the new instructions. These are
+ // inserted in applyHoistLogicOpWithSameOpcodeHands.
+ unsigned LogicOpcode = MI.getOpcode();
+ assert(LogicOpcode == TargetOpcode::G_AND ||
+ LogicOpcode == TargetOpcode::G_OR ||
+ LogicOpcode == TargetOpcode::G_XOR);
+ MachineIRBuilder MIB(MI);
+ Register Dst = MI.getOperand(0).getReg();
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+
+ // Don't recompute anything.
+ if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
+ return false;
+
+ // Make sure we have (hand x, ...), (hand y, ...)
+ MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
+ MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
+ if (!LeftHandInst || !RightHandInst)
+ return false;
+ unsigned HandOpcode = LeftHandInst->getOpcode();
+ if (HandOpcode != RightHandInst->getOpcode())
+ return false;
+ if (!LeftHandInst->getOperand(1).isReg() ||
+ !RightHandInst->getOperand(1).isReg())
+ return false;
+
+ // Make sure the types match up, and if we're doing this post-legalization,
+ // we end up with legal types.
+ Register X = LeftHandInst->getOperand(1).getReg();
+ Register Y = RightHandInst->getOperand(1).getReg();
+ LLT XTy = MRI.getType(X);
+ LLT YTy = MRI.getType(Y);
+ if (XTy != YTy)
+ return false;
+ if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
+ return false;
+
+ // Optional extra source register.
+ Register ExtraHandOpSrcReg;
+ switch (HandOpcode) {
+ default:
+ return false;
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT: {
+ // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
+ break;
+ }
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_SHL: {
+ // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
+ MachineOperand &ZOp = LeftHandInst->getOperand(2);
+ if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
+ return false;
+ ExtraHandOpSrcReg = ZOp.getReg();
+ break;
+ }
+ }
+
+ // Record the steps to build the new instructions.
+ //
+ // Steps to build (logic x, y)
+ auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
+ OperandBuildSteps LogicBuildSteps = {
+ [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
+ InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
+
+ // Steps to build hand (logic x, y), ...z
+ OperandBuildSteps HandBuildSteps = {
+ [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
+ if (ExtraHandOpSrcReg.isValid())
+ HandBuildSteps.push_back(
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
+ InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
+
+ MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
+ return true;
+}
+
+bool CombinerHelper::applyBuildInstructionSteps(
+ MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
+ assert(MatchInfo.InstrsToBuild.size() &&
+ "Expected at least one instr to build?");
+ Builder.setInstr(MI);
+ for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
+ assert(InstrToBuild.Opcode && "Expected a valid opcode?");
+ assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
+ MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
+ for (auto &OperandFn : InstrToBuild.OperandFns)
+ OperandFn(Instr);
+ }
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchAshrShlToSextInreg(
+ MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ASHR);
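+ // (ashr (shl %x, C), C) --> (sext_inreg %x, Width - C); e.g. on s32 with
+ // C = 24 this becomes G_SEXT_INREG %x, 8 (illustrative).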
+ int64_t ShlCst, AshrCst;
+ Register Src;
+ // FIXME: detect splat constant vectors.
+ if (!mi_match(MI.getOperand(0).getReg(), MRI,
+ m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst))))
+ return false;
+ if (ShlCst != AshrCst)
+ return false;
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
+ return false;
+ MatchInfo = std::make_tuple(Src, ShlCst);
+ return true;
+}
+bool CombinerHelper::applyAshShlToSextInreg(
+ MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ASHR);
+ Register Src;
+ int64_t ShiftAmt;
+ std::tie(Src, ShiftAmt) = MatchInfo;
+ unsigned Size = MRI.getType(Src).getScalarSizeInBits();
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
+ Register &Replacement) {
+ // Given
+ //
+ // %y:_(sN) = G_SOMETHING
+ // %x:_(sN) = G_SOMETHING
+ // %res:_(sN) = G_AND %x, %y
+ //
+ // Eliminate the G_AND when it is known that x & y == x or x & y == y.
+ //
+ // Patterns like this can appear as a result of legalization. E.g.
+ //
+ // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
+ // %one:_(s32) = G_CONSTANT i32 1
+ // %and:_(s32) = G_AND %cmp, %one
+ //
+ // In this case, G_ICMP only produces a single bit, so x & 1 == x.
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+ if (!KB)
+ return false;
+
+ Register AndDst = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(AndDst);
+
+ // FIXME: This should be removed once GISelKnownBits supports vectors.
+ if (DstTy.isVector())
+ return false;
+
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ KnownBits LHSBits = KB->getKnownBits(LHS);
+ KnownBits RHSBits = KB->getKnownBits(RHS);
+
+ // Check that x & Mask == x.
+ // x & 1 == x, always
+ // x & 0 == x, only if x is also 0
+ // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
+ //
+ // Check if we can replace AndDst with the LHS of the G_AND
+ if (canReplaceReg(AndDst, LHS, MRI) &&
+ (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
+ Replacement = LHS;
+ return true;
+ }
+
+ // Check if we can replace AndDst with the RHS of the G_AND
+ if (canReplaceReg(AndDst, RHS, MRI) &&
+ (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
+ Replacement = RHS;
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
+ // Given
+ //
+ // %y:_(sN) = G_SOMETHING
+ // %x:_(sN) = G_SOMETHING
+ // %res:_(sN) = G_OR %x, %y
+ //
+ // Eliminate the G_OR when it is known that x | y == x or x | y == y.
+ assert(MI.getOpcode() == TargetOpcode::G_OR);
+ if (!KB)
+ return false;
+
+ Register OrDst = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(OrDst);
+
+ // FIXME: This should be removed once GISelKnownBits supports vectors.
+ if (DstTy.isVector())
+ return false;
+
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ KnownBits LHSBits = KB->getKnownBits(LHS);
+ KnownBits RHSBits = KB->getKnownBits(RHS);
+
+ // Check that x | Mask == x.
+ // x | 0 == x, always
+ // x | 1 == x, only if x is also 1
+ // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
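+ //
+ // Concrete illustration (added for exposition): if the low byte of x is
+ // known to be all ones (x.One = 0x000000FF) and the mask is known to fit in
+ // that byte (Mask.Zero = 0xFFFFFF00), then x | Mask == x and the G_OR can be
+ // dropped.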
+ //
+ // Check if we can replace OrDst with the LHS of the G_OR
+ if (canReplaceReg(OrDst, LHS, MRI) &&
+ (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
+ Replacement = LHS;
+ return true;
+ }
+
+ // Check if we can replace OrDst with the RHS of the G_OR
+ if (canReplaceReg(OrDst, RHS, MRI) &&
+ (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
+ Replacement = RHS;
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
+ // If the input is already sign extended, just drop the extension.
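+ // E.g. (illustrative): for G_SEXT_INREG %x, 8 on an s32 value, the extension
+ // is redundant once %x is known to have at least 32 - 8 + 1 = 25 sign bits.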
+ Register Src = MI.getOperand(1).getReg();
+ unsigned ExtBits = MI.getOperand(2).getImm();
+ unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
+ return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
+}
+
+static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
+ int64_t Cst, bool IsVector, bool IsFP) {
+ // For i1, Cst will always be -1 regardless of boolean contents.
+ return (ScalarSizeBits == 1 && Cst == -1) ||
+ isConstTrueVal(TLI, Cst, IsVector, IsFP);
+}
+
+bool CombinerHelper::matchNotCmp(MachineInstr &MI,
+ SmallVectorImpl<Register> &RegsToNegate) {
+ assert(MI.getOpcode() == TargetOpcode::G_XOR);
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
+ Register XorSrc;
+ Register CstReg;
+ // We match xor(src, true) here.
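+ // Illustrative instance of the overall combine (invented register names):
+ //   %c:_(s1) = G_ICMP intpred(eq), %a(s32), %b
+ //   %t:_(s1) = G_CONSTANT i1 true
+ //   %n:_(s1) = G_XOR %c, %t
+ // applyNotCmp will invert the predicate of %c to intpred(ne) and replace all
+ // uses of %n with %c.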
+ if (!mi_match(MI.getOperand(0).getReg(), MRI,
+ m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
+ return false;
+
+ if (!MRI.hasOneNonDBGUse(XorSrc))
+ return false;
+
+ // Check that XorSrc is the root of a tree of comparisons combined with ANDs
+ // and ORs. The suffix of RegsToNegate starting from index I is used as a
+ // work list of tree nodes to visit.
+ RegsToNegate.push_back(XorSrc);
+ // Remember whether the comparisons are all integer or all floating point.
+ bool IsInt = false;
+ bool IsFP = false;
+ for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
+ Register Reg = RegsToNegate[I];
+ if (!MRI.hasOneNonDBGUse(Reg))
+ return false;
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ switch (Def->getOpcode()) {
+ default:
+ // Don't match if the tree contains anything other than ANDs, ORs and
+ // comparisons.
+ return false;
+ case TargetOpcode::G_ICMP:
+ if (IsFP)
+ return false;
+ IsInt = true;
+ // When we apply the combine we will invert the predicate.
+ break;
+ case TargetOpcode::G_FCMP:
+ if (IsInt)
+ return false;
+ IsFP = true;
+ // When we apply the combine we will invert the predicate.
+ break;
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ // Implement De Morgan's laws:
+ // ~(x & y) -> ~x | ~y
+ // ~(x | y) -> ~x & ~y
+ // When we apply the combine we will change the opcode and recursively
+ // negate the operands.
+ RegsToNegate.push_back(Def->getOperand(1).getReg());
+ RegsToNegate.push_back(Def->getOperand(2).getReg());
+ break;
+ }
+ }
+
+ // Now we know whether the comparisons are integer or floating point, check
+ // the constant in the xor.
+ int64_t Cst;
+ if (Ty.isVector()) {
+ MachineInstr *CstDef = MRI.getVRegDef(CstReg);
+ auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI);
+ if (!MaybeCst)
+ return false;
+ if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
+ return false;
+ } else {
+ if (!mi_match(CstReg, MRI, m_ICst(Cst)))
+ return false;
+ if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
+ return false;
+ }
+
+ return true;
+}
+
+bool CombinerHelper::applyNotCmp(MachineInstr &MI,
+ SmallVectorImpl<Register> &RegsToNegate) {
+ for (Register Reg : RegsToNegate) {
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ Observer.changingInstr(*Def);
+ // For each comparison, invert the opcode. For each AND and OR, change the
+ // opcode.
+ switch (Def->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case TargetOpcode::G_ICMP:
+ case TargetOpcode::G_FCMP: {
+ MachineOperand &PredOp = Def->getOperand(1);
+ CmpInst::Predicate NewP = CmpInst::getInversePredicate(
+ (CmpInst::Predicate)PredOp.getPredicate());
+ PredOp.setPredicate(NewP);
+ break;
+ }
+ case TargetOpcode::G_AND:
+ Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
+ break;
+ case TargetOpcode::G_OR:
+ Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
+ break;
+ }
+ Observer.changedInstr(*Def);
+ }
+
+ replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchXorOfAndWithSameReg(
+ MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+ // Match (xor (and x, y), y) (or any of its commuted cases)
+ assert(MI.getOpcode() == TargetOpcode::G_XOR);
+ Register &X = MatchInfo.first;
+ Register &Y = MatchInfo.second;
+ Register AndReg = MI.getOperand(1).getReg();
+ Register SharedReg = MI.getOperand(2).getReg();
+
+ // Find a G_AND on either side of the G_XOR.
+ // Look for one of
+ //
+ // (xor (and x, y), SharedReg)
+ // (xor SharedReg, (and x, y))
+ if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
+ std::swap(AndReg, SharedReg);
+ if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
+ return false;
+ }
+
+ // Only do this if we'll eliminate the G_AND.
+ if (!MRI.hasOneNonDBGUse(AndReg))
+ return false;
+
+ // We can combine if SharedReg is the same as either the LHS or RHS of the
+ // G_AND.
+ if (Y != SharedReg)
+ std::swap(X, Y);
+ return Y == SharedReg;
+}
+
+bool CombinerHelper::applyXorOfAndWithSameReg(
+ MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+ // Fold (xor (and x, y), y) -> (and (not x), y)
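+ // (Explanatory note: where a bit of y is 0 both sides are 0, and where a bit
+ // of y is 1 both sides equal the inverted bit of x.)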
+ Builder.setInstrAndDebugLoc(MI);
+ Register X, Y;
+ std::tie(X, Y) = MatchInfo;
+ auto Not = Builder.buildNot(MRI.getType(X), X);
+ Observer.changingInstr(MI);
+ MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
+ MI.getOperand(1).setReg(Not->getOperand(0).getReg());
+ MI.getOperand(2).setReg(Y);
+ Observer.changedInstr(MI);
+ return true;
+}
+
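+/// Match a G_PTR_ADD whose base pointer is a null constant (or a vector of
+/// them), so the add can be rewritten as an int-to-ptr of the offset alone.
+/// Sketch of the scalar shape this covers (illustrative, invented registers):
+///   %null:_(p0) = G_CONSTANT i64 0
+///   %dst:_(p0) = G_PTR_ADD %null, %off(s64)
+///  -->
+///   %dst:_(p0) = G_INTTOPTR %off(s64)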
+bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DstReg);
+ const DataLayout &DL = Builder.getMF().getDataLayout();
+
+ if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
+ return false;
+
+ if (Ty.isPointer()) {
+ auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI);
+ return ConstVal && *ConstVal == 0;
+ }
+
+ assert(Ty.isVector() && "Expecting a vector type");
+ const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg());
+ return isBuildVectorAllZeros(*VecMI, MRI);
+}
+
+bool CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2));
+ MI.eraseFromParent();
+ return true;
+}
+
+/// The second source operand is known to be a power of 2.
+bool CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Pow2Src1 = MI.getOperand(2).getReg();
+ LLT Ty = MRI.getType(DstReg);
+ Builder.setInstrAndDebugLoc(MI);
+
+ // Fold (urem x, pow2) -> (and x, pow2-1)
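+ // E.g. (illustrative): x urem 8 becomes x & 7; the mask is formed as
+ // pow2 + (-1) because the power-of-two operand is a register value, not
+ // necessarily a literal constant.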
+ auto NegOne = Builder.buildConstant(Ty, -1);
+ auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
+ Builder.buildAnd(DstReg, Src0, Add);
+ MI.eraseFromParent();
+ return true;
+}
+
+Optional<SmallVector<Register, 8>>
+CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
+ assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
+ // We want to detect if Root is part of a tree which represents a bunch
+ // of loads being merged into a larger load. We'll try to recognize patterns
+ // like, for example:
+ //
+ //   Reg   Reg
+ //    \   /
+ //     OR_1   Reg
+ //      \    /
+ //       OR_2
+ //         \    Reg
+ //          ..  /
+ //          Root
+ //
+ //   Reg   Reg   Reg   Reg
+ //    \   /       \   /
+ //     OR_1        OR_2
+ //       \          /
+ //        \        /
+ //            ...
+ //           Root
+ //
+ // Each "Reg" may have been produced by a load + some arithmetic. This
+ // function will save each of them.
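+ //
+ // For instance (illustrative numbers): an s32 value assembled from four byte
+ // loads is produced by at most three G_ORs, and this walk collects the four
+ // leaf registers feeding them.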
+ SmallVector<Register, 8> RegsToVisit;
+ SmallVector<const MachineInstr *, 7> Ors = {Root};
+
+ // In the "worst" case, we're dealing with a load for each byte. So, there
+ // are at most #bytes - 1 ORs.
+ const unsigned MaxIter =
+ MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
+ for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
+ if (Ors.empty())
+ break;
+ const MachineInstr *Curr = Ors.pop_back_val();
+ Register OrLHS = Curr->getOperand(1).getReg();
+ Register OrRHS = Curr->getOperand(2).getReg();
+
+ // In the combine, we want to eliminate the entire tree.
+ if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
+ return None;
+
+ // If it's a G_OR, save it and continue to walk. If it's not, then it's
+ // something that may be a load + arithmetic.
+ if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
+ Ors.push_back(Or);
+ else
+ RegsToVisit.push_back(OrLHS);
+ if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
+ Ors.push_back(Or);
+ else
+ RegsToVisit.push_back(OrRHS);
+ }
+
+ // We're going to try to merge each register into a wider power-of-2 type,
+ // so we ought to have an even number of registers.
+ if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
+ return None;
+ return RegsToVisit;
+}
+
+/// Helper function for findLoadOffsetsForLoadOrCombine.
+///
+/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
+/// and then moving that value into a specific byte offset.
+///
+/// e.g. x[i] << 24
+///
+/// \returns The load instruction and the byte offset it is moved into.
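+///
+/// For example (illustrative): with \p MemSizeInBits = 8, a register defined
+/// by (shl (zextload x[i]), 24) yields that load and byte offset 24 / 8 = 3.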
+static Optional<std::pair<MachineInstr *, int64_t>>
+matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
+ const MachineRegisterInfo &MRI) {
+ assert(MRI.hasOneNonDBGUse(Reg) &&
+ "Expected Reg to only have one non-debug use?");
+ Register MaybeLoad;
+ int64_t Shift;
+ if (!mi_match(Reg, MRI,
+ m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
+ Shift = 0;
+ MaybeLoad = Reg;
+ }
+
+ if (Shift % MemSizeInBits != 0)
+ return None;
+
+ // TODO: Handle other types of loads.
+ auto *Load = getOpcodeDef(TargetOpcode::G_ZEXTLOAD, MaybeLoad, MRI);
+ if (!Load)
+ return None;
+
+ const auto &MMO = **Load->memoperands_begin();
+ if (!MMO.isUnordered() || MMO.getSizeInBits() != MemSizeInBits)
+ return None;
+
+ return std::make_pair(Load, Shift / MemSizeInBits);
+}
+
+Optional<std::pair<MachineInstr *, int64_t>>
+CombinerHelper::findLoadOffsetsForLoadOrCombine(
+ SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
+ const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
+
+ // Each load found for the pattern. There should be one for each RegsToVisit.
+ SmallSetVector<const MachineInstr *, 8> Loads;
+
+ // The lowest index used in any load. (The lowest "i" for each x[i].)
+ int64_t LowestIdx = INT64_MAX;
+
+ // The load which uses the lowest index.
+ MachineInstr *LowestIdxLoad = nullptr;
+
+ // Keeps track of the load indices we see. We shouldn't see any indices twice.
+ SmallSet<int64_t, 8> SeenIdx;
+
+ // Ensure each load is in the same MBB.
+ // TODO: Support multiple MachineBasicBlocks.
+ MachineBasicBlock *MBB = nullptr;
+ const MachineMemOperand *MMO = nullptr;
+
+ // Earliest instruction-order load in the pattern.
+ MachineInstr *EarliestLoad = nullptr;
+
+ // Latest instruction-order load in the pattern.
+ MachineInstr *LatestLoad = nullptr;
+
+ // Base pointer which every load should share.
+ Register BasePtr;
+
+ // We want to find a load for each register. Each load should have some
+ // appropriate bit twiddling arithmetic. During this loop, we will also keep
+ // track of the load which uses the lowest index. Later, we will check if we
+ // can use its pointer in the final, combined load.
+ for (auto Reg : RegsToVisit) {
+ // Find the load, and find the byte position that its value will end up in
+ // (e.g. as a shifted value).
+ auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
+ if (!LoadAndPos)
+ return None;
+ MachineInstr *Load;
+ int64_t DstPos;
+ std::tie(Load, DstPos) = *LoadAndPos;
+
+ // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
+ // it is difficult to check for stores/calls/etc between loads.
+ MachineBasicBlock *LoadMBB = Load->getParent();
+ if (!MBB)
+ MBB = LoadMBB;
+ if (LoadMBB != MBB)
+ return None;
+
+ // Make sure that the MachineMemOperands of every seen load are compatible.
+ const MachineMemOperand *LoadMMO = *Load->memoperands_begin();
+ if (!MMO)
+ MMO = LoadMMO;
+ if (MMO->getAddrSpace() != LoadMMO->getAddrSpace())
+ return None;
+
+ // Find out what the base pointer and index for the load is.
+ Register LoadPtr;
+ int64_t Idx;
+ if (!mi_match(Load->getOperand(1).getReg(), MRI,
+ m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
+ LoadPtr = Load->getOperand(1).getReg();
+ Idx = 0;
+ }
+
+ // Don't combine things like a[i], a[i] -> a bigger load.
+ if (!SeenIdx.insert(Idx).second)
+ return None;
+
+ // Every load must share the same base pointer; don't combine things like:
+ //
+ // a[i], b[i + 1] -> a bigger load.
+ if (!BasePtr.isValid())
+ BasePtr = LoadPtr;
+ if (BasePtr != LoadPtr)
+ return None;
+
+ if (Idx < LowestIdx) {
+ LowestIdx = Idx;
+ LowestIdxLoad = Load;
+ }
+
+ // Keep track of the byte offset that this load ends up at. If we have seen
+ // the byte offset, then stop here. We do not want to combine:
+ //
+ // a[i] << 16, a[i + k] << 16 -> a bigger load.
+ if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
+ return None;
+ Loads.insert(Load);
+
+ // Keep track of the position of the earliest/latest loads in the pattern.
+ // We will check that there are no load fold barriers between them later
+ // on.
+ //
+ // FIXME: Is there a better way to check for load fold barriers?
+ if (!EarliestLoad || dominates(*Load, *EarliestLoad))
+ EarliestLoad = Load;
+ if (!LatestLoad || dominates(*LatestLoad, *Load))
+ LatestLoad = Load;
+ }
+
+ // We found a load for each register. Let's check if each load satisfies the
+ // pattern.
+ assert(Loads.size() == RegsToVisit.size() &&
+ "Expected to find a load for each register?");
+ assert(EarliestLoad != LatestLoad && EarliestLoad &&
+ LatestLoad && "Expected at least two loads?");
+
+ // Check if there are any stores, calls, etc. between any of the loads. If
+ // there are, then we can't safely perform the combine.
+ //
+ // MaxIter is chosen based on the (worst case) number of iterations it
+ // typically takes to succeed in the LLVM test suite plus some padding.
+ //
+ // FIXME: Is there a better way to check for load fold barriers?
+ const unsigned MaxIter = 20;
+ unsigned Iter = 0;
+ for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
+ LatestLoad->getIterator())) {
+ if (Loads.count(&MI))
+ continue;
+ if (MI.isLoadFoldBarrier())
+ return None;
+ if (Iter++ == MaxIter)
+ return None;
+ }
+
+ return std::make_pair(LowestIdxLoad, LowestIdx);
+}
+
+bool CombinerHelper::matchLoadOrCombine(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_OR);
+ MachineFunction &MF = *MI.getMF();
+ // Assuming a little-endian target, transform:
+ // s8 *a = ...
+ // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
+ // =>
+ // s32 val = *((s32)a)
+ //
+ // s8 *a = ...
+ // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
+ // =>
+ // s32 val = BSWAP(*((s32)a))
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ if (Ty.isVector())
+ return false;
+
+ // We need to combine at least two loads into this type. Since the smallest
+ // possible load is into a byte, we need at least a 16-bit wide type.
+ const unsigned WideMemSizeInBits = Ty.getSizeInBits();
+ if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
+ return false;
+
+ // Match a collection of non-OR instructions in the pattern.
+ auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
+ if (!RegsToVisit)
+ return false;
+
+ // We have a collection of non-OR instructions. Figure out how wide each of
+ // the small loads should be based on the number of potential loads we
+ // found.
+ const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
+ if (NarrowMemSizeInBits % 8 != 0)
+ return false;
+
+ // Check if each register feeding into each OR is a load from the same
+ // base pointer + some arithmetic.
+ //
+ // e.g. a[0], a[1] << 8, a[2] << 16, etc.
+ //
+ // Also verify that each of these ends up putting a[i] into the same memory
+ // offset as a load into a wide type would.
+ SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
+ MachineInstr *LowestIdxLoad;
+ int64_t LowestIdx;
+ auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
+ MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
+ if (!MaybeLoadInfo)
+ return false;
+ std::tie(LowestIdxLoad, LowestIdx) = *MaybeLoadInfo;
+
+ // We have a bunch of loads being OR'd together. Using the addresses + offsets
+ // we found before, check if this corresponds to a big or little endian byte
+ // pattern. If it does, then we can represent it using a load + possibly a
+ // BSWAP.
+ bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
+ Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
+ if (!IsBigEndian.hasValue())
+ return false;
+ bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
+ if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
+ return false;
+
+ // Make sure that the load from the lowest index produces offset 0 in the
+ // final value.
+ //
+ // This ensures that we won't combine something like this:
+ //
+ // load x[i] -> byte 2
+ // load x[i+1] -> byte 0 ---> wide_load x[i]
+ // load x[i+2] -> byte 1
+ const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
+ const unsigned ZeroByteOffset =
+ *IsBigEndian
+ ? bigEndianByteAt(NumLoadsInTy, 0)
+ : littleEndianByteAt(NumLoadsInTy, 0);
+ auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
+ if (ZeroOffsetIdx == MemOffset2Idx.end() ||
+ ZeroOffsetIdx->second != LowestIdx)
+ return false;
+
+ // We will reuse the pointer from the load which ends up at byte offset 0. It
+ // may not use index 0.
+ Register Ptr = LowestIdxLoad->getOperand(1).getReg();
+ const MachineMemOperand &MMO = **LowestIdxLoad->memoperands_begin();
+ LegalityQuery::MemDesc MMDesc;
+ MMDesc.SizeInBits = WideMemSizeInBits;
+ MMDesc.AlignInBits = MMO.getAlign().value() * 8;
+ MMDesc.Ordering = MMO.getOrdering();
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
+ return false;
+ auto PtrInfo = MMO.getPointerInfo();
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
+
+ // Load must be allowed and fast on the target.
+ LLVMContext &C = MF.getFunction().getContext();
+ auto &DL = MF.getDataLayout();
+ bool Fast = false;
+ if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
+ !Fast)
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &MIB) {
+ Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
+ MIB.buildLoad(LoadDst, Ptr, *NewMMO);
+ if (NeedsBSwap)
+ MIB.buildBSwap(Dst, LoadDst);
+ };
+ return true;
+}
+
+bool CombinerHelper::applyLoadOrCombine(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ Builder.setInstrAndDebugLoc(MI);
+ MatchInfo(Builder);
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;