Diffstat (limited to 'llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp')
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp  1790
1 file changed, 1143 insertions(+), 647 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 06d827de2e96..3a52959d54bf 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -12,9 +12,11 @@
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -26,8 +28,10 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetMachine.h"
#include <tuple>
#define DEBUG_TYPE "gi-combiner"
@@ -46,8 +50,9 @@ CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
MachineIRBuilder &B, GISelKnownBits *KB,
MachineDominatorTree *MDT,
const LegalizerInfo *LI)
- : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
- KB(KB), MDT(MDT), LI(LI) {
+ : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
+ MDT(MDT), LI(LI), RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
+ TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
(void)this->KB;
}
@@ -64,6 +69,16 @@ static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
return I;
}
+/// Determines the LogBase2 value for a non-null input value using the
+/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
+static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
+ auto &MRI = *MIB.getMRI();
+ LLT Ty = MRI.getType(V);
+ auto Ctlz = MIB.buildCTLZ(Ty, V);
+ auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
+ return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
+}
+
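Illustrative sketch (not part of the patch): the same (EltBits - 1) - ctlz(V) arithmetic that buildLogBase2 emits as G_CTLZ/G_SUB, checked on plain 32-bit integers using the GCC/Clang __builtin_clz intrinsic.

    // logBase2 assumes V is a non-zero power of two, mirroring the combine's
    // precondition that the input is non-null.
    #include <cassert>
    #include <cstdint>

    static unsigned logBase2(uint32_t V) {
      return 31u - static_cast<unsigned>(__builtin_clz(V)); // (EltBits-1) - ctlz
    }

    int main() {
      assert(logBase2(1u) == 0);
      assert(logBase2(8u) == 3);
      assert(logBase2(1u << 20) == 20);
      return 0;
    }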
/// \returns The big endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
@@ -143,6 +158,24 @@ void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
Observer.changedInstr(*FromRegOp.getParent());
}
+void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
+ unsigned ToOpcode) const {
+ Observer.changingInstr(FromMI);
+
+ FromMI.setDesc(Builder.getTII().get(ToOpcode));
+
+ Observer.changedInstr(FromMI);
+}
+
+const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
+ return RBI->getRegBank(Reg, MRI, *TRI);
+}
+
+void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) {
+ if (RegBank)
+ MRI.setRegBank(Reg, *RegBank);
+}
+
bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
if (matchCombineCopy(MI)) {
applyCombineCopy(MI);
@@ -486,10 +519,7 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
continue;
// Check for legality.
if (LI) {
- LegalityQuery::MemDesc MMDesc;
- MMDesc.MemoryTy = MMO.getMemoryType();
- MMDesc.AlignInBits = MMO.getAlign().value() * 8;
- MMDesc.Ordering = MMO.getSuccessOrdering();
+ LegalityQuery::MemDesc MMDesc(MMO);
LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
if (LI->getAction({LoadMI->getOpcode(), {UseTy, SrcTy}, {MMDesc}})
@@ -623,13 +653,83 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
Observer.changedInstr(MI);
}
+bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+
+ // If we have the following code:
+ // %mask = G_CONSTANT 255
+ // %ld = G_LOAD %ptr, (load s16)
+ // %and = G_AND %ld, %mask
+ //
+ // Try to fold it into
+ // %ld = G_ZEXTLOAD %ptr, (load s8)
+
+ Register Dst = MI.getOperand(0).getReg();
+ if (MRI.getType(Dst).isVector())
+ return false;
+
+ auto MaybeMask =
+ getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ if (!MaybeMask)
+ return false;
+
+ APInt MaskVal = MaybeMask->Value;
+
+ if (!MaskVal.isMask())
+ return false;
+
+ Register SrcReg = MI.getOperand(1).getReg();
+ GAnyLoad *LoadMI = getOpcodeDef<GAnyLoad>(SrcReg, MRI);
+ if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()) ||
+ !LoadMI->isSimple())
+ return false;
+
+ Register LoadReg = LoadMI->getDstReg();
+ LLT LoadTy = MRI.getType(LoadReg);
+ Register PtrReg = LoadMI->getPointerReg();
+ uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
+ unsigned MaskSizeBits = MaskVal.countTrailingOnes();
+
+  // The mask may not be larger than the in-memory type, as it might cover
+  // sign-extended bits.
+ if (MaskSizeBits > LoadSizeBits)
+ return false;
+
+ // If the mask covers the whole destination register, there's nothing to
+ // extend
+ if (MaskSizeBits >= LoadTy.getSizeInBits())
+ return false;
+
+ // Most targets cannot deal with loads of size < 8 and need to re-legalize to
+ // at least byte loads. Avoid creating such loads here
+ if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
+ return false;
+
+ const MachineMemOperand &MMO = LoadMI->getMMO();
+ LegalityQuery::MemDesc MemDesc(MMO);
+ MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_ZEXTLOAD, {LoadTy, MRI.getType(PtrReg)}, {MemDesc}}))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*LoadMI);
+ auto &MF = B.getMF();
+ auto PtrInfo = MMO.getPointerInfo();
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MaskSizeBits / 8);
+ B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
+ };
+ return true;
+}
+
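Illustrative sketch (not part of the patch): the width checks above on plain integers, deciding whether an AND mask over a loaded value can be replaced by a narrower zero-extending load. The helper names are hypothetical.

    #include <cassert>
    #include <cstdint>

    static bool isLowBitMask(uint64_t M) { return M != 0 && ((M + 1) & M) == 0; }

    // MemBits: size of the original load in memory; RegBits: destination width.
    static bool canUseZextLoad(uint64_t Mask, unsigned MemBits, unsigned RegBits) {
      if (!isLowBitMask(Mask))
        return false;
      unsigned MaskBits = __builtin_popcountll(Mask); // number of trailing ones
      return MaskBits <= MemBits &&  // must not cover (sign-)extended bits
             MaskBits < RegBits &&   // otherwise there is nothing to extend
             MaskBits >= 8 && (MaskBits & (MaskBits - 1)) == 0; // byte-sized pow2
    }

    int main() {
      assert(canUseZextLoad(0xFF, 16, 32));   // (load s16) & 0xFF -> zextload s8
      assert(!canUseZextLoad(0xFFFF, 8, 32)); // mask wider than the memory type
      assert(!canUseZextLoad(0x7F, 16, 32));  // would need a sub-byte load
      return 0;
    }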
bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
const MachineInstr &UseMI) {
assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
"shouldn't consider debug uses");
assert(DefMI.getParent() == UseMI.getParent());
if (&DefMI == &UseMI)
- return false;
+ return true;
const MachineBasicBlock &MBB = *DefMI.getParent();
auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
return &MI == &DefMI || &MI == &UseMI;
@@ -711,6 +811,16 @@ bool CombinerHelper::matchSextInRegOfLoad(
// anyway for most targets.
if (!isPowerOf2_32(NewSizeBits))
return false;
+
+ const MachineMemOperand &MMO = LoadDef->getMMO();
+ LegalityQuery::MemDesc MMDesc(MMO);
+ MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
+ {MRI.getType(LoadDef->getDstReg()),
+ MRI.getType(LoadDef->getPointerReg())},
+ {MMDesc}}))
+ return false;
+
MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
return true;
}
@@ -1093,81 +1203,6 @@ void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
Observer.changedInstr(*BrCond);
}
-static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
- // On Darwin, -Os means optimize for size without hurting performance, so
- // only really optimize for size when -Oz (MinSize) is used.
- if (MF.getTarget().getTargetTriple().isOSDarwin())
- return MF.getFunction().hasMinSize();
- return MF.getFunction().hasOptSize();
-}
-
-// Returns a list of types to use for memory op lowering in MemOps. A partial
-// port of findOptimalMemOpLowering in TargetLowering.
-static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
- unsigned Limit, const MemOp &Op,
- unsigned DstAS, unsigned SrcAS,
- const AttributeList &FuncAttributes,
- const TargetLowering &TLI) {
- if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
- return false;
-
- LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
-
- if (Ty == LLT()) {
- // Use the largest scalar type whose alignment constraints are satisfied.
- // We only need to check DstAlign here as SrcAlign is always greater or
- // equal to DstAlign (or zero).
- Ty = LLT::scalar(64);
- if (Op.isFixedDstAlign())
- while (Op.getDstAlign() < Ty.getSizeInBytes() &&
- !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
- Ty = LLT::scalar(Ty.getSizeInBytes());
- assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
- // FIXME: check for the largest legal type we can load/store to.
- }
-
- unsigned NumMemOps = 0;
- uint64_t Size = Op.size();
- while (Size) {
- unsigned TySize = Ty.getSizeInBytes();
- while (TySize > Size) {
- // For now, only use non-vector load / store's for the left-over pieces.
- LLT NewTy = Ty;
- // FIXME: check for mem op safety and legality of the types. Not all of
- // SDAGisms map cleanly to GISel concepts.
- if (NewTy.isVector())
- NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
- NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
- unsigned NewTySize = NewTy.getSizeInBytes();
- assert(NewTySize > 0 && "Could not find appropriate type");
-
- // If the new LLT cannot cover all of the remaining bits, then consider
- // issuing a (or a pair of) unaligned and overlapping load / store.
- bool Fast;
- // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
- MVT VT = getMVTForLLT(Ty);
- if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
- TLI.allowsMisalignedMemoryAccesses(
- VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
- MachineMemOperand::MONone, &Fast) &&
- Fast)
- TySize = Size;
- else {
- Ty = NewTy;
- TySize = NewTySize;
- }
- }
-
- if (++NumMemOps > Limit)
- return false;
-
- MemOps.push_back(Ty);
- Size -= TySize;
- }
-
- return true;
-}
-
static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
if (Ty.isVector())
return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
@@ -1175,460 +1210,20 @@ static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
return IntegerType::get(C, Ty.getSizeInBits());
}
-// Get a vectorized representation of the memset value operand, GISel edition.
-static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
- MachineRegisterInfo &MRI = *MIB.getMRI();
- unsigned NumBits = Ty.getScalarSizeInBits();
- auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
- if (!Ty.isVector() && ValVRegAndVal) {
- APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8);
- APInt SplatVal = APInt::getSplat(NumBits, Scalar);
- return MIB.buildConstant(Ty, SplatVal).getReg(0);
- }
-
- // Extend the byte value to the larger type, and then multiply by a magic
- // value 0x010101... in order to replicate it across every byte.
- // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
- if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
- return MIB.buildConstant(Ty, 0).getReg(0);
- }
-
- LLT ExtType = Ty.getScalarType();
- auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
- if (NumBits > 8) {
- APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
- auto MagicMI = MIB.buildConstant(ExtType, Magic);
- Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
- }
-
- // For vector types create a G_BUILD_VECTOR.
- if (Ty.isVector())
- Val = MIB.buildSplatVector(Ty, Val).getReg(0);
-
- return Val;
-}
-
-bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
- Register Val, uint64_t KnownLen,
- Align Alignment, bool IsVolatile) {
- auto &MF = *MI.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
- auto &DL = MF.getDataLayout();
- LLVMContext &C = MF.getFunction().getContext();
-
- assert(KnownLen != 0 && "Have a zero length memset length!");
-
- bool DstAlignCanChange = false;
- MachineFrameInfo &MFI = MF.getFrameInfo();
- bool OptSize = shouldLowerMemFuncForSize(MF);
-
- MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
- if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
- DstAlignCanChange = true;
-
- unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
- std::vector<LLT> MemOps;
-
- const auto &DstMMO = **MI.memoperands_begin();
- MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
-
- auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
- bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
-
- if (!findGISelOptimalMemOpLowering(MemOps, Limit,
- MemOp::Set(KnownLen, DstAlignCanChange,
- Alignment,
- /*IsZeroMemset=*/IsZeroVal,
- /*IsVolatile=*/IsVolatile),
- DstPtrInfo.getAddrSpace(), ~0u,
- MF.getFunction().getAttributes(), TLI))
- return false;
-
- if (DstAlignCanChange) {
- // Get an estimate of the type from the LLT.
- Type *IRTy = getTypeForLLT(MemOps[0], C);
- Align NewAlign = DL.getABITypeAlign(IRTy);
- if (NewAlign > Alignment) {
- Alignment = NewAlign;
- unsigned FI = FIDef->getOperand(1).getIndex();
- // Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlign(FI) < Alignment)
- MFI.setObjectAlignment(FI, Alignment);
- }
- }
-
- MachineIRBuilder MIB(MI);
- // Find the largest store and generate the bit pattern for it.
- LLT LargestTy = MemOps[0];
- for (unsigned i = 1; i < MemOps.size(); i++)
- if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
- LargestTy = MemOps[i];
-
- // The memset stored value is always defined as an s8, so in order to make it
- // work with larger store types we need to repeat the bit pattern across the
- // wider type.
- Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
-
- if (!MemSetValue)
- return false;
-
- // Generate the stores. For each store type in the list, we generate the
- // matching store of that type to the destination address.
- LLT PtrTy = MRI.getType(Dst);
- unsigned DstOff = 0;
- unsigned Size = KnownLen;
- for (unsigned I = 0; I < MemOps.size(); I++) {
- LLT Ty = MemOps[I];
- unsigned TySize = Ty.getSizeInBytes();
- if (TySize > Size) {
- // Issuing an unaligned load / store pair that overlaps with the previous
- // pair. Adjust the offset accordingly.
- assert(I == MemOps.size() - 1 && I != 0);
- DstOff -= TySize - Size;
- }
-
- // If this store is smaller than the largest store see whether we can get
- // the smaller value for free with a truncate.
- Register Value = MemSetValue;
- if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
- MVT VT = getMVTForLLT(Ty);
- MVT LargestVT = getMVTForLLT(LargestTy);
- if (!LargestTy.isVector() && !Ty.isVector() &&
- TLI.isTruncateFree(LargestVT, VT))
- Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
- else
- Value = getMemsetValue(Val, Ty, MIB);
- if (!Value)
- return false;
- }
-
- auto *StoreMMO =
- MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
-
- Register Ptr = Dst;
- if (DstOff != 0) {
- auto Offset =
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
- Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
- }
-
- MIB.buildStore(Value, Ptr, *StoreMMO);
- DstOff += Ty.getSizeInBytes();
- Size -= TySize;
- }
-
- MI.eraseFromParent();
- return true;
-}
-
bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
-
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Len = MI.getOperand(2).getReg();
-
- const auto *MMOIt = MI.memoperands_begin();
- const MachineMemOperand *MemOp = *MMOIt;
- bool IsVolatile = MemOp->isVolatile();
-
- // See if this is a constant length copy
- auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
- // FIXME: support dynamically sized G_MEMCPY_INLINE
- assert(LenVRegAndVal.hasValue() &&
- "inline memcpy with dynamic size is not yet supported");
- uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
- if (KnownLen == 0) {
- MI.eraseFromParent();
- return true;
- }
-
- const auto &DstMMO = **MI.memoperands_begin();
- const auto &SrcMMO = **std::next(MI.memoperands_begin());
- Align DstAlign = DstMMO.getBaseAlign();
- Align SrcAlign = SrcMMO.getBaseAlign();
-
- return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
- IsVolatile);
-}
-
-bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI, Register Dst,
- Register Src, uint64_t KnownLen,
- Align DstAlign, Align SrcAlign,
- bool IsVolatile) {
- assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
- return optimizeMemcpy(MI, Dst, Src, KnownLen,
- std::numeric_limits<uint64_t>::max(), DstAlign,
- SrcAlign, IsVolatile);
-}
-
-bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
- Register Src, uint64_t KnownLen,
- uint64_t Limit, Align DstAlign,
- Align SrcAlign, bool IsVolatile) {
- auto &MF = *MI.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
- auto &DL = MF.getDataLayout();
- LLVMContext &C = MF.getFunction().getContext();
-
- assert(KnownLen != 0 && "Have a zero length memcpy length!");
-
- bool DstAlignCanChange = false;
- MachineFrameInfo &MFI = MF.getFrameInfo();
- Align Alignment = commonAlignment(DstAlign, SrcAlign);
-
- MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
- if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
- DstAlignCanChange = true;
-
- // FIXME: infer better src pointer alignment like SelectionDAG does here.
- // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
- // if the memcpy is in a tail call position.
-
- std::vector<LLT> MemOps;
-
- const auto &DstMMO = **MI.memoperands_begin();
- const auto &SrcMMO = **std::next(MI.memoperands_begin());
- MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
- MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
-
- if (!findGISelOptimalMemOpLowering(
- MemOps, Limit,
- MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
- IsVolatile),
- DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
- MF.getFunction().getAttributes(), TLI))
- return false;
-
- if (DstAlignCanChange) {
- // Get an estimate of the type from the LLT.
- Type *IRTy = getTypeForLLT(MemOps[0], C);
- Align NewAlign = DL.getABITypeAlign(IRTy);
-
- // Don't promote to an alignment that would require dynamic stack
- // realignment.
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- if (!TRI->hasStackRealignment(MF))
- while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
- NewAlign = NewAlign / 2;
-
- if (NewAlign > Alignment) {
- Alignment = NewAlign;
- unsigned FI = FIDef->getOperand(1).getIndex();
- // Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlign(FI) < Alignment)
- MFI.setObjectAlignment(FI, Alignment);
- }
- }
-
- LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
-
- MachineIRBuilder MIB(MI);
- // Now we need to emit a pair of load and stores for each of the types we've
- // collected. I.e. for each type, generate a load from the source pointer of
- // that type width, and then generate a corresponding store to the dest buffer
- // of that value loaded. This can result in a sequence of loads and stores
- // mixed types, depending on what the target specifies as good types to use.
- unsigned CurrOffset = 0;
- LLT PtrTy = MRI.getType(Src);
- unsigned Size = KnownLen;
- for (auto CopyTy : MemOps) {
- // Issuing an unaligned load / store pair that overlaps with the previous
- // pair. Adjust the offset accordingly.
- if (CopyTy.getSizeInBytes() > Size)
- CurrOffset -= CopyTy.getSizeInBytes() - Size;
-
- // Construct MMOs for the accesses.
- auto *LoadMMO =
- MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
- auto *StoreMMO =
- MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
-
- // Create the load.
- Register LoadPtr = Src;
- Register Offset;
- if (CurrOffset != 0) {
- Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
- .getReg(0);
- LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
- }
- auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
-
- // Create the store.
- Register StorePtr =
- CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
- MIB.buildStore(LdVal, StorePtr, *StoreMMO);
- CurrOffset += CopyTy.getSizeInBytes();
- Size -= CopyTy.getSizeInBytes();
- }
-
- MI.eraseFromParent();
- return true;
-}
-
-bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
- Register Src, uint64_t KnownLen,
- Align DstAlign, Align SrcAlign,
- bool IsVolatile) {
- auto &MF = *MI.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
- auto &DL = MF.getDataLayout();
- LLVMContext &C = MF.getFunction().getContext();
-
- assert(KnownLen != 0 && "Have a zero length memmove length!");
-
- bool DstAlignCanChange = false;
- MachineFrameInfo &MFI = MF.getFrameInfo();
- bool OptSize = shouldLowerMemFuncForSize(MF);
- Align Alignment = commonAlignment(DstAlign, SrcAlign);
-
- MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
- if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
- DstAlignCanChange = true;
-
- unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
- std::vector<LLT> MemOps;
-
- const auto &DstMMO = **MI.memoperands_begin();
- const auto &SrcMMO = **std::next(MI.memoperands_begin());
- MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
- MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
-
- // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
- // to a bug in it's findOptimalMemOpLowering implementation. For now do the
- // same thing here.
- if (!findGISelOptimalMemOpLowering(
- MemOps, Limit,
- MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
- /*IsVolatile*/ true),
- DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
- MF.getFunction().getAttributes(), TLI))
- return false;
-
- if (DstAlignCanChange) {
- // Get an estimate of the type from the LLT.
- Type *IRTy = getTypeForLLT(MemOps[0], C);
- Align NewAlign = DL.getABITypeAlign(IRTy);
-
- // Don't promote to an alignment that would require dynamic stack
- // realignment.
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- if (!TRI->hasStackRealignment(MF))
- while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
- NewAlign = NewAlign / 2;
-
- if (NewAlign > Alignment) {
- Alignment = NewAlign;
- unsigned FI = FIDef->getOperand(1).getIndex();
- // Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlign(FI) < Alignment)
- MFI.setObjectAlignment(FI, Alignment);
- }
- }
-
- LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
-
- MachineIRBuilder MIB(MI);
- // Memmove requires that we perform the loads first before issuing the stores.
- // Apart from that, this loop is pretty much doing the same thing as the
- // memcpy codegen function.
- unsigned CurrOffset = 0;
- LLT PtrTy = MRI.getType(Src);
- SmallVector<Register, 16> LoadVals;
- for (auto CopyTy : MemOps) {
- // Construct MMO for the load.
- auto *LoadMMO =
- MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
-
- // Create the load.
- Register LoadPtr = Src;
- if (CurrOffset != 0) {
- auto Offset =
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
- LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
- }
- LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
- CurrOffset += CopyTy.getSizeInBytes();
- }
-
- CurrOffset = 0;
- for (unsigned I = 0; I < MemOps.size(); ++I) {
- LLT CopyTy = MemOps[I];
- // Now store the values loaded.
- auto *StoreMMO =
- MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
-
- Register StorePtr = Dst;
- if (CurrOffset != 0) {
- auto Offset =
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
- StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
- }
- MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
- CurrOffset += CopyTy.getSizeInBytes();
- }
- MI.eraseFromParent();
- return true;
+ MachineIRBuilder HelperBuilder(MI);
+ GISelObserverWrapper DummyObserver;
+ LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
+ return Helper.lowerMemcpyInline(MI) ==
+ LegalizerHelper::LegalizeResult::Legalized;
}
bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
- const unsigned Opc = MI.getOpcode();
- // This combine is fairly complex so it's not written with a separate
- // matcher function.
- assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
- Opc == TargetOpcode::G_MEMSET) && "Expected memcpy like instruction");
-
- auto MMOIt = MI.memoperands_begin();
- const MachineMemOperand *MemOp = *MMOIt;
-
- Align DstAlign = MemOp->getBaseAlign();
- Align SrcAlign;
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Len = MI.getOperand(2).getReg();
-
- if (Opc != TargetOpcode::G_MEMSET) {
- assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
- MemOp = *(++MMOIt);
- SrcAlign = MemOp->getBaseAlign();
- }
-
- // See if this is a constant length copy
- auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
- if (!LenVRegAndVal)
- return false; // Leave it to the legalizer to lower it to a libcall.
- uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
-
- if (KnownLen == 0) {
- MI.eraseFromParent();
- return true;
- }
-
- bool IsVolatile = MemOp->isVolatile();
- if (Opc == TargetOpcode::G_MEMCPY_INLINE)
- return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
- IsVolatile);
-
- // Don't try to optimize volatile.
- if (IsVolatile)
- return false;
-
- if (MaxLen && KnownLen > MaxLen)
- return false;
-
- if (Opc == TargetOpcode::G_MEMCPY) {
- auto &MF = *MI.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
- bool OptSize = shouldLowerMemFuncForSize(MF);
- uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
- return optimizeMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
- IsVolatile);
- }
- if (Opc == TargetOpcode::G_MEMMOVE)
- return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
- if (Opc == TargetOpcode::G_MEMSET)
- return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
- return false;
+ MachineIRBuilder HelperBuilder(MI);
+ GISelObserverWrapper DummyObserver;
+ LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
+ return Helper.lowerMemCpyFamily(MI, MaxLen) ==
+ LegalizerHelper::LegalizeResult::Legalized;
}
static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy,
@@ -1706,30 +1301,52 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
Register Add2 = MI.getOperand(1).getReg();
Register Imm1 = MI.getOperand(2).getReg();
- auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
+ auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
if (!MaybeImmVal)
return false;
- // Don't do this combine if there multiple uses of the first PTR_ADD,
- // since we may be able to compute the second PTR_ADD as an immediate
- // offset anyway. Folding the first offset into the second may cause us
- // to go beyond the bounds of our legal addressing modes.
- if (!MRI.hasOneNonDBGUse(Add2))
- return false;
-
- MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
+ MachineInstr *Add2Def = MRI.getVRegDef(Add2);
if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
return false;
Register Base = Add2Def->getOperand(1).getReg();
Register Imm2 = Add2Def->getOperand(2).getReg();
- auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
+ auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
if (!MaybeImm2Val)
return false;
+ // Check if the new combined immediate forms an illegal addressing mode.
+ // Do not combine if it was legal before but would get illegal.
+ // To do so, we need to find a load/store user of the pointer to get
+ // the access type.
+ Type *AccessTy = nullptr;
+ auto &MF = *MI.getMF();
+ for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
+ if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
+ AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
+ MF.getFunction().getContext());
+ break;
+ }
+ }
+ TargetLoweringBase::AddrMode AMNew;
+ APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
+ AMNew.BaseOffs = CombinedImm.getSExtValue();
+ if (AccessTy) {
+ AMNew.HasBaseReg = true;
+ TargetLoweringBase::AddrMode AMOld;
+ AMOld.BaseOffs = MaybeImm2Val->Value.getSExtValue();
+ AMOld.HasBaseReg = true;
+ unsigned AS = MRI.getType(Add2).getAddressSpace();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
+ !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
+ return false;
+ }
+
// Pass the combined immediate to the apply function.
- MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue();
+ MatchInfo.Imm = AMNew.BaseOffs;
MatchInfo.Base = Base;
+ MatchInfo.Bank = getRegBank(Imm2);
return true;
}
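Illustrative sketch (not part of the patch): folding two chained pointer-add immediates into one, refusing the fold when the combined offset would leave a legal-offset range that the old inner offset satisfied. The LegalRange struct is purely hypothetical and stands in for the target's isLegalAddressingMode query.

    #include <cassert>
    #include <cstdint>
    #include <optional>

    struct LegalRange { int64_t Min, Max; }; // hypothetical target property

    static std::optional<int64_t> combineOffsets(int64_t OuterImm, int64_t InnerImm,
                                                 LegalRange R) {
      int64_t Combined = OuterImm + InnerImm;
      bool OldLegal = InnerImm >= R.Min && InnerImm <= R.Max;
      bool NewLegal = Combined >= R.Min && Combined <= R.Max;
      if (OldLegal && !NewLegal)
        return std::nullopt; // was legal before, would become illegal
      return Combined;
    }

    int main() {
      LegalRange R{-256, 255};
      assert(combineOffsets(4, 8, R) == 12);
      assert(!combineOffsets(300, 8, R).has_value()); // 308 falls out of range
      return 0;
    }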
@@ -1739,6 +1356,7 @@ void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
MachineIRBuilder MIB(MI);
LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
+ setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
Observer.changingInstr(MI);
MI.getOperand(1).setReg(MatchInfo.Base);
MI.getOperand(2).setReg(NewOffset.getReg(0));
@@ -1762,7 +1380,7 @@ bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
Register Shl2 = MI.getOperand(1).getReg();
Register Imm1 = MI.getOperand(2).getReg();
- auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
+ auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
if (!MaybeImmVal)
return false;
@@ -1772,7 +1390,7 @@ bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
Register Base = Shl2Def->getOperand(1).getReg();
Register Imm2 = Shl2Def->getOperand(2).getReg();
- auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
+ auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
if (!MaybeImm2Val)
return false;
@@ -1856,7 +1474,7 @@ bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
// Find a matching one-use shift by constant.
const Register C1 = MI.getOperand(2).getReg();
- auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI);
+ auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
if (!MaybeImmVal)
return false;
@@ -1870,7 +1488,7 @@ bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
// Must be a constant.
auto MaybeImmVal =
- getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
if (!MaybeImmVal)
return false;
@@ -1932,8 +1550,8 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
// These were one use so it's safe to remove them.
- MatchInfo.Shift2->eraseFromParent();
- MatchInfo.Logic->eraseFromParent();
+ MatchInfo.Shift2->eraseFromParentAndMarkDBGValuesForRemoval();
+ MatchInfo.Logic->eraseFromParentAndMarkDBGValuesForRemoval();
MI.eraseFromParent();
}
@@ -1942,7 +1560,7 @@ bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
unsigned &ShiftVal) {
assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
auto MaybeImmVal =
- getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
if (!MaybeImmVal)
return false;
@@ -1977,7 +1595,7 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
// TODO: Should handle vector splat.
Register RHS = MI.getOperand(2).getReg();
- auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ auto MaybeShiftAmtVal = getIConstantVRegValWithLookThrough(RHS, MRI);
if (!MaybeShiftAmtVal)
return false;
@@ -2045,26 +1663,23 @@ bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
"Expected an unmerge");
- Register SrcReg =
- peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI);
+ auto &Unmerge = cast<GUnmerge>(MI);
+ Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
- MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
- if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES &&
- SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
- SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS)
+ auto *SrcInstr = getOpcodeDef<GMergeLikeOp>(SrcReg, MRI);
+ if (!SrcInstr)
return false;
// Check the source type of the merge.
- LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg());
- LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
+ LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
+ LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
if (SrcMergeTy != Dst0Ty && !SameSize)
return false;
// They are the same now (modulo a bitcast).
// We can collect all the src registers.
- for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx;
- ++Idx)
- Operands.push_back(SrcInstr->getOperand(Idx).getReg());
+ for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
+ Operands.push_back(SrcInstr->getSourceReg(Idx));
return true;
}
@@ -2241,7 +1856,7 @@ bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
return false;
auto MaybeImmVal =
- getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
if (!MaybeImmVal)
return false;
@@ -2410,12 +2025,12 @@ void CombinerHelper::applyCombineAddP2IToPtrAdd(
bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
int64_t &NewCst) {
- assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
+ auto &PtrAdd = cast<GPtrAdd>(MI);
+ Register LHS = PtrAdd.getBaseReg();
+ Register RHS = PtrAdd.getOffsetReg();
MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
- if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) {
+ if (auto RHSCst = getIConstantVRegSExtVal(RHS, MRI)) {
int64_t Cst;
if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
NewCst = Cst + *RHSCst;
@@ -2428,12 +2043,12 @@ bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
int64_t &NewCst) {
- assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
- Register Dst = MI.getOperand(0).getReg();
+ auto &PtrAdd = cast<GPtrAdd>(MI);
+ Register Dst = PtrAdd.getReg(0);
Builder.setInstrAndDebugLoc(MI);
Builder.buildConstant(Dst, NewCst);
- MI.eraseFromParent();
+ PtrAdd.eraseFromParent();
}
bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
@@ -2536,6 +2151,23 @@ bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc)));
}
+bool CombinerHelper::matchCombineFAbsOfFNeg(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
+ Register Src = MI.getOperand(1).getReg();
+ Register NegSrc;
+
+ if (!mi_match(Src, MRI, m_GFNeg(m_Reg(NegSrc))))
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(NegSrc);
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
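Illustrative sketch (not part of the patch): the identity behind folding G_FABS(G_FNEG(x)) into G_FABS(x), checked on plain doubles.

    #include <cassert>
    #include <cmath>

    int main() {
      for (double X : {0.0, -1.5, 3.25, -1e300})
        assert(std::fabs(-X) == std::fabs(X)); // fabs(fneg(x)) == fabs(x)
      return 0;
    }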
bool CombinerHelper::matchCombineTruncOfExt(
MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
@@ -2587,7 +2219,7 @@ bool CombinerHelper::matchCombineTruncOfShl(
{DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) {
KnownBits Known = KB->getKnownBits(ShiftAmt);
unsigned Size = DstTy.getSizeInBits();
- if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
+ if (Known.countMaxActiveBits() <= Log2_32(Size)) {
MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
return true;
}
@@ -2644,13 +2276,13 @@ bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
}
bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
- assert(MI.getOpcode() == TargetOpcode::G_SELECT);
- if (auto MaybeCstCmp =
- getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) {
- OpIdx = MaybeCstCmp->Value.isNullValue() ? 3 : 2;
- return true;
- }
- return false;
+ GSelect &SelMI = cast<GSelect>(MI);
+ auto Cst =
+ isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
+ if (!Cst)
+ return false;
+ OpIdx = Cst->isZero() ? 3 : 2;
+ return true;
}
bool CombinerHelper::eraseInst(MachineInstr &MI) {
@@ -2662,12 +2294,14 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
const MachineOperand &MOP2) {
if (!MOP1.isReg() || !MOP2.isReg())
return false;
- MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
- if (!I1)
+ auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
+ if (!InstAndDef1)
return false;
- MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
- if (!I2)
+ auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
+ if (!InstAndDef2)
return false;
+ MachineInstr *I1 = InstAndDef1->MI;
+ MachineInstr *I2 = InstAndDef2->MI;
// Handle a case like this:
//
@@ -2727,15 +2361,26 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
//
// On the off-chance that there's some target instruction feeding into the
// instruction, let's use produceSameValue instead of isIdenticalTo.
- return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
+ if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
+ // Handle instructions with multiple defs that produce same values. Values
+ // are same for operands with same index.
+ // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
+ // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
+ // I1 and I2 are different instructions but produce same values,
+ // %1 and %6 are same, %1 and %7 are not the same value.
+ return I1->findRegisterDefOperandIdx(InstAndDef1->Reg) ==
+ I2->findRegisterDefOperandIdx(InstAndDef2->Reg);
+ }
+ return false;
}
bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
if (!MOP.isReg())
return false;
- // MIPatternMatch doesn't let us look through G_ZEXT etc.
- auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
- return ValAndVReg && ValAndVReg->Value == C;
+ auto *MI = MRI.getVRegDef(MOP.getReg());
+ auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
+ return MaybeCst.hasValue() && MaybeCst->getBitWidth() <= 64 &&
+ MaybeCst->getSExtValue() == C;
}
bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
@@ -3115,14 +2760,14 @@ bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
//
// Check if we can replace AndDst with the LHS of the G_AND
if (canReplaceReg(AndDst, LHS, MRI) &&
- (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
+ (LHSBits.Zero | RHSBits.One).isAllOnes()) {
Replacement = LHS;
return true;
}
// Check if we can replace AndDst with the RHS of the G_AND
if (canReplaceReg(AndDst, RHS, MRI) &&
- (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
+ (LHSBits.One | RHSBits.Zero).isAllOnes()) {
Replacement = RHS;
return true;
}
@@ -3161,14 +2806,14 @@ bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
//
// Check if we can replace OrDst with the LHS of the G_OR
if (canReplaceReg(OrDst, LHS, MRI) &&
- (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
+ (LHSBits.One | RHSBits.Zero).isAllOnes()) {
Replacement = LHS;
return true;
}
// Check if we can replace OrDst with the RHS of the G_OR
if (canReplaceReg(OrDst, RHS, MRI) &&
- (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
+ (LHSBits.Zero | RHSBits.One).isAllOnes()) {
Replacement = RHS;
return true;
}
@@ -3346,7 +2991,8 @@ void CombinerHelper::applyXorOfAndWithSameReg(
}
bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
- Register DstReg = MI.getOperand(0).getReg();
+ auto &PtrAdd = cast<GPtrAdd>(MI);
+ Register DstReg = PtrAdd.getReg(0);
LLT Ty = MRI.getType(DstReg);
const DataLayout &DL = Builder.getMF().getDataLayout();
@@ -3354,20 +3000,20 @@ bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
return false;
if (Ty.isPointer()) {
- auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI);
+ auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
return ConstVal && *ConstVal == 0;
}
assert(Ty.isVector() && "Expecting a vector type");
- const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg());
+ const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
return isBuildVectorAllZeros(*VecMI, MRI);
}
void CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
- Builder.setInstrAndDebugLoc(MI);
- Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2));
- MI.eraseFromParent();
+ auto &PtrAdd = cast<GPtrAdd>(MI);
+ Builder.setInstrAndDebugLoc(PtrAdd);
+ Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
+ PtrAdd.eraseFromParent();
}
/// The second source operand is known to be a power of 2.
@@ -3704,10 +3350,8 @@ bool CombinerHelper::matchLoadOrCombine(
// may not use index 0.
Register Ptr = LowestIdxLoad->getPointerReg();
const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
- LegalityQuery::MemDesc MMDesc;
+ LegalityQuery::MemDesc MMDesc(MMO);
MMDesc.MemoryTy = Ty;
- MMDesc.AlignInBits = MMO.getAlign().value() * 8;
- MMDesc.Ordering = MMO.getSuccessOrdering();
if (!isLegalOrBeforeLegalizer(
{TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
return false;
@@ -3732,6 +3376,274 @@ bool CombinerHelper::matchLoadOrCombine(
return true;
}
+/// Check if the store \p Store is a truncstore that can be merged. That is,
+/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
+/// Register then it does not need to match and SrcVal is set to the source
+/// value found.
+/// On match, returns the start byte offset of the \p SrcVal that is being
+/// stored.
+static Optional<int64_t> getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
+ MachineRegisterInfo &MRI) {
+ Register TruncVal;
+ if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
+ return None;
+
+ // The shift amount must be a constant multiple of the narrow type.
+ // It is translated to the offset address in the wide source value "y".
+ //
+ // x = G_LSHR y, ShiftAmtC
+ // s8 z = G_TRUNC x
+ // store z, ...
+ Register FoundSrcVal;
+ int64_t ShiftAmt;
+ if (!mi_match(TruncVal, MRI,
+ m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
+ m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
+ if (!SrcVal.isValid() || TruncVal == SrcVal) {
+ if (!SrcVal.isValid())
+ SrcVal = TruncVal;
+ return 0; // If it's the lowest index store.
+ }
+ return None;
+ }
+
+ unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
+  if (ShiftAmt % NarrowBits != 0)
+ return None;
+ const unsigned Offset = ShiftAmt / NarrowBits;
+
+ if (SrcVal.isValid() && FoundSrcVal != SrcVal)
+ return None;
+
+ if (!SrcVal.isValid())
+ SrcVal = FoundSrcVal;
+ else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
+ return None;
+ return Offset;
+}
+
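Illustrative sketch (not part of the patch): the shift-to-piece-index mapping getTruncStoreByteOffset performs, returning no match when the shift amount is not a whole multiple of the narrow store width.

    #include <cassert>
    #include <optional>

    static std::optional<unsigned> pieceIndex(unsigned ShiftAmt, unsigned NarrowBits) {
      if (ShiftAmt % NarrowBits != 0)
        return std::nullopt;
      return ShiftAmt / NarrowBits;
    }

    int main() {
      assert(pieceIndex(0, 8) == 0u);  // lowest piece
      assert(pieceIndex(24, 8) == 3u); // (val >> 24) & 0xFF is piece 3
      assert(!pieceIndex(12, 8));      // not a whole-piece shift, no match
      return 0;
    }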
+/// Match a pattern where a wide type scalar value is stored by several narrow
+/// stores. Fold it into a single store or a BSWAP and a store if the targets
+/// supports it.
+///
+/// Assuming little endian target:
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 0) & 0xFF;
+/// p[1] = (val >> 8) & 0xFF;
+/// p[2] = (val >> 16) & 0xFF;
+/// p[3] = (val >> 24) & 0xFF;
+/// =>
+/// *((i32)p) = val;
+///
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 24) & 0xFF;
+/// p[1] = (val >> 16) & 0xFF;
+/// p[2] = (val >> 8) & 0xFF;
+/// p[3] = (val >> 0) & 0xFF;
+/// =>
+/// *((i32)p) = BSWAP(val);
+bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI,
+ MergeTruncStoresInfo &MatchInfo) {
+ auto &StoreMI = cast<GStore>(MI);
+ LLT MemTy = StoreMI.getMMO().getMemoryType();
+
+ // We only handle merging simple stores of 1-4 bytes.
+ if (!MemTy.isScalar())
+ return false;
+ switch (MemTy.getSizeInBits()) {
+ case 8:
+ case 16:
+ case 32:
+ break;
+ default:
+ return false;
+ }
+ if (!StoreMI.isSimple())
+ return false;
+
+ // We do a simple search for mergeable stores prior to this one.
+ // Any potential alias hazard along the way terminates the search.
+ SmallVector<GStore *> FoundStores;
+
+ // We're looking for:
+ // 1) a (store(trunc(...)))
+ // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
+ // the partial value stored.
+ // 3) where the offsets form either a little or big-endian sequence.
+
+ auto &LastStore = StoreMI;
+
+ // The single base pointer that all stores must use.
+ Register BaseReg;
+ int64_t LastOffset;
+ if (!mi_match(LastStore.getPointerReg(), MRI,
+ m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
+ BaseReg = LastStore.getPointerReg();
+ LastOffset = 0;
+ }
+
+ GStore *LowestIdxStore = &LastStore;
+ int64_t LowestIdxOffset = LastOffset;
+
+ Register WideSrcVal;
+ auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, MRI);
+ if (!LowestShiftAmt)
+ return false; // Didn't match a trunc.
+ assert(WideSrcVal.isValid());
+
+ LLT WideStoreTy = MRI.getType(WideSrcVal);
+ // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
+ if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
+ return false;
+ const unsigned NumStoresRequired =
+ WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
+
+ SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
+ OffsetMap[*LowestShiftAmt] = LastOffset;
+ FoundStores.emplace_back(&LastStore);
+
+ // Search the block up for more stores.
+ // We use a search threshold of 10 instructions here because the combiner
+ // works top-down within a block, and we don't want to search an unbounded
+ // number of predecessor instructions trying to find matching stores.
+ // If we moved this optimization into a separate pass then we could probably
+ // use a more efficient search without having a hard-coded threshold.
+ const int MaxInstsToCheck = 10;
+ int NumInstsChecked = 0;
+ for (auto II = ++LastStore.getReverseIterator();
+ II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
+ ++II) {
+ NumInstsChecked++;
+ GStore *NewStore;
+ if ((NewStore = dyn_cast<GStore>(&*II))) {
+ if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
+ break;
+ } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
+ break;
+ } else {
+ continue; // This is a safe instruction we can look past.
+ }
+
+ Register NewBaseReg;
+ int64_t MemOffset;
+ // Check we're storing to the same base + some offset.
+ if (!mi_match(NewStore->getPointerReg(), MRI,
+ m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
+ NewBaseReg = NewStore->getPointerReg();
+ MemOffset = 0;
+ }
+ if (BaseReg != NewBaseReg)
+ break;
+
+ auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, MRI);
+ if (!ShiftByteOffset)
+ break;
+ if (MemOffset < LowestIdxOffset) {
+ LowestIdxOffset = MemOffset;
+ LowestIdxStore = NewStore;
+ }
+
+ // Map the offset in the store and the offset in the combined value, and
+ // early return if it has been set before.
+ if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
+ OffsetMap[*ShiftByteOffset] != INT64_MAX)
+ break;
+ OffsetMap[*ShiftByteOffset] = MemOffset;
+
+ FoundStores.emplace_back(NewStore);
+ // Reset counter since we've found a matching inst.
+ NumInstsChecked = 0;
+ if (FoundStores.size() == NumStoresRequired)
+ break;
+ }
+
+ if (FoundStores.size() != NumStoresRequired) {
+ return false;
+ }
+
+ const auto &DL = LastStore.getMF()->getDataLayout();
+ auto &C = LastStore.getMF()->getFunction().getContext();
+ // Check that a store of the wide type is both allowed and fast on the target
+ bool Fast = false;
+ bool Allowed = getTargetLowering().allowsMemoryAccess(
+ C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
+ if (!Allowed || !Fast)
+ return false;
+
+ // Check if the pieces of the value are going to the expected places in memory
+ // to merge the stores.
+ unsigned NarrowBits = MemTy.getScalarSizeInBits();
+ auto checkOffsets = [&](bool MatchLittleEndian) {
+ if (MatchLittleEndian) {
+ for (unsigned i = 0; i != NumStoresRequired; ++i)
+ if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
+ return false;
+ } else { // MatchBigEndian by reversing loop counter.
+ for (unsigned i = 0, j = NumStoresRequired - 1; i != NumStoresRequired;
+ ++i, --j)
+ if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
+ return false;
+ }
+ return true;
+ };
+
+ // Check if the offsets line up for the native data layout of this target.
+ bool NeedBswap = false;
+ bool NeedRotate = false;
+ if (!checkOffsets(DL.isLittleEndian())) {
+ // Special-case: check if byte offsets line up for the opposite endian.
+ if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
+ NeedBswap = true;
+ else if (NumStoresRequired == 2 && checkOffsets(DL.isBigEndian()))
+ NeedRotate = true;
+ else
+ return false;
+ }
+
+ if (NeedBswap &&
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}))
+ return false;
+ if (NeedRotate &&
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_ROTR, {WideStoreTy}}))
+ return false;
+
+ MatchInfo.NeedBSwap = NeedBswap;
+ MatchInfo.NeedRotate = NeedRotate;
+ MatchInfo.LowestIdxStore = LowestIdxStore;
+ MatchInfo.WideSrcVal = WideSrcVal;
+ MatchInfo.FoundStores = std::move(FoundStores);
+ return true;
+}
+
+void CombinerHelper::applyTruncStoreMerge(MachineInstr &MI,
+ MergeTruncStoresInfo &MatchInfo) {
+
+ Builder.setInstrAndDebugLoc(MI);
+ Register WideSrcVal = MatchInfo.WideSrcVal;
+ LLT WideStoreTy = MRI.getType(WideSrcVal);
+
+ if (MatchInfo.NeedBSwap) {
+ WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
+ } else if (MatchInfo.NeedRotate) {
+ assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
+ "Unexpected type for rotate");
+ auto RotAmt =
+ Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
+ WideSrcVal =
+ Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
+ }
+
+ Builder.buildStore(WideSrcVal, MatchInfo.LowestIdxStore->getPointerReg(),
+ MatchInfo.LowestIdxStore->getMMO().getPointerInfo(),
+ MatchInfo.LowestIdxStore->getMMO().getAlign());
+
+ // Erase the old stores.
+ for (auto *ST : MatchInfo.FoundStores)
+ ST->eraseFromParent();
+}
+
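Illustrative sketch (not part of the patch): the offset check used above to decide whether the narrow stores already form the target's native byte order or the reversed one (which then needs a BSWAP, or a rotate for exactly two halves). OffsetMap[piece] holds the memory offset at which that piece of the wide value was stored.

    #include <cassert>
    #include <cstdint>
    #include <vector>

    static bool offsetsMatch(const std::vector<int64_t> &OffsetMap,
                             unsigned NarrowBits, int64_t LowestOffset,
                             bool LittleEndian) {
      unsigned N = OffsetMap.size();
      for (unsigned i = 0; i != N; ++i) {
        unsigned Idx = LittleEndian ? i : N - 1 - i;
        if (OffsetMap[Idx] != int64_t(i) * (NarrowBits / 8) + LowestOffset)
          return false;
      }
      return true;
    }

    int main() {
      std::vector<int64_t> LE{0, 1, 2, 3}; // p[i] = (val >> 8*i) & 0xFF
      std::vector<int64_t> BE{3, 2, 1, 0}; // bytes stored in reverse order
      assert(offsetsMatch(LE, 8, 0, /*LittleEndian=*/true));
      assert(!offsetsMatch(BE, 8, 0, /*LittleEndian=*/true)); // needs BSWAP on LE
      assert(offsetsMatch(BE, 8, 0, /*LittleEndian=*/false));
      return 0;
    }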
bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
MachineInstr *&ExtMI) {
assert(MI.getOpcode() == TargetOpcode::G_PHI);
@@ -3844,7 +3756,7 @@ bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
{TargetOpcode::G_BUILD_VECTOR, {SrcTy, SrcTy.getElementType()}}))
return false;
- auto Cst = getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
return false;
@@ -3917,7 +3829,7 @@ bool CombinerHelper::matchExtractAllEltsFromBuildVector(
MRI.use_instr_nodbg_end())) {
if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
return false;
- auto Cst = getConstantVRegVal(II.getOperand(2).getReg(), MRI);
+ auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
if (!Cst)
return false;
unsigned Idx = Cst.getValue().getZExtValue();
@@ -4064,6 +3976,78 @@ bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
return true;
}
+bool CombinerHelper::matchICmpToLHSKnownBits(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ICMP);
+ // Given:
+ //
+ // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
+ // %cmp = G_ICMP ne %x, 0
+ //
+ // Or:
+ //
+ // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
+ // %cmp = G_ICMP eq %x, 1
+ //
+ // We can replace %cmp with %x assuming true is 1 on the target.
+ auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ if (!CmpInst::isEquality(Pred))
+ return false;
+ Register Dst = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ if (getICmpTrueVal(getTargetLowering(), DstTy.isVector(),
+ /* IsFP = */ false) != 1)
+ return false;
+ int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
+ if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
+ return false;
+ Register LHS = MI.getOperand(2).getReg();
+ auto KnownLHS = KB->getKnownBits(LHS);
+ if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
+ return false;
+ // Make sure replacing Dst with the LHS is a legal operation.
+ LLT LHSTy = MRI.getType(LHS);
+ unsigned LHSSize = LHSTy.getSizeInBits();
+ unsigned DstSize = DstTy.getSizeInBits();
+ unsigned Op = TargetOpcode::COPY;
+ if (DstSize != LHSSize)
+ Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
+ if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
+ return false;
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
+ return true;
+}
+
+// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
+bool CombinerHelper::matchAndOrDisjointMask(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+
+ // Ignore vector types to simplify matching the two constants.
+ // TODO: do this for vectors and scalars via a demanded bits analysis.
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (Ty.isVector())
+ return false;
+
+ Register Src;
+ int64_t MaskAnd;
+ int64_t MaskOr;
+ if (!mi_match(MI, MRI,
+ m_GAnd(m_GOr(m_Reg(Src), m_ICst(MaskOr)), m_ICst(MaskAnd))))
+ return false;
+
+ // Check if MaskOr could turn on any bits in Src.
+ if (MaskAnd & MaskOr)
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(Src);
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
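Illustrative sketch (not part of the patch): the identity behind this combine, checked for a few sample values. When the OR mask and the AND mask share no bits, the OR cannot affect any bit that survives the AND.

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t C1 = 0xF0, C2 = 0x0F; // disjoint: (C1 & C2) == 0
      for (uint64_t X : {0x00ull, 0x5Aull, 0xFFull, 0x1234ull})
        assert(((X | C1) & C2) == (X & C2)); // (and (or x, c1), c2) == (and x, c2)
      return 0;
    }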
/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
@@ -4130,6 +4114,104 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
return true;
}
+bool CombinerHelper::matchBitfieldExtractFromShr(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ const unsigned Opcode = MI.getOpcode();
+ assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
+
+ const Register Dst = MI.getOperand(0).getReg();
+
+ const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
+ ? TargetOpcode::G_SBFX
+ : TargetOpcode::G_UBFX;
+
+ // Check if the type we would use for the extract is legal
+ LLT Ty = MRI.getType(Dst);
+ LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
+ return false;
+
+ Register ShlSrc;
+ int64_t ShrAmt;
+ int64_t ShlAmt;
+ const unsigned Size = Ty.getScalarSizeInBits();
+
+ // Try to match shr (shl x, c1), c2
+ if (!mi_match(Dst, MRI,
+ m_BinOp(Opcode,
+ m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
+ m_ICst(ShrAmt))))
+ return false;
+
+ // Make sure that the shift sizes can fit a bitfield extract
+ if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
+ return false;
+
+ // Skip this combine if the G_SEXT_INREG combine could handle it
+ if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
+ return false;
+
+ // Calculate start position and width of the extract
+ const int64_t Pos = ShrAmt - ShlAmt;
+ const int64_t Width = Size - ShrAmt;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto WidthCst = B.buildConstant(ExtractTy, Width);
+ auto PosCst = B.buildConstant(ExtractTy, Pos);
+ B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
+ };
+ return true;
+}
+
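Illustrative sketch (not part of the patch): for unsigned values, (lshr (shl x, ShlAmt), ShrAmt) with ShlAmt <= ShrAmt < Size extracts the bitfield at Pos = ShrAmt - ShlAmt with Width = Size - ShrAmt bits, which is what the emitted G_UBFX encodes.

    #include <cassert>
    #include <cstdint>

    static uint32_t viaShifts(uint32_t X, unsigned ShlAmt, unsigned ShrAmt) {
      return (X << ShlAmt) >> ShrAmt;
    }

    static uint32_t ubfx(uint32_t X, unsigned Pos, unsigned Width) {
      return (X >> Pos) & (Width == 32 ? ~0u : ((1u << Width) - 1));
    }

    int main() {
      const unsigned Size = 32, ShlAmt = 4, ShrAmt = 12;
      const unsigned Pos = ShrAmt - ShlAmt, Width = Size - ShrAmt;
      for (uint32_t X : {0u, 0xDEADBEEFu, 0x12345678u, 0xFFFFFFFFu})
        assert(viaShifts(X, ShlAmt, ShrAmt) == ubfx(X, Pos, Width));
      return 0;
    }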
+bool CombinerHelper::matchBitfieldExtractFromShrAnd(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ const unsigned Opcode = MI.getOpcode();
+ assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
+
+ const Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ if (!getTargetLowering().isConstantUnsignedBitfieldExtactLegal(
+ TargetOpcode::G_UBFX, Ty, Ty))
+ return false;
+
+ // Try to match shr (and x, c1), c2
+ Register AndSrc;
+ int64_t ShrAmt;
+ int64_t SMask;
+ if (!mi_match(Dst, MRI,
+ m_BinOp(Opcode,
+ m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
+ m_ICst(ShrAmt))))
+ return false;
+
+ const unsigned Size = Ty.getScalarSizeInBits();
+ if (ShrAmt < 0 || ShrAmt >= Size)
+ return false;
+
+ // Check that ubfx can do the extraction, with no holes in the mask.
+ uint64_t UMask = SMask;
+ UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
+ UMask &= maskTrailingOnes<uint64_t>(Size);
+ if (!isMask_64(UMask))
+ return false;
+
+ // Calculate start position and width of the extract.
+ const int64_t Pos = ShrAmt;
+ const int64_t Width = countTrailingOnes(UMask) - ShrAmt;
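+  // e.g. for a 32-bit type: lshr (and x, 0xff00), 8 gives UMask = 0xffff,
+  // so Pos = 8 and Width = 8, i.e. ubfx x, 8, 8.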
+
+ // It's preferable to keep the shift, rather than form G_SBFX.
+ // TODO: remove the G_AND via demanded bits analysis.
+ if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto WidthCst = B.buildConstant(Ty, Width);
+ auto PosCst = B.buildConstant(Ty, Pos);
+ B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
+ };
+ return true;
+}
+
bool CombinerHelper::reassociationCanBreakAddressingModePattern(
MachineInstr &PtrAdd) {
assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);
@@ -4144,10 +4226,10 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern(
if (MRI.hasOneNonDBGUse(Src1Reg))
return false;
- auto C1 = getConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
+ auto C1 = getIConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
if (!C1)
return false;
- auto C2 = getConstantVRegVal(Src2Reg, MRI);
+ auto C2 = getIConstantVRegVal(Src2Reg, MRI);
if (!C2)
return false;
@@ -4198,9 +4280,91 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern(
return false;
}
-bool CombinerHelper::matchReassocPtrAdd(
- MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
+bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI,
+ MachineInstr *RHS,
+ BuildFnTy &MatchInfo) {
+ // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
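+  // e.g. %res = G_PTR_ADD %base, (G_ADD %x, 16)
+  //   -> %new = G_PTR_ADD %base, %x ; %res = G_PTR_ADD %new, 16
+  // so the constant moves to the outer G_PTR_ADD, where it can fold into an
+  // addressing-mode immediate.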
+ Register Src1Reg = MI.getOperand(1).getReg();
+ if (RHS->getOpcode() != TargetOpcode::G_ADD)
+ return false;
+ auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
+ if (!C2)
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
+
+ auto NewBase =
+ Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(NewBase.getReg(0));
+ MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
+ Observer.changedInstr(MI);
+ };
+ return !reassociationCanBreakAddressingModePattern(MI);
+}
+
+bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
+ MachineInstr *LHS,
+ MachineInstr *RHS,
+ BuildFnTy &MatchInfo) {
+  // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
+  // if and only if (G_PTR_ADD X, C) has one use.
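+  // e.g. %res = G_PTR_ADD (G_PTR_ADD %x, 16), %y
+  //   -> %res = G_PTR_ADD (G_PTR_ADD %x, %y), 16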
+ Register LHSBase;
+ Optional<ValueAndVReg> LHSCstOff;
+ if (!mi_match(MI.getBaseReg(), MRI,
+ m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
+ return false;
+
+ auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+    // When we change LHSPtrAdd's offset register we might cause it to use a
+    // reg before its def. Sink the instruction so it comes just before the
+    // outer PTR_ADD to ensure this doesn't happen.
+ LHSPtrAdd->moveBefore(&MI);
+ Register RHSReg = MI.getOffsetReg();
+ Observer.changingInstr(MI);
+ MI.getOperand(2).setReg(LHSCstOff->VReg);
+ Observer.changedInstr(MI);
+ Observer.changingInstr(*LHSPtrAdd);
+ LHSPtrAdd->getOperand(2).setReg(RHSReg);
+ Observer.changedInstr(*LHSPtrAdd);
+ };
+ return !reassociationCanBreakAddressingModePattern(MI);
+}
+
+bool CombinerHelper::matchReassocFoldConstantsInSubTree(GPtrAdd &MI,
+ MachineInstr *LHS,
+ MachineInstr *RHS,
+ BuildFnTy &MatchInfo) {
+ // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
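+  // e.g. G_PTR_ADD(G_PTR_ADD(%base, 8), 16) -> G_PTR_ADD(%base, 24)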
+ auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
+ if (!LHSPtrAdd)
+ return false;
+
+ Register Src2Reg = MI.getOperand(2).getReg();
+ Register LHSSrc1 = LHSPtrAdd->getBaseReg();
+ Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
+ auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
+ if (!C1)
+ return false;
+ auto C2 = getIConstantVRegVal(Src2Reg, MRI);
+ if (!C2)
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(LHSSrc1);
+ MI.getOperand(2).setReg(NewCst.getReg(0));
+ Observer.changedInstr(MI);
+ };
+ return !reassociationCanBreakAddressingModePattern(MI);
+}
+
+bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ auto &PtrAdd = cast<GPtrAdd>(MI);
// We're trying to match a few pointer computation patterns here for
// re-association opportunities.
// 1) Isolating a constant operand to be on the RHS, e.g.:
@@ -4209,49 +4373,26 @@ bool CombinerHelper::matchReassocPtrAdd(
// 2) Folding two constants in each sub-tree as long as such folding
// doesn't break a legal addressing mode.
// G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
- Register Src1Reg = MI.getOperand(1).getReg();
- Register Src2Reg = MI.getOperand(2).getReg();
- MachineInstr *LHS = MRI.getVRegDef(Src1Reg);
- MachineInstr *RHS = MRI.getVRegDef(Src2Reg);
+ //
+ // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
+  // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
+  // iff (G_PTR_ADD X, C) has one use.
+ MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
+ MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
- if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) {
- // Try to match example 1).
- if (RHS->getOpcode() != TargetOpcode::G_ADD)
- return false;
- auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
- if (!C2)
- return false;
+ // Try to match example 2.
+ if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
+ return true;
- MatchInfo = [=,&MI](MachineIRBuilder &B) {
- LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
+ // Try to match example 3.
+ if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
+ return true;
- auto NewBase =
- Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
- Observer.changingInstr(MI);
- MI.getOperand(1).setReg(NewBase.getReg(0));
- MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
- Observer.changedInstr(MI);
- };
- } else {
- // Try to match example 2.
- Register LHSSrc1 = LHS->getOperand(1).getReg();
- Register LHSSrc2 = LHS->getOperand(2).getReg();
- auto C1 = getConstantVRegVal(LHSSrc2, MRI);
- if (!C1)
- return false;
- auto C2 = getConstantVRegVal(Src2Reg, MRI);
- if (!C2)
- return false;
+ // Try to match example 1.
+ if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
+ return true;
- MatchInfo = [=, &MI](MachineIRBuilder &B) {
- auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
- Observer.changingInstr(MI);
- MI.getOperand(1).setReg(LHSSrc1);
- MI.getOperand(2).setReg(NewCst.getReg(0));
- Observer.changedInstr(MI);
- };
- }
- return !reassociationCanBreakAddressingModePattern(MI);
+ return false;
}
bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
@@ -4264,6 +4405,361 @@ bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
return true;
}
+bool CombinerHelper::matchNarrowBinopFeedingAnd(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ // Look for a binop feeding into an AND with a mask:
+ //
+ // %add = G_ADD %lhs, %rhs
+ // %and = G_AND %add, 000...11111111
+ //
+ // Check if it's possible to perform the binop at a narrower width and zext
+ // back to the original width like so:
+ //
+ // %narrow_lhs = G_TRUNC %lhs
+ // %narrow_rhs = G_TRUNC %rhs
+ // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
+ // %new_add = G_ZEXT %narrow_add
+ // %and = G_AND %new_add, 000...11111111
+ //
+ // This can allow later combines to eliminate the G_AND if it turns out
+ // that the mask is irrelevant.
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+ Register Dst = MI.getOperand(0).getReg();
+ Register AndLHS = MI.getOperand(1).getReg();
+ Register AndRHS = MI.getOperand(2).getReg();
+ LLT WideTy = MRI.getType(Dst);
+
+ // If the potential binop has more than one use, then it's possible that one
+ // of those uses will need its full width.
+ if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
+ return false;
+
+ // Check if the LHS feeding the AND is impacted by the high bits that we're
+ // masking out.
+ //
+ // e.g. for 64-bit x, y:
+ //
+ // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
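+  //
+  // The low 16 bits of the result depend only on the low 16 bits of the
+  // operands, e.g. (0x1234FFFF + 1) & 65535 == 0, and likewise
+  // zext(add_16(0xFFFF, 1)) & 65535 == 0.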
+ MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
+ if (!LHSInst)
+ return false;
+ unsigned LHSOpc = LHSInst->getOpcode();
+ switch (LHSOpc) {
+ default:
+ return false;
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR:
+ break;
+ }
+
+ // Find the mask on the RHS.
+ auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
+ if (!Cst)
+ return false;
+ auto Mask = Cst->Value;
+ if (!Mask.isMask())
+ return false;
+
+ // No point in combining if there's nothing to truncate.
+ unsigned NarrowWidth = Mask.countTrailingOnes();
+ if (NarrowWidth == WideTy.getSizeInBits())
+ return false;
+ LLT NarrowTy = LLT::scalar(NarrowWidth);
+
+ // Check if adding the zext + truncates could be harmful.
+ auto &MF = *MI.getMF();
+ const auto &TLI = getTargetLowering();
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ auto &DL = MF.getDataLayout();
+ if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) ||
+ !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx))
+ return false;
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
+ return false;
+ Register BinOpLHS = LHSInst->getOperand(1).getReg();
+ Register BinOpRHS = LHSInst->getOperand(2).getReg();
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
+ auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
+ auto NarrowBinOp =
+ Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
+ auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(Ext.getReg(0));
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
+bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ unsigned Opc = MI.getOpcode();
+ assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
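+  // A multiply by 2 overflows exactly when adding the operand to itself does,
+  // so (G_UMULO x, 2) can become (G_UADDO x, x), and likewise for G_SMULO.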
+ // Check for a constant 2 or a splat of 2 on the RHS.
+ auto RHS = MI.getOperand(3).getReg();
+ bool IsVector = MRI.getType(RHS).isVector();
+  if (!IsVector && !mi_match(RHS, MRI, m_SpecificICst(2)))
+ return false;
+ if (IsVector) {
+ // FIXME: There's no mi_match pattern for this yet.
+ auto *RHSDef = getDefIgnoringCopies(RHS, MRI);
+ if (!RHSDef)
+ return false;
+ auto Splat = getBuildVectorConstantSplat(*RHSDef, MRI);
+ if (!Splat || *Splat != 2)
+ return false;
+ }
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Observer.changingInstr(MI);
+ unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
+ : TargetOpcode::G_SADDO;
+ MI.setDesc(Builder.getTII().get(NewOpc));
+ MI.getOperand(3).setReg(MI.getOperand(2).getReg());
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
+MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UDIV);
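+  // Expand the division into a multiply by a magic constant plus shifts,
+  // e.g. a 32-bit udiv by 7 (magic 0x24924925, using the "add" fixup) becomes:
+  //   q = umulh(x, 0x24924925)
+  //   q = ((x - q) >> 1) + q
+  //   q = q >> 2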
+ auto &UDiv = cast<GenericMachineInstr>(MI);
+ Register Dst = UDiv.getReg(0);
+ Register LHS = UDiv.getReg(1);
+ Register RHS = UDiv.getReg(2);
+ LLT Ty = MRI.getType(Dst);
+ LLT ScalarTy = Ty.getScalarType();
+ const unsigned EltBits = ScalarTy.getScalarSizeInBits();
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
+ auto &MIB = Builder;
+ MIB.setInstrAndDebugLoc(MI);
+
+ bool UseNPQ = false;
+ SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
+
+ auto BuildUDIVPattern = [&](const Constant *C) {
+ auto *CI = cast<ConstantInt>(C);
+ const APInt &Divisor = CI->getValue();
+ UnsignedDivisonByConstantInfo magics =
+ UnsignedDivisonByConstantInfo::get(Divisor);
+ unsigned PreShift = 0, PostShift = 0;
+
+ // If the divisor is even, we can avoid using the expensive fixup by
+ // shifting the divided value upfront.
+ if (magics.IsAdd != 0 && !Divisor[0]) {
+ PreShift = Divisor.countTrailingZeros();
+ // Get magic number for the shifted divisor.
+ magics =
+ UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
+ assert(magics.IsAdd == 0 && "Should use cheap fixup now");
+ }
+
+ APInt Magic = magics.Magic;
+
+ unsigned SelNPQ;
+ if (magics.IsAdd == 0 || Divisor.isOneValue()) {
+ assert(magics.ShiftAmount < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ PostShift = magics.ShiftAmount;
+ SelNPQ = false;
+ } else {
+ PostShift = magics.ShiftAmount - 1;
+ SelNPQ = true;
+ }
+
+ PreShifts.push_back(
+ MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
+ MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
+ NPQFactors.push_back(
+ MIB.buildConstant(ScalarTy,
+ SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
+ : APInt::getZero(EltBits))
+ .getReg(0));
+ PostShifts.push_back(
+ MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
+ UseNPQ |= SelNPQ;
+ return true;
+ };
+
+ // Collect the shifts/magic values from each element.
+ bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
+ (void)Matched;
+ assert(Matched && "Expected unary predicate match to succeed");
+
+ Register PreShift, PostShift, MagicFactor, NPQFactor;
+ auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
+ if (RHSDef) {
+ PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
+ MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
+ NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
+ PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
+ } else {
+ assert(MRI.getType(RHS).isScalar() &&
+ "Non-build_vector operation should have been a scalar");
+ PreShift = PreShifts[0];
+ MagicFactor = MagicFactors[0];
+ PostShift = PostShifts[0];
+ }
+
+ Register Q = LHS;
+ Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
+
+  // Multiply the numerator (LHS) by the magic value.
+ Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
+
+ if (UseNPQ) {
+ Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
+
+ // For vectors we might have a mix of non-NPQ/NPQ paths, so use
+ // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
+ if (Ty.isVector())
+ NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
+ else
+ NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
+
+ Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
+ }
+
+ Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
+ auto One = MIB.buildConstant(Ty, 1);
+ auto IsOne = MIB.buildICmp(
+ CmpInst::Predicate::ICMP_EQ,
+ Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
+ return MIB.buildSelect(Ty, IsOne, LHS, Q);
+}
+
+bool CombinerHelper::matchUDivByConst(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UDIV);
+ Register Dst = MI.getOperand(0).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ auto *RHSDef = MRI.getVRegDef(RHS);
+ if (!isConstantOrConstantVector(*RHSDef, MRI))
+ return false;
+
+ auto &MF = *MI.getMF();
+ AttributeList Attr = MF.getFunction().getAttributes();
+ const auto &TLI = getTargetLowering();
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ auto &DL = MF.getDataLayout();
+ if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
+ return false;
+
+ // Don't do this for minsize because the instruction sequence is usually
+ // larger.
+ if (MF.getFunction().hasMinSize())
+ return false;
+
+ // Don't do this if the types are not going to be legal.
+ if (LI) {
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
+ return false;
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
+ return false;
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_ICMP,
+ {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
+ DstTy}}))
+ return false;
+ }
+
+ auto CheckEltValue = [&](const Constant *C) {
+ if (auto *CI = dyn_cast_or_null<ConstantInt>(C))
+ return !CI->isZero();
+ return false;
+ };
+ return matchUnaryPredicate(MRI, RHS, CheckEltValue);
+}
+
+void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
+ auto *NewMI = buildUDivUsingMul(MI);
+ replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
+}
+
+bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UMULH);
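+  // umulh by a power of two 2^k yields the high half of the product, i.e.
+  // x >> (BitWidth - k); e.g. for 64 bits, umulh(x, 16) == x >> 60.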
+ Register RHS = MI.getOperand(2).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ auto MatchPow2ExceptOne = [&](const Constant *C) {
+ if (auto *CI = dyn_cast<ConstantInt>(C))
+ return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
+ return false;
+ };
+ if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
+ return false;
+ return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}});
+}
+
+void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) {
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ unsigned NumEltBits = Ty.getScalarSizeInBits();
+
+ Builder.setInstrAndDebugLoc(MI);
+ auto LogBase2 = buildLogBase2(RHS, Builder);
+ auto ShiftAmt =
+ Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
+ auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
+ Builder.buildLShr(Dst, LHS, Trunc);
+ MI.eraseFromParent();
+}
+
+bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ unsigned Opc = MI.getOpcode();
+ assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
+ Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
+ Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register X = MI.getOperand(1).getReg();
+ Register Y = MI.getOperand(2).getReg();
+ LLT Type = MRI.getType(Dst);
+
+ // fold (fadd x, fneg(y)) -> (fsub x, y)
+ // fold (fadd fneg(y), x) -> (fsub x, y)
+  // G_FADD is commutative so both cases are checked by m_GFAdd
+ if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
+ Opc = TargetOpcode::G_FSUB;
+ }
+  // fold (fsub x, fneg(y)) -> (fadd x, y)
+ else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
+ Opc = TargetOpcode::G_FADD;
+ }
+ // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
+ // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
+ // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
+ // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
+ else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
+ Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
+ mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
+ mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
+ // no opcode change
+ } else
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Observer.changingInstr(MI);
+ MI.setDesc(B.getTII().get(Opc));
+ MI.getOperand(1).setReg(X);
+ MI.getOperand(2).setReg(Y);
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;