Diffstat (limited to 'llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp')
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp  576
1 files changed, 459 insertions, 117 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index a103e8e4e6e0..194961ae3b21 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -9,6 +9,8 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -17,11 +19,13 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "gi-combiner"
using namespace llvm;
+using namespace MIPatternMatch;
// Option to allow testing of the combiner while no targets know about indexed
// addressing.
@@ -33,9 +37,10 @@ static cl::opt<bool>
CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
MachineIRBuilder &B, GISelKnownBits *KB,
- MachineDominatorTree *MDT)
+ MachineDominatorTree *MDT,
+ const LegalizerInfo *LI)
: Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
- KB(KB), MDT(MDT) {
+ KB(KB), MDT(MDT), LI(LI) {
(void)this->KB;
}
@@ -74,36 +79,7 @@ bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
return false;
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
-
- // Give up if either DstReg or SrcReg is a physical register.
- if (Register::isPhysicalRegister(DstReg) ||
- Register::isPhysicalRegister(SrcReg))
- return false;
-
- // Give up the types don't match.
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
- // Give up if one has a valid LLT, but the other doesn't.
- if (DstTy.isValid() != SrcTy.isValid())
- return false;
- // Give up if the types don't match.
- if (DstTy.isValid() && SrcTy.isValid() && DstTy != SrcTy)
- return false;
-
- // Get the register banks and classes.
- const RegisterBank *DstBank = MRI.getRegBankOrNull(DstReg);
- const RegisterBank *SrcBank = MRI.getRegBankOrNull(SrcReg);
- const TargetRegisterClass *DstRC = MRI.getRegClassOrNull(DstReg);
- const TargetRegisterClass *SrcRC = MRI.getRegClassOrNull(SrcReg);
-
- // Replace if the register constraints match.
- if ((SrcRC == DstRC) && (SrcBank == DstBank))
- return true;
- // Replace if DstReg has no constraints.
- if (!DstBank && !DstRC)
- return true;
-
- return false;
+ return canReplaceReg(DstReg, SrcReg, MRI);
}
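
An illustrative aside, not part of the patch: the inline checks deleted above, and presumably the canReplaceReg() helper that now stands in for them, reduce to a small predicate: both registers virtual, LLTs either both invalid or identical, and either matching bank/class constraints or an entirely unconstrained destination. A standalone sketch of that predicate over hypothetical stand-in types (RegInfo below is not an LLVM type):

  #include <cassert>
  #include <string>

  // Hypothetical stand-in for a vreg's constraints; an empty string models
  // "no LLT" / "no register bank" / "no register class".
  struct RegInfo {
    bool IsPhysical;
    std::string Type, Bank, Class;
  };

  // The conditions the deleted code checked before replacing Dst with Src.
  static bool canReplace(const RegInfo &Dst, const RegInfo &Src) {
    if (Dst.IsPhysical || Src.IsPhysical)
      return false;                               // only virtual registers
    if (Dst.Type.empty() != Src.Type.empty())
      return false;                               // one typed, one untyped
    if (!Dst.Type.empty() && Dst.Type != Src.Type)
      return false;                               // types must match
    if (Dst.Bank == Src.Bank && Dst.Class == Src.Class)
      return true;                                // identical constraints
    return Dst.Bank.empty() && Dst.Class.empty(); // or Dst is unconstrained
  }

  int main() {
    RegInfo Src{false, "s64", "gpr", ""};
    assert(canReplace({false, "s64", "gpr", ""}, Src));  // same type/constraints
    assert(canReplace({false, "s64", "", ""}, Src));     // unconstrained dest
    assert(!canReplace({false, "s32", "gpr", ""}, Src)); // type mismatch
    assert(!canReplace({false, "s64", "fpr", ""}, Src)); // conflicting bank
  }
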
void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
Register DstReg = MI.getOperand(0).getReg();
@@ -294,7 +270,7 @@ namespace {
/// Select a preference between two uses. CurrentUse is the current preference
/// while *ForCandidate is attributes of the candidate under consideration.
PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
- const LLT &TyForCandidate,
+ const LLT TyForCandidate,
unsigned OpcodeForCandidate,
MachineInstr *MIForCandidate) {
if (!CurrentUse.Ty.isValid()) {
@@ -428,10 +404,23 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
? TargetOpcode::G_SEXT
: TargetOpcode::G_ZEXT;
Preferred = {LLT(), PreferredOpcode, nullptr};
- for (auto &UseMI : MRI.use_instructions(LoadValue.getReg())) {
+ for (auto &UseMI : MRI.use_nodbg_instructions(LoadValue.getReg())) {
if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
- UseMI.getOpcode() == TargetOpcode::G_ANYEXT) {
+ (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
+ // Check for legality.
+ if (LI) {
+ LegalityQuery::MemDesc MMDesc;
+ const auto &MMO = **MI.memoperands_begin();
+ MMDesc.SizeInBits = MMO.getSizeInBits();
+ MMDesc.AlignInBits = MMO.getAlign().value() * 8;
+ MMDesc.Ordering = MMO.getOrdering();
+ LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ if (LI->getAction({MI.getOpcode(), {UseTy, SrcTy}, {MMDesc}}).Action !=
+ LegalizeActions::Legal)
+ continue;
+ }
Preferred = ChoosePreferredUse(Preferred,
MRI.getType(UseMI.getOperand(0).getReg()),
UseMI.getOpcode(), &UseMI);
@@ -498,7 +487,7 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
Register UseDstReg = UseMI->getOperand(0).getReg();
MachineOperand &UseSrcMO = UseMI->getOperand(1);
- const LLT &UseDstTy = MRI.getType(UseDstReg);
+ const LLT UseDstTy = MRI.getType(UseDstReg);
if (UseDstReg != ChosenDstReg) {
if (Preferred.Ty == UseDstTy) {
// If the use has the same type as the preferred use, then merge
@@ -559,7 +548,10 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
Observer.changedInstr(MI);
}
-bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) {
+bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
+ const MachineInstr &UseMI) {
+ assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
+ "shouldn't consider debug uses");
assert(DefMI.getParent() == UseMI.getParent());
if (&DefMI == &UseMI)
return false;
@@ -572,7 +564,10 @@ bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) {
llvm_unreachable("Block must contain instructions");
}
-bool CombinerHelper::dominates(MachineInstr &DefMI, MachineInstr &UseMI) {
+bool CombinerHelper::dominates(const MachineInstr &DefMI,
+ const MachineInstr &UseMI) {
+ assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
+ "shouldn't consider debug uses");
if (MDT)
return MDT->dominates(&DefMI, &UseMI);
else if (DefMI.getParent() != UseMI.getParent())
@@ -581,6 +576,24 @@ bool CombinerHelper::dominates(MachineInstr &DefMI, MachineInstr &UseMI) {
return isPredecessor(DefMI, UseMI);
}
+bool CombinerHelper::matchSextAlreadyExtended(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+ Register SrcReg = MI.getOperand(1).getReg();
+ unsigned SrcSignBits = KB->computeNumSignBits(SrcReg);
+ unsigned NumSextBits =
+ MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() -
+ MI.getOperand(2).getImm();
+ return SrcSignBits >= NumSextBits;
+}
+
+bool CombinerHelper::applySextAlreadyExtended(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+ MachineIRBuilder MIB(MI);
+ MIB.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
+ MI.eraseFromParent();
+ return true;
+}
+
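
Side note, illustrative only: the new match asks GISelKnownBits how many leading sign bits the source already carries and, when the extension cannot change the value, the apply turns the G_SEXT_INREG into a plain COPY. A minimal standalone sketch of the underlying arithmetic, with a toy counter standing in for computeNumSignBits:

  #include <cassert>
  #include <cstdint>

  // Count leading bits equal to the sign bit (what computeNumSignBits reports
  // when the value is fully known). Assumes arithmetic right shifts.
  static unsigned numSignBits(int32_t V) {
    unsigned N = 1;
    while (N < 32 && (((V >> (31 - N)) ^ (V >> 31)) & 1) == 0)
      ++N;
    return N;
  }

  // What G_SEXT_INREG %x, B computes: sign-extend the low B bits to 32 bits.
  static int32_t sextInReg(int32_t V, unsigned B) {
    unsigned Sh = 32 - B;
    return int32_t(uint32_t(V) << Sh) >> Sh;
  }

  int main() {
    int32_t V = -112;              // 0xFFFFFF90: bits 31..7 are all ones
    assert(numSignBits(V) == 25);  // already sign-extended from 8 bits
    // Sign-extending in-register from any width B >= 8 is a no-op here,
    // which is exactly the case the match rewrites into a COPY.
    for (unsigned B = 8; B <= 32; ++B)
      assert(sextInReg(V, B) == V);
  }
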
bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
Register &Base, Register &Offset) {
auto &MF = *MI.getParent()->getParent();
@@ -599,7 +612,7 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
- for (auto &Use : MRI.use_instructions(Base)) {
+ for (auto &Use : MRI.use_nodbg_instructions(Base)) {
if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
continue;
@@ -626,7 +639,8 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
// forming an indexed one.
bool MemOpDominatesAddrUses = true;
- for (auto &PtrAddUse : MRI.use_instructions(Use.getOperand(0).getReg())) {
+ for (auto &PtrAddUse :
+ MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
if (!dominates(MI, PtrAddUse)) {
MemOpDominatesAddrUses = false;
break;
@@ -661,7 +675,7 @@ bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
Addr = MI.getOperand(1).getReg();
MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
- if (!AddrDef || MRI.hasOneUse(Addr))
+ if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
return false;
Base = AddrDef->getOperand(1).getReg();
@@ -699,7 +713,7 @@ bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
// FIXME: check whether all uses of the base pointer are constant PtrAdds.
// That might allow us to end base's liveness here by adjusting the constant.
- for (auto &UseMI : MRI.use_instructions(Addr)) {
+ for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
if (!dominates(MI, UseMI)) {
LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses.");
return false;
@@ -811,7 +825,7 @@ bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
- !MRI.hasOneUse(CmpMI->getOperand(0).getReg()))
+ !MRI.hasOneNonDBGUse(CmpMI->getOperand(0).getReg()))
return false;
return true;
}
@@ -854,38 +868,32 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
// Returns a list of types to use for memory op lowering in MemOps. A partial
// port of findOptimalMemOpLowering in TargetLowering.
-static bool findGISelOptimalMemOpLowering(
- std::vector<LLT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign,
- unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
- bool AllowOverlap, unsigned DstAS, unsigned SrcAS,
- const AttributeList &FuncAttributes, const TargetLowering &TLI) {
- // If 'SrcAlign' is zero, that means the memory operation does not need to
- // load the value, i.e. memset or memcpy from constant string. Otherwise,
- // it's the inferred alignment of the source. 'DstAlign', on the other hand,
- // is the specified alignment of the memory operation. If it is zero, that
- // means it's possible to change the alignment of the destination.
- // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
- // not need to be loaded.
- if (SrcAlign != 0 && SrcAlign < DstAlign)
+static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
+ unsigned Limit, const MemOp &Op,
+ unsigned DstAS, unsigned SrcAS,
+ const AttributeList &FuncAttributes,
+ const TargetLowering &TLI) {
+ if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
return false;
- LLT Ty = TLI.getOptimalMemOpLLT(Size, DstAlign, SrcAlign, IsMemset,
- ZeroMemset, MemcpyStrSrc, FuncAttributes);
+ LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
if (Ty == LLT()) {
// Use the largest scalar type whose alignment constraints are satisfied.
// We only need to check DstAlign here as SrcAlign is always greater or
// equal to DstAlign (or zero).
Ty = LLT::scalar(64);
- while (DstAlign && DstAlign < Ty.getSizeInBytes() &&
- !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, DstAlign))
- Ty = LLT::scalar(Ty.getSizeInBytes());
+ if (Op.isFixedDstAlign())
+ while (Op.getDstAlign() < Ty.getSizeInBytes() &&
+ !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
+ Ty = LLT::scalar(Ty.getSizeInBytes());
assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
// FIXME: check for the largest legal type we can load/store to.
}
unsigned NumMemOps = 0;
- while (Size != 0) {
+ uint64_t Size = Op.size();
+ while (Size) {
unsigned TySize = Ty.getSizeInBytes();
while (TySize > Size) {
// For now, only use non-vector load / store's for the left-over pieces.
@@ -903,9 +911,10 @@ static bool findGISelOptimalMemOpLowering(
bool Fast;
// Need to get a VT equivalent for allowMisalignedMemoryAccesses().
MVT VT = getMVTForLLT(Ty);
- if (NumMemOps && AllowOverlap && NewTySize < Size &&
+ if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
TLI.allowsMisalignedMemoryAccesses(
- VT, DstAS, DstAlign, MachineMemOperand::MONone, &Fast) &&
+ VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
+ MachineMemOperand::MONone, &Fast) &&
Fast)
TySize = Size;
else {
@@ -926,8 +935,8 @@ static bool findGISelOptimalMemOpLowering(
static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
if (Ty.isVector())
- return VectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
- Ty.getNumElements());
+ return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
+ Ty.getNumElements());
return IntegerType::get(C, Ty.getSizeInBits());
}
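
To make the size decomposition concrete (a sketch only, not patch content): the greedy loop in findGISelOptimalMemOpLowering covers the remaining size with the widest type that still fits, halves the width for the left-over tail, and, when overlap is allowed, keeps one wide access that overlaps the previous one instead of splitting further. The same bookkeeping in plain C++, ignoring the alignment and legality queries the real code sends to the TargetLowering:

  #include <cassert>
  #include <cstdint>
  #include <vector>

  // Split Size bytes into access widths, starting from MaxBytes per access.
  static std::vector<unsigned> splitMemOp(uint64_t Size, unsigned MaxBytes,
                                          bool AllowOverlap) {
    std::vector<unsigned> Widths;
    unsigned TyBytes = MaxBytes;
    while (Size) {
      uint64_t Advance = TyBytes;
      while (Advance > Size) {
        unsigned Narrower = TyBytes / 2;
        // If an earlier access exists and even the narrower width would not
        // cover the tail in one go, keep the wide access and let it overlap.
        if (!Widths.empty() && AllowOverlap && Narrower < Size) {
          Advance = Size;
          break;
        }
        TyBytes = Narrower; // otherwise fall back to the next narrower scalar
        Advance = TyBytes;
      }
      Widths.push_back(TyBytes);
      Size -= Advance;
    }
    return Widths;
  }

  int main() {
    // 27 bytes with an 8-byte widest type and no overlap: 8 + 8 + 8 + 2 + 1.
    assert((splitMemOp(27, 8, false) == std::vector<unsigned>{8, 8, 8, 2, 1}));
    // Overlap allowed: the 3-byte tail becomes a single 4-byte access that
    // overlaps the previous chunk by one byte.
    assert((splitMemOp(27, 8, true) == std::vector<unsigned>{8, 8, 8, 4}));
  }
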
@@ -942,12 +951,14 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
APInt SplatVal = APInt::getSplat(NumBits, Scalar);
return MIB.buildConstant(Ty, SplatVal).getReg(0);
}
- // FIXME: for vector types create a G_BUILD_VECTOR.
- if (Ty.isVector())
- return Register();
// Extend the byte value to the larger type, and then multiply by a magic
// value 0x010101... in order to replicate it across every byte.
+ // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
+ if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
+ return MIB.buildConstant(Ty, 0).getReg(0);
+ }
+
LLT ExtType = Ty.getScalarType();
auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
if (NumBits > 8) {
@@ -956,13 +967,16 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
}
- assert(ExtType == Ty && "Vector memset value type not supported yet");
+ // For vector types create a G_BUILD_VECTOR.
+ if (Ty.isVector())
+ Val = MIB.buildSplatVector(Ty, Val).getReg(0);
+
return Val;
}
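
A quick standalone check of the identity behind this change (not from the patch): getMemsetValue replicates the memset byte by zero-extending it and multiplying by the 0x0101...01 splat constant, with an all-zero value short-circuited to a plain G_CONSTANT 0 and vector destinations finished off with a splat:

  #include <cassert>
  #include <cstdint>

  // Replicate a byte across an NumBits-wide unsigned value the same way the
  // combine does: zero-extend, then multiply by 0x0101...01.
  static uint64_t replicateByte(uint8_t Byte, unsigned NumBits) {
    uint64_t Magic = ~0ULL / 0xFF;    // 0x0101010101010101
    if (NumBits < 64)
      Magic &= (1ULL << NumBits) - 1; // truncate to the target width
    return uint64_t(Byte) * Magic;
  }

  int main() {
    assert(replicateByte(0xAB, 64) == 0xABABABABABABABABULL);
    assert(replicateByte(0xAB, 32) == 0xABABABABULL);
    assert(replicateByte(0x00, 64) == 0); // the zero case needs no multiply
  }
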
-bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val,
- unsigned KnownLen, unsigned Align,
- bool IsVolatile) {
+bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
+ Register Val, unsigned KnownLen,
+ Align Alignment, bool IsVolatile) {
auto &MF = *MI.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
auto &DL = MF.getDataLayout();
@@ -987,24 +1001,25 @@ bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val
auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
- if (!findGISelOptimalMemOpLowering(
- MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Align), 0,
- /*IsMemset=*/true,
- /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false,
- /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), ~0u,
- MF.getFunction().getAttributes(), TLI))
+ if (!findGISelOptimalMemOpLowering(MemOps, Limit,
+ MemOp::Set(KnownLen, DstAlignCanChange,
+ Alignment,
+ /*IsZeroMemset=*/IsZeroVal,
+ /*IsVolatile=*/IsVolatile),
+ DstPtrInfo.getAddrSpace(), ~0u,
+ MF.getFunction().getAttributes(), TLI))
return false;
if (DstAlignCanChange) {
// Get an estimate of the type from the LLT.
Type *IRTy = getTypeForLLT(MemOps[0], C);
- unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
- if (NewAlign > Align) {
- Align = NewAlign;
+ Align NewAlign = DL.getABITypeAlign(IRTy);
+ if (NewAlign > Alignment) {
+ Alignment = NewAlign;
unsigned FI = FIDef->getOperand(1).getIndex();
// Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlignment(FI) < Align)
- MFI.setObjectAlignment(FI, Align);
+ if (MFI.getObjectAlign(FI) < Alignment)
+ MFI.setObjectAlignment(FI, Alignment);
}
}
@@ -1072,10 +1087,9 @@ bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val
return true;
}
-
bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
Register Src, unsigned KnownLen,
- unsigned DstAlign, unsigned SrcAlign,
+ Align DstAlign, Align SrcAlign,
bool IsVolatile) {
auto &MF = *MI.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
@@ -1087,7 +1101,7 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
bool DstAlignCanChange = false;
MachineFrameInfo &MFI = MF.getFrameInfo();
bool OptSize = shouldLowerMemFuncForSize(MF);
- unsigned Alignment = MinAlign(DstAlign, SrcAlign);
+ Align Alignment = commonAlignment(DstAlign, SrcAlign);
MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
@@ -1106,32 +1120,30 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
if (!findGISelOptimalMemOpLowering(
- MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment),
- SrcAlign,
- /*IsMemset=*/false,
- /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
- /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(),
- SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI))
+ MemOps, Limit,
+ MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
+ IsVolatile),
+ DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+ MF.getFunction().getAttributes(), TLI))
return false;
if (DstAlignCanChange) {
// Get an estimate of the type from the LLT.
Type *IRTy = getTypeForLLT(MemOps[0], C);
- unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
+ Align NewAlign = DL.getABITypeAlign(IRTy);
// Don't promote to an alignment that would require dynamic stack
// realignment.
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->needsStackRealignment(MF))
- while (NewAlign > Alignment &&
- DL.exceedsNaturalStackAlignment(Align(NewAlign)))
- NewAlign /= 2;
+ while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+ NewAlign = NewAlign / 2;
if (NewAlign > Alignment) {
Alignment = NewAlign;
unsigned FI = FIDef->getOperand(1).getIndex();
// Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlignment(FI) < Alignment)
+ if (MFI.getObjectAlign(FI) < Alignment)
MFI.setObjectAlignment(FI, Alignment);
}
}
@@ -1156,7 +1168,7 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
// Construct MMOs for the accesses.
auto *LoadMMO =
MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
- auto *StoreMMO =
+ auto *StoreMMO =
MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
// Create the load.
@@ -1182,9 +1194,9 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
}
bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
- Register Src, unsigned KnownLen,
- unsigned DstAlign, unsigned SrcAlign,
- bool IsVolatile) {
+ Register Src, unsigned KnownLen,
+ Align DstAlign, Align SrcAlign,
+ bool IsVolatile) {
auto &MF = *MI.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
auto &DL = MF.getDataLayout();
@@ -1195,7 +1207,7 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
bool DstAlignCanChange = false;
MachineFrameInfo &MFI = MF.getFrameInfo();
bool OptSize = shouldLowerMemFuncForSize(MF);
- unsigned Alignment = MinAlign(DstAlign, SrcAlign);
+ Align Alignment = commonAlignment(DstAlign, SrcAlign);
MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
@@ -1213,32 +1225,30 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
// to a bug in it's findOptimalMemOpLowering implementation. For now do the
// same thing here.
if (!findGISelOptimalMemOpLowering(
- MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment),
- SrcAlign,
- /*IsMemset=*/false,
- /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
- /*AllowOverlap=*/false, DstPtrInfo.getAddrSpace(),
- SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI))
+ MemOps, Limit,
+ MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
+ /*IsVolatile*/ true),
+ DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+ MF.getFunction().getAttributes(), TLI))
return false;
if (DstAlignCanChange) {
// Get an estimate of the type from the LLT.
Type *IRTy = getTypeForLLT(MemOps[0], C);
- unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
+ Align NewAlign = DL.getABITypeAlign(IRTy);
// Don't promote to an alignment that would require dynamic stack
// realignment.
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->needsStackRealignment(MF))
- while (NewAlign > Alignment &&
- DL.exceedsNaturalStackAlignment(Align(NewAlign)))
- NewAlign /= 2;
+ while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+ NewAlign = NewAlign / 2;
if (NewAlign > Alignment) {
Alignment = NewAlign;
unsigned FI = FIDef->getOperand(1).getIndex();
// Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlignment(FI) < Alignment)
+ if (MFI.getObjectAlign(FI) < Alignment)
MFI.setObjectAlignment(FI, Alignment);
}
}
@@ -1304,8 +1314,8 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
if (IsVolatile)
return false;
- unsigned DstAlign = MemOp->getBaseAlignment();
- unsigned SrcAlign = 0;
+ Align DstAlign = MemOp->getBaseAlign();
+ Align SrcAlign;
Register Dst = MI.getOperand(1).getReg();
Register Src = MI.getOperand(2).getReg();
Register Len = MI.getOperand(3).getReg();
@@ -1313,7 +1323,7 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
if (ID != Intrinsic::memset) {
assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
MemOp = *(++MMOIt);
- SrcAlign = MemOp->getBaseAlignment();
+ SrcAlign = MemOp->getBaseAlign();
}
// See if this is a constant length copy
@@ -1385,6 +1395,338 @@ bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
return true;
}
+bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
+ unsigned &ShiftVal) {
+ assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
+ auto MaybeImmVal =
+ getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ if (!MaybeImmVal || !isPowerOf2_64(MaybeImmVal->Value))
+ return false;
+ ShiftVal = Log2_64(MaybeImmVal->Value);
+ return true;
+}
+
+bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
+ unsigned &ShiftVal) {
+ assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
+ MachineIRBuilder MIB(MI);
+ LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
+ auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
+ Observer.changingInstr(MI);
+ MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
+ MI.getOperand(2).setReg(ShiftCst.getReg(0));
+ Observer.changedInstr(MI);
+ return true;
+}
+
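
For illustration only: the match accepts a constant power-of-two multiplier and records its log2 as the new shift amount, since multiplying by 1 << S and shifting left by S are the same modular operation. A standalone check (log2u64 below is a toy stand-in for llvm::Log2_64):

  #include <cassert>
  #include <cstdint>

  static bool isPow2(uint64_t V) { return V && (V & (V - 1)) == 0; }

  static unsigned log2u64(uint64_t V) {
    unsigned S = 0;
    while (V >>= 1)
      ++S;
    return S;
  }

  int main() {
    uint64_t X = 0x123456789ULL;
    for (unsigned S = 0; S < 64; ++S) {
      uint64_t K = 1ULL << S;
      assert(isPow2(K));
      assert(log2u64(K) == S);   // what the match stores in ShiftVal
      assert(X * K == (X << S)); // the rewrite applyCombineMulToShl performs
    }
    assert(!isPow2(12));         // non-powers of two are rejected
  }
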
+bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
+ unsigned TargetShiftSize,
+ unsigned &ShiftVal) {
+ assert((MI.getOpcode() == TargetOpcode::G_SHL ||
+ MI.getOpcode() == TargetOpcode::G_LSHR ||
+ MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
+
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (Ty.isVector()) // TODO:
+ return false;
+
+ // Don't narrow further than the requested size.
+ unsigned Size = Ty.getSizeInBits();
+ if (Size <= TargetShiftSize)
+ return false;
+
+ auto MaybeImmVal =
+ getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ ShiftVal = MaybeImmVal->Value;
+ return ShiftVal >= Size / 2 && ShiftVal < Size;
+}
+
+bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
+ const unsigned &ShiftVal) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT Ty = MRI.getType(SrcReg);
+ unsigned Size = Ty.getSizeInBits();
+ unsigned HalfSize = Size / 2;
+ assert(ShiftVal >= HalfSize);
+
+ LLT HalfTy = LLT::scalar(HalfSize);
+
+ Builder.setInstr(MI);
+ auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
+ unsigned NarrowShiftAmt = ShiftVal - HalfSize;
+
+ if (MI.getOpcode() == TargetOpcode::G_LSHR) {
+ Register Narrowed = Unmerge.getReg(1);
+
+ // dst = G_LSHR s64:x, C for C >= 32
+ // =>
+ // lo, hi = G_UNMERGE_VALUES x
+ // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
+
+ if (NarrowShiftAmt != 0) {
+ Narrowed = Builder.buildLShr(HalfTy, Narrowed,
+ Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
+ }
+
+ auto Zero = Builder.buildConstant(HalfTy, 0);
+ Builder.buildMerge(DstReg, { Narrowed, Zero });
+ } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
+ Register Narrowed = Unmerge.getReg(0);
+ // dst = G_SHL s64:x, C for C >= 32
+ // =>
+ // lo, hi = G_UNMERGE_VALUES x
+ // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
+ if (NarrowShiftAmt != 0) {
+ Narrowed = Builder.buildShl(HalfTy, Narrowed,
+ Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
+ }
+
+ auto Zero = Builder.buildConstant(HalfTy, 0);
+ Builder.buildMerge(DstReg, { Zero, Narrowed });
+ } else {
+ assert(MI.getOpcode() == TargetOpcode::G_ASHR);
+ auto Hi = Builder.buildAShr(
+ HalfTy, Unmerge.getReg(1),
+ Builder.buildConstant(HalfTy, HalfSize - 1));
+
+ if (ShiftVal == HalfSize) {
+ // (G_ASHR i64:x, 32) ->
+ // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
+ Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
+ } else if (ShiftVal == Size - 1) {
+ // Don't need a second shift.
+ // (G_ASHR i64:x, 63) ->
+ // %narrowed = (G_ASHR hi_32(x), 31)
+ // G_MERGE_VALUES %narrowed, %narrowed
+ Builder.buildMerge(DstReg, { Hi, Hi });
+ } else {
+ auto Lo = Builder.buildAShr(
+ HalfTy, Unmerge.getReg(1),
+ Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
+
+ // (G_ASHR i64:x, C) ->, for C >= 32
+ // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
+ Builder.buildMerge(DstReg, { Lo, Hi });
+ }
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
+
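
Side note (illustrative, not patch content): these rewrites split a 64-bit shift whose constant amount is at least 32 into operations on the 32-bit halves of the value. The three cases can be checked against the native 64-bit shifts in plain C++, assuming two's-complement representation and arithmetic right shifts (as C++20 guarantees):

  #include <cassert>
  #include <cstdint>

  static uint32_t lo(uint64_t X) { return uint32_t(X); }
  static uint32_t hi(uint64_t X) { return uint32_t(X >> 32); }
  static uint64_t merge(uint32_t Lo, uint32_t Hi) {
    return uint64_t(Lo) | (uint64_t(Hi) << 32);
  }

  int main() {
    uint64_t X = 0x8123456789ABCDEFULL;
    for (unsigned C = 32; C < 64; ++C) {
      // G_LSHR x, C  ->  merge(hi(x) >> (C - 32), 0)
      assert((X >> C) == merge(hi(X) >> (C - 32), 0));
      // G_SHL x, C   ->  merge(0, lo(x) << (C - 32))
      assert((X << C) == merge(0, lo(X) << (C - 32)));
      // G_ASHR x, C  ->  merge(ashr(hi(x), C - 32), ashr(hi(x), 31))
      int32_t H = int32_t(hi(X));
      assert(uint64_t(int64_t(X) >> C) ==
             merge(uint32_t(H >> (C - 32)), uint32_t(H >> 31)));
    }
  }
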
+bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
+ unsigned TargetShiftAmount) {
+ unsigned ShiftAmt;
+ if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
+ applyCombineShiftToUnmerge(MI, ShiftAmt);
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
+ return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
+ return MO.isReg() &&
+ getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
+ });
+}
+
+bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
+ return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
+ return !MO.isReg() ||
+ getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
+ });
+}
+
+bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+ return all_of(Mask, [](int Elt) { return Elt < 0; });
+}
+
+bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_STORE);
+ return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
+ MRI);
+}
+
+bool CombinerHelper::eraseInst(MachineInstr &MI) {
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
+ const MachineOperand &MOP2) {
+ if (!MOP1.isReg() || !MOP2.isReg())
+ return false;
+ MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
+ if (!I1)
+ return false;
+ MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
+ if (!I2)
+ return false;
+
+ // Handle a case like this:
+ //
+ // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
+ //
+ // Even though %0 and %1 are produced by the same instruction they are not
+ // the same values.
+ if (I1 == I2)
+ return MOP1.getReg() == MOP2.getReg();
+
+ // If we have an instruction which loads or stores, we can't guarantee that
+ // it is identical.
+ //
+ // For example, we may have
+ //
+ // %x1 = G_LOAD %addr (load N from @somewhere)
+ // ...
+ // call @foo
+ // ...
+ // %x2 = G_LOAD %addr (load N from @somewhere)
+ // ...
+ // %or = G_OR %x1, %x2
+ //
+ // It's possible that @foo will modify whatever lives at the address we're
+ // loading from. To be safe, let's just assume that all loads and stores
+ // are different (unless we have something which is guaranteed to not
+ // change.)
+ if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
+ return false;
+
+ // Check for physical registers on the instructions first to avoid cases
+ // like this:
+ //
+ // %a = COPY $physreg
+ // ...
+ // SOMETHING implicit-def $physreg
+ // ...
+ // %b = COPY $physreg
+ //
+ // These copies are not equivalent.
+ if (any_of(I1->uses(), [](const MachineOperand &MO) {
+ return MO.isReg() && MO.getReg().isPhysical();
+ })) {
+ // Check if we have a case like this:
+ //
+ // %a = COPY $physreg
+ // %b = COPY %a
+ //
+ // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
+ // From that, we know that they must have the same value, since they must
+ // have come from the same COPY.
+ return I1->isIdenticalTo(*I2);
+ }
+
+ // We don't have any physical registers, so we don't necessarily need the
+ // same vreg defs.
+ //
+ // On the off-chance that there's some target instruction feeding into the
+ // instruction, let's use produceSameValue instead of isIdenticalTo.
+ return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
+}
+
+bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
+ if (!MOP.isReg())
+ return false;
+ // MIPatternMatch doesn't let us look through G_ZEXT etc.
+ auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
+ return ValAndVReg && ValAndVReg->Value == C;
+}
+
+bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
+ unsigned OpIdx) {
+ assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
+ Register OldReg = MI.getOperand(0).getReg();
+ Register Replacement = MI.getOperand(OpIdx).getReg();
+ assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
+ MI.eraseFromParent();
+ replaceRegWith(MRI, OldReg, Replacement);
+ return true;
+}
+
+bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SELECT);
+ // Match (cond ? x : x)
+ return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
+ canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
+ MRI);
+}
+
+bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
+ return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
+ canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
+ MRI);
+}
+
+bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
+ return matchConstantOp(MI.getOperand(OpIdx), 0) &&
+ canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
+ MRI);
+}
+
+bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
+ assert(MI.getNumDefs() == 1 && "Expected only one def?");
+ Builder.setInstr(MI);
+ Builder.buildFConstant(MI.getOperand(0), C);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
+ assert(MI.getNumDefs() == 1 && "Expected only one def?");
+ Builder.setInstr(MI);
+ Builder.buildConstant(MI.getOperand(0), C);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
+ assert(MI.getNumDefs() == 1 && "Expected only one def?");
+ Builder.setInstr(MI);
+ Builder.buildUndef(MI.getOperand(0));
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchSimplifyAddToSub(
+ MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ Register &NewLHS = std::get<0>(MatchInfo);
+ Register &NewRHS = std::get<1>(MatchInfo);
+
+ // Helper lambda to check for opportunities for
+ // ((0-A) + B) -> B - A
+ // (A + (0-B)) -> A - B
+ auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
+ int64_t Cst;
+ if (!mi_match(MaybeSub, MRI, m_GSub(m_ICst(Cst), m_Reg(NewRHS))) ||
+ Cst != 0)
+ return false;
+ NewLHS = MaybeNewLHS;
+ return true;
+ };
+
+ return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
+}
+
+bool CombinerHelper::applySimplifyAddToSub(
+ MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
+ Builder.setInstr(MI);
+ Register SubLHS, SubRHS;
+ std::tie(SubLHS, SubRHS) = MatchInfo;
+ Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
+ MI.eraseFromParent();
+ return true;
+}
+
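
A tiny standalone check, not part of the diff: the match recognizes an addend of the form (0 - X) and rewrites the G_ADD as a G_SUB; over wrapping two's-complement arithmetic, which is what G_ADD/G_SUB model, the identity always holds:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t A = 0xDEADBEEF, B = 0x12345678;
    assert((0u - A) + B == B - A); // ((0 - A) + B)  ->  G_SUB B, A
    assert(A + (0u - B) == A - B); // (A + (0 - B))  ->  G_SUB A, B
  }
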
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;