author     Dimitry Andric <dim@FreeBSD.org>   2023-09-02 21:17:18 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2023-12-08 17:34:50 +0000
commit     06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e (patch)
tree       62f873df87c7c675557a179e0c4c83fe9f3087bc /contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
parent     cf037972ea8863e2bab7461d77345367d2c1e054 (diff)
parent     7fa27ce4a07f19b07799a767fc29416f3b625afb (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp')
-rw-r--r--   contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 535
1 file changed, 184 insertions(+), 351 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index af4bb1634746..cc7fb3ee1109 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -16,7 +16,7 @@
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -399,7 +399,8 @@ namespace {
/// Select a preference between two uses. CurrentUse is the current preference
/// while *ForCandidate is attributes of the candidate under consideration.
-PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
+PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
+ PreferredTuple &CurrentUse,
const LLT TyForCandidate,
unsigned OpcodeForCandidate,
MachineInstr *MIForCandidate) {
@@ -425,8 +426,10 @@ PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
// Prefer sign extensions to zero extensions as sign-extensions tend to be
- // more expensive.
- if (CurrentUse.Ty == TyForCandidate) {
+ // more expensive. Don't do this if the load is already a zero-extend load
+ // though, otherwise we'll rewrite a zero-extend load into a sign-extend
+ // later.
+ if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
OpcodeForCandidate == TargetOpcode::G_ZEXT)
return CurrentUse;
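A minimal standalone sketch (plain C++, not from the patch) of why the new isa<GZExtLoad> guard matters: zero- and sign-extension of the same loaded byte can disagree, so a zero-extending load must not be rewritten into a sign-extending one.

  #include <cassert>
  #include <cstdint>
  int main() {
    uint8_t Mem = 0x80;                  // high bit set in memory
    int64_t ZExt = uint64_t(Mem);        // 0x0000000000000080
    int64_t SExt = int64_t(int8_t(Mem)); // 0xFFFFFFFFFFFFFF80
    assert(ZExt != SExt);                // the two rewrites are not equivalent
  }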
@@ -535,7 +538,7 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
// For non power-of-2 types, they will very likely be legalized into multiple
// loads. Don't bother trying to match them into extending loads.
- if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
+ if (!llvm::has_single_bit<uint32_t>(LoadValueTy.getSizeInBits()))
return false;
// Find the preferred type aside from the any-extends (unless it's the only
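llvm::has_single_bit mirrors C++20 std::has_single_bit, so the standard version shows the predicate's behavior (a sketch, assuming the usual power-of-two semantics):

  #include <bit>
  int main() {
    static_assert(std::has_single_bit(64u));  // 2^6: exactly one set bit
    static_assert(!std::has_single_bit(48u)); // an s48 value would likely be
                                              // legalized into multiple loads
  }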
@@ -566,7 +569,7 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
.Action != LegalizeActions::Legal)
continue;
}
- Preferred = ChoosePreferredUse(Preferred,
+ Preferred = ChoosePreferredUse(MI, Preferred,
MRI.getType(UseMI.getOperand(0).getReg()),
UseMI.getOpcode(), &UseMI);
}
@@ -727,7 +730,7 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
Register PtrReg = LoadMI->getPointerReg();
unsigned RegSize = RegTy.getSizeInBits();
uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
- unsigned MaskSizeBits = MaskVal.countTrailingOnes();
+ unsigned MaskSizeBits = MaskVal.countr_one();
// The mask may not be larger than the in-memory type, as it might cover sign
// extended bits
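APInt::countr_one is the renamed countTrailingOnes; its semantics match C++20 std::countr_one, counting the contiguous low set bits of the mask (illustrative values):

  #include <bit>
  int main() {
    static_assert(std::countr_one(0xFFu) == 8); // mask covers a full s8 load
    static_assert(std::countr_one(0x0Fu) == 4); // narrower masks count too
  }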
@@ -1189,16 +1192,22 @@ void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
// Check which instruction is first in the block so we don't break def-use
- // deps by "moving" the instruction incorrectly.
- if (dominates(MI, *OtherMI))
+ // deps by "moving" the instruction incorrectly. Also keep track of which
+ // instruction is first so we pick it's operands, avoiding use-before-def
+ // bugs.
+ MachineInstr *FirstInst;
+ if (dominates(MI, *OtherMI)) {
Builder.setInstrAndDebugLoc(MI);
- else
+ FirstInst = &MI;
+ } else {
Builder.setInstrAndDebugLoc(*OtherMI);
+ FirstInst = OtherMI;
+ }
Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
: TargetOpcode::G_UDIVREM,
{DestDivReg, DestRemReg},
- {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
+ {FirstInst->getOperand(1), FirstInst->getOperand(2)});
MI.eraseFromParent();
OtherMI->eraseFromParent();
}
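The fused G_SDIVREM/G_UDIVREM defines both results at the insertion point, which is why the operands must come from whichever instruction dominates. A small sanity check of the quotient/remainder pair the fused op models (hypothetical values):

  #include <cassert>
  int main() {
    int a = 17, b = 5;
    int q = a / b, r = a % b; // the two results G_SDIVREM produces at once
    assert(q * b + r == a);   // the invariant tying them together
  }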
@@ -1285,65 +1294,57 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
LegalizerHelper::LegalizeResult::Legalized;
}
-static std::optional<APFloat>
-constantFoldFpUnary(unsigned Opcode, LLT DstTy, const Register Op,
- const MachineRegisterInfo &MRI) {
- const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
- if (!MaybeCst)
- return std::nullopt;
-
- APFloat V = MaybeCst->getValueAPF();
- switch (Opcode) {
+static APFloat constantFoldFpUnary(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const APFloat &Val) {
+ APFloat Result(Val);
+ switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode!");
case TargetOpcode::G_FNEG: {
- V.changeSign();
- return V;
+ Result.changeSign();
+ return Result;
}
case TargetOpcode::G_FABS: {
- V.clearSign();
- return V;
+ Result.clearSign();
+ return Result;
+ }
+ case TargetOpcode::G_FPTRUNC: {
+ bool Unused;
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Result.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven,
+ &Unused);
+ return Result;
}
- case TargetOpcode::G_FPTRUNC:
- break;
case TargetOpcode::G_FSQRT: {
bool Unused;
- V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
- V = APFloat(sqrt(V.convertToDouble()));
+ Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
+ &Unused);
+ Result = APFloat(sqrt(Result.convertToDouble()));
break;
}
case TargetOpcode::G_FLOG2: {
bool Unused;
- V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
- V = APFloat(log2(V.convertToDouble()));
+ Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
+ &Unused);
+ Result = APFloat(log2(Result.convertToDouble()));
break;
}
}
// Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
- // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`,
- // and `G_FLOG2` reach here.
+ // `buildFConstant` will assert on size mismatch. Only `G_FSQRT` and
+ // `G_FLOG2` reach here.
bool Unused;
- V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
- return V;
+ Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
+ return Result;
}
-bool CombinerHelper::matchCombineConstantFoldFpUnary(
- MachineInstr &MI, std::optional<APFloat> &Cst) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
- return Cst.has_value();
-}
-
-void CombinerHelper::applyCombineConstantFoldFpUnary(
- MachineInstr &MI, std::optional<APFloat> &Cst) {
- assert(Cst && "Optional is unexpectedly empty!");
+void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
+ const ConstantFP *Cst) {
Builder.setInstrAndDebugLoc(MI);
- MachineFunction &MF = Builder.getMF();
- auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
- Register DstReg = MI.getOperand(0).getReg();
- Builder.buildFConstant(DstReg, *FPVal);
+ APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
+ const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
+ Builder.buildFConstant(MI.getOperand(0), *NewCst);
MI.eraseFromParent();
}
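A hedged sketch of the folds this helper performs, using plain doubles in place of APFloat (the real code converts through IEEEdouble for the sqrt and log2 cases, as above):

  #include <cassert>
  #include <cmath>
  int main() {
    double V = 4.0;
    assert(-V == -4.0);           // G_FNEG: flip the sign bit
    assert(std::fabs(-V) == 4.0); // G_FABS: clear the sign bit
    assert(std::sqrt(V) == 2.0);  // G_FSQRT folded via host sqrt
    assert(std::log2(V) == 2.0);  // G_FLOG2 folded via host log2
  }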
@@ -1621,6 +1622,41 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
MI.eraseFromParent();
}
+bool CombinerHelper::matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
+ // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+ // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
+ auto &Shl = cast<GenericMachineInstr>(MI);
+ Register DstReg = Shl.getReg(0);
+ Register SrcReg = Shl.getReg(1);
+ Register ShiftReg = Shl.getReg(2);
+ Register X, C1;
+
+ if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
+ return false;
+
+ if (!mi_match(SrcReg, MRI,
+ m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
+ m_GOr(m_Reg(X), m_Reg(C1))))))
+ return false;
+
+ APInt C1Val, C2Val;
+ if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
+ !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
+ return false;
+
+ auto *SrcDef = MRI.getVRegDef(SrcReg);
+ assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
+ SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
+ LLT SrcTy = MRI.getType(SrcReg);
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto S1 = B.buildShl(SrcTy, X, ShiftReg);
+ auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
+ B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
+ };
+ return true;
+}
+
bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
unsigned &ShiftVal) {
assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
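The identity behind the new matchCommuteShift combine holds for any unsigned integers, since a left shift distributes over both addition (modulo 2^N) and bitwise OR; a quick check with hypothetical values:

  #include <cassert>
  int main() {
    unsigned x = 3, c1 = 5, c2 = 2;
    // (shl (add x, c1), c2) == (add (shl x, c2), (shl c1, c2))
    assert(((x + c1) << c2) == ((x << c2) + (c1 << c2)));
    // (shl (or x, c1), c2) == (or (shl x, c2), (shl c1, c2))
    assert(((x | c1) << c2) == ((x << c2) | (c1 << c2)));
  }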
@@ -1658,9 +1694,9 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
!mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
return false;
- // TODO: Should handle vector splat.
Register RHS = MI.getOperand(2).getReg();
- auto MaybeShiftAmtVal = getIConstantVRegValWithLookThrough(RHS, MRI);
+ MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
+ auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
if (!MaybeShiftAmtVal)
return false;
@@ -1675,12 +1711,13 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
return false;
}
- int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue();
+ int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
MatchData.Reg = ExtSrc;
MatchData.Imm = ShiftAmt;
- unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
- return MinLeadingZeros >= ShiftAmt;
+ unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one();
+ unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
+ return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
}
void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
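The added ShiftAmt < SrcTySize bound matters because the combine moves the shift into the extend's narrower source type, and shifting an N-bit value by N or more is poison. A sketch of the legal case (hypothetical types and values):

  #include <cassert>
  #include <cstdint>
  int main() {
    // (G_SHL (G_ZEXT s16->s32) x, 4) can become (G_ZEXT (G_SHL s16 x, 4))
    // when x has at least 4 known leading zeros; an amount of 16 or more
    // could never be moved into the s16 type.
    uint16_t Src = 0x00FF;                          // 8 known leading zeros
    uint32_t Wide = uint32_t(Src) << 4;             // shift in the wide type
    uint32_t Narrow = uint32_t(uint16_t(Src << 4)); // shift in the narrow type
    assert(Wide == Narrow && Wide == 0x0FF0);
  }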
@@ -1763,6 +1800,15 @@ void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
Register DstReg = MI.getOperand(Idx).getReg();
Register SrcReg = Operands[Idx];
+
+ // This combine may run after RegBankSelect, so we need to be aware of
+ // register banks.
+ const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
+ if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
+ SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
+ MRI.setRegClassOrRegBank(SrcReg, DstCB);
+ }
+
if (CanReuseInputDirectly)
replaceRegWith(MRI, DstReg, SrcReg);
else
@@ -2426,10 +2472,7 @@ bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
return true;
}
-bool CombinerHelper::eraseInst(MachineInstr &MI) {
- MI.eraseFromParent();
- return true;
-}
+void CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); }
bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
const MachineOperand &MOP2) {
@@ -2537,7 +2580,7 @@ bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
MaybeCst->getSExtValue() == C;
}
-bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
+void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
unsigned OpIdx) {
assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
Register OldReg = MI.getOperand(0).getReg();
@@ -2545,17 +2588,15 @@ bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
MI.eraseFromParent();
replaceRegWith(MRI, OldReg, Replacement);
- return true;
}
-bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
+void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
Register Replacement) {
assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
Register OldReg = MI.getOperand(0).getReg();
assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
MI.eraseFromParent();
replaceRegWith(MRI, OldReg, Replacement);
- return true;
}
bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
@@ -2590,36 +2631,32 @@ bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
}
-bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
+void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
Builder.buildFConstant(MI.getOperand(0), C);
MI.eraseFromParent();
- return true;
}
-bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
+void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
Builder.buildConstant(MI.getOperand(0), C);
MI.eraseFromParent();
- return true;
}
-bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
+void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
Builder.buildConstant(MI.getOperand(0), C);
MI.eraseFromParent();
- return true;
}
-bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
+void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
Builder.buildUndef(MI.getOperand(0));
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::matchSimplifyAddToSub(
@@ -2750,9 +2787,7 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
Register Y = RightHandInst->getOperand(1).getReg();
LLT XTy = MRI.getType(X);
LLT YTy = MRI.getType(Y);
- if (XTy != YTy)
- return false;
- if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
+ if (!XTy.isValid() || XTy != YTy)
return false;
// Optional extra source register.
@@ -2779,6 +2814,9 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
}
}
+ if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
+ return false;
+
// Record the steps to build the new instructions.
//
// Steps to build (logic x, y)
@@ -3227,7 +3265,7 @@ bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
/// \p SelectOperand is the operand in binary operator \p MI that is the select
/// to fold.
-bool CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
+void CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
const unsigned &SelectOperand) {
Builder.setInstrAndDebugLoc(MI);
@@ -3263,8 +3301,6 @@ bool CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
MI.eraseFromParent();
-
- return true;
}
std::optional<SmallVector<Register, 8>>
@@ -3612,275 +3648,6 @@ bool CombinerHelper::matchLoadOrCombine(
return true;
}
-/// Check if the store \p Store is a truncstore that can be merged. That is,
-/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
-/// Register then it does not need to match and SrcVal is set to the source
-/// value found.
-/// On match, returns the start byte offset of the \p SrcVal that is being
-/// stored.
-static std::optional<int64_t>
-getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
- MachineRegisterInfo &MRI) {
- Register TruncVal;
- if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
- return std::nullopt;
-
- // The shift amount must be a constant multiple of the narrow type.
- // It is translated to the offset address in the wide source value "y".
- //
- // x = G_LSHR y, ShiftAmtC
- // s8 z = G_TRUNC x
- // store z, ...
- Register FoundSrcVal;
- int64_t ShiftAmt;
- if (!mi_match(TruncVal, MRI,
- m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
- m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
- if (!SrcVal.isValid() || TruncVal == SrcVal) {
- if (!SrcVal.isValid())
- SrcVal = TruncVal;
- return 0; // If it's the lowest index store.
- }
- return std::nullopt;
- }
-
- unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
- if (ShiftAmt % NarrowBits != 0)
- return std::nullopt;
- const unsigned Offset = ShiftAmt / NarrowBits;
-
- if (SrcVal.isValid() && FoundSrcVal != SrcVal)
- return std::nullopt;
-
- if (!SrcVal.isValid())
- SrcVal = FoundSrcVal;
- else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
- return std::nullopt;
- return Offset;
-}
-
-/// Match a pattern where a wide type scalar value is stored by several narrow
-/// stores. Fold it into a single store or a BSWAP and a store if the targets
-/// supports it.
-///
-/// Assuming little endian target:
-/// i8 *p = ...
-/// i32 val = ...
-/// p[0] = (val >> 0) & 0xFF;
-/// p[1] = (val >> 8) & 0xFF;
-/// p[2] = (val >> 16) & 0xFF;
-/// p[3] = (val >> 24) & 0xFF;
-/// =>
-/// *((i32)p) = val;
-///
-/// i8 *p = ...
-/// i32 val = ...
-/// p[0] = (val >> 24) & 0xFF;
-/// p[1] = (val >> 16) & 0xFF;
-/// p[2] = (val >> 8) & 0xFF;
-/// p[3] = (val >> 0) & 0xFF;
-/// =>
-/// *((i32)p) = BSWAP(val);
-bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI,
- MergeTruncStoresInfo &MatchInfo) {
- auto &StoreMI = cast<GStore>(MI);
- LLT MemTy = StoreMI.getMMO().getMemoryType();
-
- // We only handle merging simple stores of 1-4 bytes.
- if (!MemTy.isScalar())
- return false;
- switch (MemTy.getSizeInBits()) {
- case 8:
- case 16:
- case 32:
- break;
- default:
- return false;
- }
- if (!StoreMI.isSimple())
- return false;
-
- // We do a simple search for mergeable stores prior to this one.
- // Any potential alias hazard along the way terminates the search.
- SmallVector<GStore *> FoundStores;
-
- // We're looking for:
- // 1) a (store(trunc(...)))
- // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
- // the partial value stored.
- // 3) where the offsets form either a little or big-endian sequence.
-
- auto &LastStore = StoreMI;
-
- // The single base pointer that all stores must use.
- Register BaseReg;
- int64_t LastOffset;
- if (!mi_match(LastStore.getPointerReg(), MRI,
- m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
- BaseReg = LastStore.getPointerReg();
- LastOffset = 0;
- }
-
- GStore *LowestIdxStore = &LastStore;
- int64_t LowestIdxOffset = LastOffset;
-
- Register WideSrcVal;
- auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, MRI);
- if (!LowestShiftAmt)
- return false; // Didn't match a trunc.
- assert(WideSrcVal.isValid());
-
- LLT WideStoreTy = MRI.getType(WideSrcVal);
- // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
- if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
- return false;
- const unsigned NumStoresRequired =
- WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
-
- SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
- OffsetMap[*LowestShiftAmt] = LastOffset;
- FoundStores.emplace_back(&LastStore);
-
- // Search the block up for more stores.
- // We use a search threshold of 10 instructions here because the combiner
- // works top-down within a block, and we don't want to search an unbounded
- // number of predecessor instructions trying to find matching stores.
- // If we moved this optimization into a separate pass then we could probably
- // use a more efficient search without having a hard-coded threshold.
- const int MaxInstsToCheck = 10;
- int NumInstsChecked = 0;
- for (auto II = ++LastStore.getReverseIterator();
- II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
- ++II) {
- NumInstsChecked++;
- GStore *NewStore;
- if ((NewStore = dyn_cast<GStore>(&*II))) {
- if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
- break;
- } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
- break;
- } else {
- continue; // This is a safe instruction we can look past.
- }
-
- Register NewBaseReg;
- int64_t MemOffset;
- // Check we're storing to the same base + some offset.
- if (!mi_match(NewStore->getPointerReg(), MRI,
- m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
- NewBaseReg = NewStore->getPointerReg();
- MemOffset = 0;
- }
- if (BaseReg != NewBaseReg)
- break;
-
- auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, MRI);
- if (!ShiftByteOffset)
- break;
- if (MemOffset < LowestIdxOffset) {
- LowestIdxOffset = MemOffset;
- LowestIdxStore = NewStore;
- }
-
- // Map the offset in the store and the offset in the combined value, and
- // early return if it has been set before.
- if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
- OffsetMap[*ShiftByteOffset] != INT64_MAX)
- break;
- OffsetMap[*ShiftByteOffset] = MemOffset;
-
- FoundStores.emplace_back(NewStore);
- // Reset counter since we've found a matching inst.
- NumInstsChecked = 0;
- if (FoundStores.size() == NumStoresRequired)
- break;
- }
-
- if (FoundStores.size() != NumStoresRequired) {
- return false;
- }
-
- const auto &DL = LastStore.getMF()->getDataLayout();
- auto &C = LastStore.getMF()->getFunction().getContext();
- // Check that a store of the wide type is both allowed and fast on the target
- unsigned Fast = 0;
- bool Allowed = getTargetLowering().allowsMemoryAccess(
- C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
- if (!Allowed || !Fast)
- return false;
-
- // Check if the pieces of the value are going to the expected places in memory
- // to merge the stores.
- unsigned NarrowBits = MemTy.getScalarSizeInBits();
- auto checkOffsets = [&](bool MatchLittleEndian) {
- if (MatchLittleEndian) {
- for (unsigned i = 0; i != NumStoresRequired; ++i)
- if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
- return false;
- } else { // MatchBigEndian by reversing loop counter.
- for (unsigned i = 0, j = NumStoresRequired - 1; i != NumStoresRequired;
- ++i, --j)
- if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
- return false;
- }
- return true;
- };
-
- // Check if the offsets line up for the native data layout of this target.
- bool NeedBswap = false;
- bool NeedRotate = false;
- if (!checkOffsets(DL.isLittleEndian())) {
- // Special-case: check if byte offsets line up for the opposite endian.
- if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
- NeedBswap = true;
- else if (NumStoresRequired == 2 && checkOffsets(DL.isBigEndian()))
- NeedRotate = true;
- else
- return false;
- }
-
- if (NeedBswap &&
- !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}))
- return false;
- if (NeedRotate &&
- !isLegalOrBeforeLegalizer({TargetOpcode::G_ROTR, {WideStoreTy}}))
- return false;
-
- MatchInfo.NeedBSwap = NeedBswap;
- MatchInfo.NeedRotate = NeedRotate;
- MatchInfo.LowestIdxStore = LowestIdxStore;
- MatchInfo.WideSrcVal = WideSrcVal;
- MatchInfo.FoundStores = std::move(FoundStores);
- return true;
-}
-
-void CombinerHelper::applyTruncStoreMerge(MachineInstr &MI,
- MergeTruncStoresInfo &MatchInfo) {
-
- Builder.setInstrAndDebugLoc(MI);
- Register WideSrcVal = MatchInfo.WideSrcVal;
- LLT WideStoreTy = MRI.getType(WideSrcVal);
-
- if (MatchInfo.NeedBSwap) {
- WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
- } else if (MatchInfo.NeedRotate) {
- assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
- "Unexpected type for rotate");
- auto RotAmt =
- Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
- WideSrcVal =
- Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
- }
-
- Builder.buildStore(WideSrcVal, MatchInfo.LowestIdxStore->getPointerReg(),
- MatchInfo.LowestIdxStore->getMMO().getPointerInfo(),
- MatchInfo.LowestIdxStore->getMMO().getAlign());
-
- // Erase the old stores.
- for (auto *ST : MatchInfo.FoundStores)
- ST->eraseFromParent();
-}
-
bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
MachineInstr *&ExtMI) {
assert(MI.getOpcode() == TargetOpcode::G_PHI);
@@ -4395,7 +4162,7 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
if (static_cast<uint64_t>(LSBImm) >= Size)
return false;
- uint64_t Width = APInt(Size, AndImm).countTrailingOnes();
+ uint64_t Width = APInt(Size, AndImm).countr_one();
MatchInfo = [=](MachineIRBuilder &B) {
auto WidthCst = B.buildConstant(ExtractTy, Width);
auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
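The width computation above relies on the mask being a block of w contiguous low ones, so (and (lshr x, lsb), mask) selects a w-bit field, i.e. G_UBFX(x, lsb, w). Quick check with hypothetical values:

  #include <bit>
  #include <cassert>
  int main() {
    unsigned AndImm = 0x3F, LSBImm = 4, x = 0xABCD;
    int Width = std::countr_one(AndImm);           // six contiguous low ones
    assert(Width == 6);
    assert(((x >> LSBImm) & AndImm) ==             // and-of-shift form
           ((x >> LSBImm) & ((1u << Width) - 1))); // explicit w-bit field
  }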
@@ -4496,7 +4263,7 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd(
// Calculate start position and width of the extract.
const int64_t Pos = ShrAmt;
- const int64_t Width = countTrailingOnes(UMask) - ShrAmt;
+ const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
// It's preferable to keep the shift, rather than form G_SBFX.
// TODO: remove the G_AND via demanded bits analysis.
@@ -4695,6 +4462,62 @@ bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
return false;
}
+bool CombinerHelper::tryReassocBinOp(unsigned Opc, Register DstReg,
+ Register OpLHS, Register OpRHS,
+ BuildFnTy &MatchInfo) {
+ LLT OpRHSTy = MRI.getType(OpRHS);
+ MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
+
+ if (OpLHSDef->getOpcode() != Opc)
+ return false;
+
+ MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
+ Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
+ Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
+
+ // If the inner op is (X op C), pull the constant out so it can be folded with
+ // other constants in the expression tree. Folding is not guaranteed so we
+ // might have (C1 op C2). In that case do not pull a constant out because it
+ // won't help and can lead to infinite loops.
+ if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
+ !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
+ if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
+ // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
+ B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
+ };
+ return true;
+ }
+ if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
+ // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
+ // iff (op x, c1) has one use
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
+ B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
+ };
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ // We don't check if the reassociation will break a legal addressing mode
+ // here since pointer arithmetic is handled by G_PTR_ADD.
+ unsigned Opc = MI.getOpcode();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+
+ if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
+ return true;
+ if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
+ return true;
+ return false;
+}
bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
Register Op1 = MI.getOperand(1).getReg();
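Both rewrites in the new tryReassocBinOp rest on associativity and commutativity; for G_ADD the two cases look like this (a sketch with hypothetical values):

  #include <cassert>
  int main() {
    int x = 7, y = 5, c1 = 3, c2 = 9;
    // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2)): the constants fold.
    assert(((x + c1) + c2) == (x + (c1 + c2)));
    // (op (op x, c1), y) -> (op (op x, y), c1): the constant stays outermost,
    // where later combines can reach it.
    assert(((x + c1) + y) == ((x + y) + c1));
  }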
@@ -4766,7 +4589,7 @@ bool CombinerHelper::matchNarrowBinopFeedingAnd(
return false;
// No point in combining if there's nothing to truncate.
- unsigned NarrowWidth = Mask.countTrailingOnes();
+ unsigned NarrowWidth = Mask.countr_one();
if (NarrowWidth == WideTy.getSizeInBits())
return false;
LLT NarrowTy = LLT::scalar(NarrowWidth);
@@ -4956,7 +4779,7 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
// Magic algorithm doesn't work for division by 1. We need to emit a select
// at the end.
// TODO: Use undef values for divisor of 1.
- if (!Divisor.isOneValue()) {
+ if (!Divisor.isOne()) {
UnsignedDivisionByConstantInfo magics =
UnsignedDivisionByConstantInfo::get(Divisor);
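UnsignedDivisionByConstantInfo yields a fixed-point reciprocal ("magic number") so the division becomes a wide multiply plus shifts. One classic instance, unsigned 32-bit division by 3 with the standard magic pair (values assumed from the usual construction, not read out of this patch):

  #include <cassert>
  #include <cstdint>
  int main() {
    // q == (n * 0xAAAAAAAB) >> 33 computes n / 3 for every 32-bit n.
    for (uint32_t n : {0u, 1u, 2u, 3u, 100u, 0xFFFFFFFFu})
      assert(uint32_t((uint64_t(n) * 0xAAAAAAABull) >> 33) == n / 3);
  }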
@@ -5144,7 +4967,7 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
auto *CI = cast<ConstantInt>(C);
APInt Divisor = CI->getValue();
- unsigned Shift = Divisor.countTrailingZeros();
+ unsigned Shift = Divisor.countr_zero();
if (Shift) {
Divisor.ashrInPlace(Shift);
UseSRA = true;
@@ -6185,6 +6008,16 @@ bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
return CmpInst::isEquality(Pred) && Y.isValid();
}
+bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
+ Register ShiftReg = MI.getOperand(2).getReg();
+ LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
+ auto IsShiftTooBig = [&](const Constant *C) {
+ auto *CI = dyn_cast<ConstantInt>(C);
+ return CI && CI->uge(ResTy.getScalarSizeInBits());
+ };
+ return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
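Finally, a hedged sketch of the predicate behind matchShiftsTooBig added above: a shift amount at or beyond the result's scalar bit width produces poison, so the whole shift can be replaced.

  #include <cassert>
  #include <cstdint>
  int main() {
    // Mirrors CI->uge(ResTy.getScalarSizeInBits()) for a scalar s32 result.
    auto IsShiftTooBig = [](uint64_t Amt, unsigned Bits) { return Amt >= Bits; };
    assert(IsShiftTooBig(32, 32));  // s32 shifted by 32: combine fires
    assert(!IsShiftTooBig(31, 32)); // largest in-range amount: left alone
  }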