author | Dimitry Andric <dim@FreeBSD.org> | 2023-09-02 21:17:18 +0000
---|---|---
committer | Dimitry Andric <dim@FreeBSD.org> | 2023-12-08 17:34:50 +0000
commit | 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e (patch)
tree | 62f873df87c7c675557a179e0c4c83fe9f3087bc /contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
parent | cf037972ea8863e2bab7461d77345367d2c1e054 (diff)
parent | 7fa27ce4a07f19b07799a767fc29416f3b625afb (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 535
1 file changed, 184 insertions, 351 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index af4bb1634746..cc7fb3ee1109 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -16,7 +16,7 @@
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -399,7 +399,8 @@ namespace {

 /// Select a preference between two uses. CurrentUse is the current preference
 /// while *ForCandidate is attributes of the candidate under consideration.
-PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
+PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
+                                  PreferredTuple &CurrentUse,
                                   const LLT TyForCandidate,
                                   unsigned OpcodeForCandidate,
                                   MachineInstr *MIForCandidate) {
@@ -425,8 +426,10 @@ PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
     return {TyForCandidate, OpcodeForCandidate, MIForCandidate};

   // Prefer sign extensions to zero extensions as sign-extensions tend to be
-  // more expensive.
-  if (CurrentUse.Ty == TyForCandidate) {
+  // more expensive. Don't do this if the load is already a zero-extend load
+  // though, otherwise we'll rewrite a zero-extend load into a sign-extend
+  // later.
+  if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
     if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
         OpcodeForCandidate == TargetOpcode::G_ZEXT)
       return CurrentUse;
@@ -535,7 +538,7 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,

   // For non power-of-2 types, they will very likely be legalized into multiple
   // loads. Don't bother trying to match them into extending loads.
-  if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
+  if (!llvm::has_single_bit<uint32_t>(LoadValueTy.getSizeInBits()))
     return false;

   // Find the preferred type aside from the any-extends (unless it's the only
@@ -566,7 +569,7 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
             .Action != LegalizeActions::Legal)
         continue;
     }
-    Preferred = ChoosePreferredUse(Preferred,
+    Preferred = ChoosePreferredUse(MI, Preferred,
                                    MRI.getType(UseMI.getOperand(0).getReg()),
                                    UseMI.getOpcode(), &UseMI);
   }
@@ -727,7 +730,7 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
   Register PtrReg = LoadMI->getPointerReg();
   unsigned RegSize = RegTy.getSizeInBits();
   uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
-  unsigned MaskSizeBits = MaskVal.countTrailingOnes();
+  unsigned MaskSizeBits = MaskVal.countr_one();

   // The mask may not be larger than the in-memory type, as it might cover sign
   // extended bits
@@ -1189,16 +1192,22 @@ void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
       Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;

   // Check which instruction is first in the block so we don't break def-use
-  // deps by "moving" the instruction incorrectly.
-  if (dominates(MI, *OtherMI))
+  // deps by "moving" the instruction incorrectly. Also keep track of which
+  // instruction is first so we pick its operands, avoiding use-before-def
+  // bugs.
+  MachineInstr *FirstInst;
+  if (dominates(MI, *OtherMI)) {
     Builder.setInstrAndDebugLoc(MI);
-  else
+    FirstInst = &MI;
+  } else {
     Builder.setInstrAndDebugLoc(*OtherMI);
+    FirstInst = OtherMI;
+  }

   Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
                               : TargetOpcode::G_UDIVREM,
                      {DestDivReg, DestRemReg},
-                     {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
+                     {FirstInst->getOperand(1), FirstInst->getOperand(2)});
   MI.eraseFromParent();
   OtherMI->eraseFromParent();
 }
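
The bit-utility renames in the hunks above (isPowerOf2_32 to llvm::has_single_bit, APInt::countTrailingOnes to countr_one) follow C++20 <bit> naming. A minimal standalone illustration using the standard counterparts, not part of the commit; the values are made up:

// Standalone sketch: the standard equivalents of the renamed LLVM helpers.
#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  uint32_t LoadSizeBits = 64;                // a power-of-2 load size
  assert(std::has_single_bit(LoadSizeBits)); // old name: isPowerOf2_32

  uint32_t MaskVal = 0x00FF;                 // low-byte mask from a G_AND
  assert(std::countr_one(MaskVal) == 8);     // old name: countTrailingOnes
  return 0;
}
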
@@ -1285,65 +1294,57 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
          LegalizerHelper::LegalizeResult::Legalized;
 }

-static std::optional<APFloat>
-constantFoldFpUnary(unsigned Opcode, LLT DstTy, const Register Op,
-                    const MachineRegisterInfo &MRI) {
-  const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
-  if (!MaybeCst)
-    return std::nullopt;
-
-  APFloat V = MaybeCst->getValueAPF();
-  switch (Opcode) {
+static APFloat constantFoldFpUnary(const MachineInstr &MI,
+                                   const MachineRegisterInfo &MRI,
+                                   const APFloat &Val) {
+  APFloat Result(Val);
+  switch (MI.getOpcode()) {
   default:
     llvm_unreachable("Unexpected opcode!");
   case TargetOpcode::G_FNEG: {
-    V.changeSign();
-    return V;
+    Result.changeSign();
+    return Result;
   }
   case TargetOpcode::G_FABS: {
-    V.clearSign();
-    return V;
+    Result.clearSign();
+    return Result;
+  }
+  case TargetOpcode::G_FPTRUNC: {
+    bool Unused;
+    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+    Result.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven,
+                   &Unused);
+    return Result;
   }
-  case TargetOpcode::G_FPTRUNC:
-    break;
   case TargetOpcode::G_FSQRT: {
     bool Unused;
-    V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
-    V = APFloat(sqrt(V.convertToDouble()));
+    Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
+                   &Unused);
+    Result = APFloat(sqrt(Result.convertToDouble()));
     break;
   }
   case TargetOpcode::G_FLOG2: {
     bool Unused;
-    V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
-    V = APFloat(log2(V.convertToDouble()));
+    Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
+                   &Unused);
+    Result = APFloat(log2(Result.convertToDouble()));
     break;
   }
   }

   // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
-  // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`,
-  // and `G_FLOG2` reach here.
+  // `buildFConstant` will assert on size mismatch. Only `G_FSQRT` and
+  // `G_FLOG2` reach here.
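
The folder above evaluates G_FSQRT and G_FLOG2 by converting the operand to IEEE double, computing on the host, and converting back to the source semantics. A standalone sketch of that round trip, not part of the commit; the values are illustrative:

// Standalone sketch: fold sqrt/log2 through host 'double', then narrow back.
#include <cassert>
#include <cmath>

int main() {
  float V = 16.0f;
  // G_FSQRT fold: widen to double, evaluate, narrow to the source type.
  float Sqrt = static_cast<float>(std::sqrt(static_cast<double>(V)));
  assert(Sqrt == 4.0f);
  // G_FLOG2 fold follows the same shape.
  float Log2 = static_cast<float>(std::log2(static_cast<double>(V)));
  assert(Log2 == 4.0f);
  return 0;
}
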
   bool Unused;
-  V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
-  return V;
+  Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
+  return Result;
 }

-bool CombinerHelper::matchCombineConstantFoldFpUnary(
-    MachineInstr &MI, std::optional<APFloat> &Cst) {
-  Register DstReg = MI.getOperand(0).getReg();
-  Register SrcReg = MI.getOperand(1).getReg();
-  LLT DstTy = MRI.getType(DstReg);
-  Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
-  return Cst.has_value();
-}
-
-void CombinerHelper::applyCombineConstantFoldFpUnary(
-    MachineInstr &MI, std::optional<APFloat> &Cst) {
-  assert(Cst && "Optional is unexpectedly empty!");
+void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
+                                                     const ConstantFP *Cst) {
   Builder.setInstrAndDebugLoc(MI);
-  MachineFunction &MF = Builder.getMF();
-  auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
-  Register DstReg = MI.getOperand(0).getReg();
-  Builder.buildFConstant(DstReg, *FPVal);
+  APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
+  const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
+  Builder.buildFConstant(MI.getOperand(0), *NewCst);
   MI.eraseFromParent();
 }

@@ -1621,6 +1622,41 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
   MI.eraseFromParent();
 }

+bool CombinerHelper::matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
+  // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+  // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
+  auto &Shl = cast<GenericMachineInstr>(MI);
+  Register DstReg = Shl.getReg(0);
+  Register SrcReg = Shl.getReg(1);
+  Register ShiftReg = Shl.getReg(2);
+  Register X, C1;
+
+  if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
+    return false;
+
+  if (!mi_match(SrcReg, MRI,
+                m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
+                                        m_GOr(m_Reg(X), m_Reg(C1))))))
+    return false;
+
+  APInt C1Val, C2Val;
+  if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
+      !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
+    return false;
+
+  auto *SrcDef = MRI.getVRegDef(SrcReg);
+  assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
+          SrcDef->getOpcode() == TargetOpcode::G_OR) &&
+         "Unexpected op");
+  LLT SrcTy = MRI.getType(SrcReg);
+  MatchInfo = [=](MachineIRBuilder &B) {
+    auto S1 = B.buildShl(SrcTy, X, ShiftReg);
+    auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
+    B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
+  };
+  return true;
+}
+
 bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
                                           unsigned &ShiftVal) {
   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
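
The new matchCommuteShift rewrite relies on shifts distributing over add and or in modular arithmetic. A standalone check of both identities, not part of the commit; the constants are arbitrary:

// Standalone sketch: shl distributes over add (mod 2^32) and over or.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0x1234, C1 = 7, C2 = 3;
  // (shl (add x, c1), c2) == (add (shl x, c2), c1 << c2)
  assert(((X + C1) << C2) == ((X << C2) + (C1 << C2)));
  // (shl (or x, c1), c2) == (or (shl x, c2), c1 << c2)
  assert(((X | C1) << C2) == ((X << C2) | (C1 << C2)));
  return 0;
}
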
@@ -1658,9 +1694,9 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
       !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
     return false;

-  // TODO: Should handle vector splat.
   Register RHS = MI.getOperand(2).getReg();
-  auto MaybeShiftAmtVal = getIConstantVRegValWithLookThrough(RHS, MRI);
+  MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
+  auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
   if (!MaybeShiftAmtVal)
     return false;

@@ -1675,12 +1711,13 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
       return false;
   }

-  int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue();
+  int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
   MatchData.Reg = ExtSrc;
   MatchData.Imm = ShiftAmt;

-  unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
-  return MinLeadingZeros >= ShiftAmt;
+  unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one();
+  unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
+  return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
 }

 void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
@@ -1763,6 +1800,15 @@ void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
   for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
     Register DstReg = MI.getOperand(Idx).getReg();
     Register SrcReg = Operands[Idx];
+
+    // This combine may run after RegBankSelect, so we need to be aware of
+    // register banks.
+    const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
+    if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
+      SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
+      MRI.setRegClassOrRegBank(SrcReg, DstCB);
+    }
+
     if (CanReuseInputDirectly)
       replaceRegWith(MRI, DstReg, SrcReg);
     else
@@ -2426,10 +2472,7 @@ bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
   return true;
 }

-bool CombinerHelper::eraseInst(MachineInstr &MI) {
-  MI.eraseFromParent();
-  return true;
-}
+void CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); }

 bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
                                     const MachineOperand &MOP2) {
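
matchCombineShlOfExtend above narrows (G_SHL (G_ZEXT x), amt) to (G_ZEXT (G_SHL x, amt)); the new ShiftAmt < SrcTySize guard keeps the narrow shift in range, while the known-bits check guarantees no set bit is shifted out. A standalone uint16 -> uint32 check, not part of the commit:

// Standalone sketch: shl-of-zext equals zext-of-shl when x has enough
// leading zeros that the narrow shift loses nothing.
#include <cassert>
#include <cstdint>

int main() {
  uint16_t X = 0x00FF; // at least 8 leading zeros in the narrow type
  unsigned Amt = 8;    // Amt <= leading zeros, and Amt < 16 (the guard)
  uint32_t Wide = uint32_t{X} << Amt;             // shl of zext
  uint32_t Narrow = uint32_t(uint16_t(X << Amt)); // zext of shl
  assert(Wide == Narrow);
  return 0;
}
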
@@ -2537,7 +2580,7 @@ bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
          MaybeCst->getSExtValue() == C;
 }

-bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
+void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
                                                      unsigned OpIdx) {
   assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
   Register OldReg = MI.getOperand(0).getReg();
@@ -2545,17 +2588,15 @@ bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
   assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
   MI.eraseFromParent();
   replaceRegWith(MRI, OldReg, Replacement);
-  return true;
 }

-bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
+void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
                                                  Register Replacement) {
   assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
   Register OldReg = MI.getOperand(0).getReg();
   assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
   MI.eraseFromParent();
   replaceRegWith(MRI, OldReg, Replacement);
-  return true;
 }

 bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
@@ -2590,36 +2631,32 @@ bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
   return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
 }

-bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
+void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
   assert(MI.getNumDefs() == 1 && "Expected only one def?");
   Builder.setInstr(MI);
   Builder.buildFConstant(MI.getOperand(0), C);
   MI.eraseFromParent();
-  return true;
 }

-bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
+void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
   assert(MI.getNumDefs() == 1 && "Expected only one def?");
   Builder.setInstr(MI);
   Builder.buildConstant(MI.getOperand(0), C);
   MI.eraseFromParent();
-  return true;
 }

-bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
+void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
   assert(MI.getNumDefs() == 1 && "Expected only one def?");
   Builder.setInstr(MI);
   Builder.buildConstant(MI.getOperand(0), C);
   MI.eraseFromParent();
-  return true;
 }

-bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
+void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
   assert(MI.getNumDefs() == 1 && "Expected only one def?");
   Builder.setInstr(MI);
   Builder.buildUndef(MI.getOperand(0));
   MI.eraseFromParent();
-  return true;
 }

 bool CombinerHelper::matchSimplifyAddToSub(
@@ -2750,9 +2787,7 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
   Register Y = RightHandInst->getOperand(1).getReg();
   LLT XTy = MRI.getType(X);
   LLT YTy = MRI.getType(Y);
-  if (XTy != YTy)
-    return false;
-  if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
+  if (!XTy.isValid() || XTy != YTy)
     return false;

   // Optional extra source register.
@@ -2779,6 +2814,9 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
     }
   }

+  if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
+    return false;
+
   // Record the steps to build the new instructions.
   //
   // Steps to build (logic x, y)
@@ -3227,7 +3265,7 @@ bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,

 /// \p SelectOperand is the operand in binary operator \p MI that is the select
 /// to fold.
-bool CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
+void CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
                                               const unsigned &SelectOperand) {
   Builder.setInstrAndDebugLoc(MI);

@@ -3263,8 +3301,6 @@ bool CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,

   Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
   MI.eraseFromParent();
-
-  return true;
 }

 std::optional<SmallVector<Register, 8>>
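
matchHoistLogicOpWithSameOpcodeHands hoists a logic op above two hands with the same opcode, e.g. (and (sext x), (sext y)) -> (sext (and x, y)); the hunks above only move its legality check after the hand inspection. A standalone check of the underlying identity, not part of the commit:

// Standalone sketch: bitwise ops commute with sign extension.
#include <cassert>
#include <cstdint>

int main() {
  int8_t X = -10, Y = 0x33;
  int32_t Hoisted = int32_t(int8_t(X & Y));   // sext (and x, y)
  int32_t Original = int32_t(X) & int32_t(Y); // and (sext x), (sext y)
  assert(Hoisted == Original);
  return 0;
}
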
@@ -3612,275 +3648,6 @@ bool CombinerHelper::matchLoadOrCombine(
   return true;
 }

-/// Check if the store \p Store is a truncstore that can be merged. That is,
-/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
-/// Register then it does not need to match and SrcVal is set to the source
-/// value found.
-/// On match, returns the start byte offset of the \p SrcVal that is being
-/// stored.
-static std::optional<int64_t>
-getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
-                        MachineRegisterInfo &MRI) {
-  Register TruncVal;
-  if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
-    return std::nullopt;
-
-  // The shift amount must be a constant multiple of the narrow type.
-  // It is translated to the offset address in the wide source value "y".
-  //
-  // x = G_LSHR y, ShiftAmtC
-  // s8 z = G_TRUNC x
-  // store z, ...
-  Register FoundSrcVal;
-  int64_t ShiftAmt;
-  if (!mi_match(TruncVal, MRI,
-                m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
-                         m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
-    if (!SrcVal.isValid() || TruncVal == SrcVal) {
-      if (!SrcVal.isValid())
-        SrcVal = TruncVal;
-      return 0; // If it's the lowest index store.
-    }
-    return std::nullopt;
-  }
-
-  unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
-  if (ShiftAmt % NarrowBits != 0)
-    return std::nullopt;
-  const unsigned Offset = ShiftAmt / NarrowBits;
-
-  if (SrcVal.isValid() && FoundSrcVal != SrcVal)
-    return std::nullopt;
-
-  if (!SrcVal.isValid())
-    SrcVal = FoundSrcVal;
-  else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
-    return std::nullopt;
-  return Offset;
-}
-
-/// Match a pattern where a wide type scalar value is stored by several narrow
-/// stores. Fold it into a single store or a BSWAP and a store if the targets
-/// supports it.
-///
-/// Assuming little endian target:
-/// i8 *p = ...
-/// i32 val = ...
-/// p[0] = (val >> 0) & 0xFF;
-/// p[1] = (val >> 8) & 0xFF;
-/// p[2] = (val >> 16) & 0xFF;
-/// p[3] = (val >> 24) & 0xFF;
-/// =>
-/// *((i32)p) = val;
-///
-/// i8 *p = ...
-/// i32 val = ...
-/// p[0] = (val >> 24) & 0xFF;
-/// p[1] = (val >> 16) & 0xFF;
-/// p[2] = (val >> 8) & 0xFF;
-/// p[3] = (val >> 0) & 0xFF;
-/// =>
-/// *((i32)p) = BSWAP(val);
-bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI,
-                                          MergeTruncStoresInfo &MatchInfo) {
-  auto &StoreMI = cast<GStore>(MI);
-  LLT MemTy = StoreMI.getMMO().getMemoryType();
-
-  // We only handle merging simple stores of 1-4 bytes.
-  if (!MemTy.isScalar())
-    return false;
-  switch (MemTy.getSizeInBits()) {
-  case 8:
-  case 16:
-  case 32:
-    break;
-  default:
-    return false;
-  }
-  if (!StoreMI.isSimple())
-    return false;
-
-  // We do a simple search for mergeable stores prior to this one.
-  // Any potential alias hazard along the way terminates the search.
-  SmallVector<GStore *> FoundStores;
-
-  // We're looking for:
-  // 1) a (store(trunc(...)))
-  // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
-  //    the partial value stored.
-  // 3) where the offsets form either a little or big-endian sequence.
-
-  auto &LastStore = StoreMI;
-
-  // The single base pointer that all stores must use.
-  Register BaseReg;
-  int64_t LastOffset;
-  if (!mi_match(LastStore.getPointerReg(), MRI,
-                m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
-    BaseReg = LastStore.getPointerReg();
-    LastOffset = 0;
-  }
-
-  GStore *LowestIdxStore = &LastStore;
-  int64_t LowestIdxOffset = LastOffset;
-
-  Register WideSrcVal;
-  auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, MRI);
-  if (!LowestShiftAmt)
-    return false; // Didn't match a trunc.
-  assert(WideSrcVal.isValid());
-
-  LLT WideStoreTy = MRI.getType(WideSrcVal);
-  // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
-  if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
-    return false;
-  const unsigned NumStoresRequired =
-      WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
-
-  SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
-  OffsetMap[*LowestShiftAmt] = LastOffset;
-  FoundStores.emplace_back(&LastStore);
-
-  // Search the block up for more stores.
-  // We use a search threshold of 10 instructions here because the combiner
-  // works top-down within a block, and we don't want to search an unbounded
-  // number of predecessor instructions trying to find matching stores.
-  // If we moved this optimization into a separate pass then we could probably
-  // use a more efficient search without having a hard-coded threshold.
-  const int MaxInstsToCheck = 10;
-  int NumInstsChecked = 0;
-  for (auto II = ++LastStore.getReverseIterator();
-       II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
-       ++II) {
-    NumInstsChecked++;
-    GStore *NewStore;
-    if ((NewStore = dyn_cast<GStore>(&*II))) {
-      if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
-        break;
-    } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
-      break;
-    } else {
-      continue; // This is a safe instruction we can look past.
-    }
-
-    Register NewBaseReg;
-    int64_t MemOffset;
-    // Check we're storing to the same base + some offset.
-    if (!mi_match(NewStore->getPointerReg(), MRI,
-                  m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
-      NewBaseReg = NewStore->getPointerReg();
-      MemOffset = 0;
-    }
-    if (BaseReg != NewBaseReg)
-      break;
-
-    auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, MRI);
-    if (!ShiftByteOffset)
-      break;
-    if (MemOffset < LowestIdxOffset) {
-      LowestIdxOffset = MemOffset;
-      LowestIdxStore = NewStore;
-    }
-
-    // Map the offset in the store and the offset in the combined value, and
-    // early return if it has been set before.
-    if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
-        OffsetMap[*ShiftByteOffset] != INT64_MAX)
-      break;
-    OffsetMap[*ShiftByteOffset] = MemOffset;
-
-    FoundStores.emplace_back(NewStore);
-    // Reset counter since we've found a matching inst.
-    NumInstsChecked = 0;
-    if (FoundStores.size() == NumStoresRequired)
-      break;
-  }
-
-  if (FoundStores.size() != NumStoresRequired) {
-    return false;
-  }
-
-  const auto &DL = LastStore.getMF()->getDataLayout();
-  auto &C = LastStore.getMF()->getFunction().getContext();
-  // Check that a store of the wide type is both allowed and fast on the target
-  unsigned Fast = 0;
-  bool Allowed = getTargetLowering().allowsMemoryAccess(
-      C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
-  if (!Allowed || !Fast)
-    return false;
-
-  // Check if the pieces of the value are going to the expected places in memory
-  // to merge the stores.
-  unsigned NarrowBits = MemTy.getScalarSizeInBits();
-  auto checkOffsets = [&](bool MatchLittleEndian) {
-    if (MatchLittleEndian) {
-      for (unsigned i = 0; i != NumStoresRequired; ++i)
-        if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
-          return false;
-    } else { // MatchBigEndian by reversing loop counter.
-      for (unsigned i = 0, j = NumStoresRequired - 1; i != NumStoresRequired;
-           ++i, --j)
-        if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
-          return false;
-    }
-    return true;
-  };
-
-  // Check if the offsets line up for the native data layout of this target.
-  bool NeedBswap = false;
-  bool NeedRotate = false;
-  if (!checkOffsets(DL.isLittleEndian())) {
-    // Special-case: check if byte offsets line up for the opposite endian.
-    if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
-      NeedBswap = true;
-    else if (NumStoresRequired == 2 && checkOffsets(DL.isBigEndian()))
-      NeedRotate = true;
-    else
-      return false;
-  }
-
-  if (NeedBswap &&
-      !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}))
-    return false;
-  if (NeedRotate &&
-      !isLegalOrBeforeLegalizer({TargetOpcode::G_ROTR, {WideStoreTy}}))
-    return false;
-
-  MatchInfo.NeedBSwap = NeedBswap;
-  MatchInfo.NeedRotate = NeedRotate;
-  MatchInfo.LowestIdxStore = LowestIdxStore;
-  MatchInfo.WideSrcVal = WideSrcVal;
-  MatchInfo.FoundStores = std::move(FoundStores);
-  return true;
-}
-
-void CombinerHelper::applyTruncStoreMerge(MachineInstr &MI,
-                                          MergeTruncStoresInfo &MatchInfo) {
-
-  Builder.setInstrAndDebugLoc(MI);
-  Register WideSrcVal = MatchInfo.WideSrcVal;
-  LLT WideStoreTy = MRI.getType(WideSrcVal);
-
-  if (MatchInfo.NeedBSwap) {
-    WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
-  } else if (MatchInfo.NeedRotate) {
-    assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
-           "Unexpected type for rotate");
-    auto RotAmt =
-        Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
-    WideSrcVal =
-        Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
-  }
-
-  Builder.buildStore(WideSrcVal, MatchInfo.LowestIdxStore->getPointerReg(),
-                     MatchInfo.LowestIdxStore->getMMO().getPointerInfo(),
-                     MatchInfo.LowestIdxStore->getMMO().getAlign());
-
-  // Erase the old stores.
-  for (auto *ST : MatchInfo.FoundStores)
-    ST->eraseFromParent();
-}
-
 bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
                                             MachineInstr *&ExtMI) {
   assert(MI.getOpcode() == TargetOpcode::G_PHI);
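
The truncstore-merge combine removed above folded per-byte stores of a wide value into one wide store, adding a bswap or rotate when the bytes arrived in the opposite order. A standalone check of the little-endian case, not part of the commit; like the combine's DataLayout query, the assert assumes a little-endian host:

// Standalone sketch: byte-by-byte stores of 'Val' produce the same memory
// image as one wide little-endian store.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t Val = 0x11223344;
  uint8_t ByteStores[4], WideStore[4];

  // p[i] = (val >> 8*i) & 0xFF -- the little-endian store sequence.
  for (int I = 0; I != 4; ++I)
    ByteStores[I] = uint8_t(Val >> (8 * I));

  // The merged form: *(i32 *)p = val (little-endian host assumed).
  std::memcpy(WideStore, &Val, 4);
  assert(std::memcmp(ByteStores, WideStore, 4) == 0);
  return 0;
}
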
@@ -4395,7 +4162,7 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
   if (static_cast<uint64_t>(LSBImm) >= Size)
     return false;

-  uint64_t Width = APInt(Size, AndImm).countTrailingOnes();
+  uint64_t Width = APInt(Size, AndImm).countr_one();
   MatchInfo = [=](MachineIRBuilder &B) {
     auto WidthCst = B.buildConstant(ExtractTy, Width);
     auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
@@ -4496,7 +4263,7 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd(

   // Calculate start position and width of the extract.
   const int64_t Pos = ShrAmt;
-  const int64_t Width = countTrailingOnes(UMask) - ShrAmt;
+  const int64_t Width = llvm::countr_one(UMask) - ShrAmt;

   // It's preferable to keep the shift, rather than form G_SBFX.
   // TODO: remove the G_AND via demanded bits analysis.
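
Both bitfield-extract matchers above derive the field width from the trailing ones of the AND mask, which is exactly what countr_one computes. A standalone sketch of the G_UBFX semantics being formed, not part of the commit:

// Standalone sketch: width = countr_one(mask), then extract (x >> pos) masked
// to that width.
#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xABCD1234;
  unsigned LSB = 4;
  uint32_t Mask = 0xFF;              // from (and (lshr x, 4), 0xff)
  int Width = std::countr_one(Mask); // 8-bit field
  assert(Width == 8);
  // G_UBFX(x, pos=4, width=8) semantics:
  uint32_t Field = (X >> LSB) & ((1u << Width) - 1);
  assert(Field == 0x23);
  return 0;
}
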
@@ -4695,6 +4462,62 @@ bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
   return false;
 }

+bool CombinerHelper::tryReassocBinOp(unsigned Opc, Register DstReg,
+                                     Register OpLHS, Register OpRHS,
+                                     BuildFnTy &MatchInfo) {
+  LLT OpRHSTy = MRI.getType(OpRHS);
+  MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
+
+  if (OpLHSDef->getOpcode() != Opc)
+    return false;
+
+  MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
+  Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
+  Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
+
+  // If the inner op is (X op C), pull the constant out so it can be folded with
+  // other constants in the expression tree. Folding is not guaranteed so we
+  // might have (C1 op C2). In that case do not pull a constant out because it
+  // won't help and can lead to infinite loops.
+  if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
+      !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
+    if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
+      // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
+      MatchInfo = [=](MachineIRBuilder &B) {
+        auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
+        B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
+      };
+      return true;
+    }
+    if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
+      // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
+      // iff (op x, c1) has one use
+      MatchInfo = [=](MachineIRBuilder &B) {
+        auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
+        B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
+      };
+      return true;
+    }
+  }
+
+  return false;
+}
+
+bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI,
+                                           BuildFnTy &MatchInfo) {
+  // We don't check if the reassociation will break a legal addressing mode
+  // here since pointer arithmetic is handled by G_PTR_ADD.
+  unsigned Opc = MI.getOpcode();
+  Register DstReg = MI.getOperand(0).getReg();
+  Register LHSReg = MI.getOperand(1).getReg();
+  Register RHSReg = MI.getOperand(2).getReg();
+
+  if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
+    return true;
+  if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
+    return true;
+  return false;
+}

 bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
   Register Op1 = MI.getOperand(1).getReg();
@@ -4766,7 +4589,7 @@ bool CombinerHelper::matchNarrowBinopFeedingAnd(
     return false;

   // No point in combining if there's nothing to truncate.
-  unsigned NarrowWidth = Mask.countTrailingOnes();
+  unsigned NarrowWidth = Mask.countr_one();
   if (NarrowWidth == WideTy.getSizeInBits())
     return false;
   LLT NarrowTy = LLT::scalar(NarrowWidth);
@@ -4956,7 +4779,7 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
   // Magic algorithm doesn't work for division by 1. We need to emit a select
   // at the end.
   // TODO: Use undef values for divisor of 1.
-  if (!Divisor.isOneValue()) {
+  if (!Divisor.isOne()) {
     UnsignedDivisionByConstantInfo magics =
         UnsignedDivisionByConstantInfo::get(Divisor);

@@ -5144,7 +4967,7 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
   auto *CI = cast<ConstantInt>(C);
   APInt Divisor = CI->getValue();

-  unsigned Shift = Divisor.countTrailingZeros();
+  unsigned Shift = Divisor.countr_zero();
   if (Shift) {
     Divisor.ashrInPlace(Shift);
     UseSRA = true;
@@ -6185,6 +6008,16 @@ bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
   return CmpInst::isEquality(Pred) && Y.isValid();
 }

+bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
+  Register ShiftReg = MI.getOperand(2).getReg();
+  LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
+  auto IsShiftTooBig = [&](const Constant *C) {
+    auto *CI = dyn_cast<ConstantInt>(C);
+    return CI && CI->uge(ResTy.getScalarSizeInBits());
+  };
+  return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
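
The new tryReassocBinOp depends on associativity, (x op c1) op c2 == x op (c1 op c2), to bring two constants together for folding, while matchShiftsTooBig flags constant shift amounts of at least the result width, which produce undefined results and so can be replaced by undef. A standalone check of the reassociation identity for add, not part of the commit:

// Standalone sketch: reassociating exposes the constant subexpression C1 + C2,
// which a compiler can fold; unsigned arithmetic makes the identity exact.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xDEADBEEF, C1 = 100, C2 = 23;
  // (Opc (Opc X, C1), C2) == (Opc X, (Opc C1, C2)) for associative Opc.
  assert(((X + C1) + C2) == (X + (C1 + C2)));
  return 0;
}
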