author | Dimitry Andric <dim@FreeBSD.org> | 2023-09-02 21:17:18 +0000
---|---|---
committer | Dimitry Andric <dim@FreeBSD.org> | 2023-12-08 17:34:50 +0000
commit | 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e (patch)
tree | 62f873df87c7c675557a179e0c4c83fe9f3087bc /contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
parent | cf037972ea8863e2bab7461d77345367d2c1e054 (diff)
parent | 7fa27ce4a07f19b07799a767fc29416f3b625afb (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 535
1 file changed, 184 insertions, 351 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index af4bb1634746..cc7fb3ee1109 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -16,7 +16,7 @@
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -399,7 +399,8 @@ namespace {

 /// Select a preference between two uses. CurrentUse is the current preference
 /// while *ForCandidate is attributes of the candidate under consideration.
-PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
+PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
+                                  PreferredTuple &CurrentUse,
                                   const LLT TyForCandidate,
                                   unsigned OpcodeForCandidate,
                                   MachineInstr *MIForCandidate) {
@@ -425,8 +426,10 @@ PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
     return {TyForCandidate, OpcodeForCandidate, MIForCandidate};

   // Prefer sign extensions to zero extensions as sign-extensions tend to be
-  // more expensive.
-  if (CurrentUse.Ty == TyForCandidate) {
+  // more expensive. Don't do this if the load is already a zero-extend load
+  // though, otherwise we'll rewrite a zero-extend load into a sign-extend
+  // later.
+  if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
     if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
         OpcodeForCandidate == TargetOpcode::G_ZEXT)
       return CurrentUse;
@@ -535,7 +538,7 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,

   // For non power-of-2 types, they will very likely be legalized into multiple
   // loads. Don't bother trying to match them into extending loads.
-  if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
+  if (!llvm::has_single_bit<uint32_t>(LoadValueTy.getSizeInBits()))
     return false;

   // Find the preferred type aside from the any-extends (unless it's the only
@@ -566,7 +569,7 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
             .Action != LegalizeActions::Legal)
         continue;
     }
-    Preferred = ChoosePreferredUse(Preferred,
+    Preferred = ChoosePreferredUse(MI, Preferred,
                                    MRI.getType(UseMI.getOperand(0).getReg()),
                                    UseMI.getOpcode(), &UseMI);
   }
@@ -727,7 +730,7 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
   Register PtrReg = LoadMI->getPointerReg();
   unsigned RegSize = RegTy.getSizeInBits();
   uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
-  unsigned MaskSizeBits = MaskVal.countTrailingOnes();
+  unsigned MaskSizeBits = MaskVal.countr_one();

   // The mask may not be larger than the in-memory type, as it might cover sign
   // extended bits
@@ -1189,16 +1192,22 @@ void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
       Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;

   // Check which instruction is first in the block so we don't break def-use
-  // deps by "moving" the instruction incorrectly.
-  if (dominates(MI, *OtherMI))
+  // deps by "moving" the instruction incorrectly. Also keep track of which
+  // instruction is first so we pick its operands, avoiding use-before-def
+  // bugs.
+  MachineInstr *FirstInst;
+  if (dominates(MI, *OtherMI)) {
     Builder.setInstrAndDebugLoc(MI);
-  else
+    FirstInst = &MI;
+  } else {
     Builder.setInstrAndDebugLoc(*OtherMI);
+    FirstInst = OtherMI;
+  }

   Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
                               : TargetOpcode::G_UDIVREM,
                      {DestDivReg, DestRemReg},
-                     {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
+                     {FirstInst->getOperand(1), FirstInst->getOperand(2)});
   MI.eraseFromParent();
   OtherMI->eraseFromParent();
 }
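
The bit-utility renames in the hunks above (isPowerOf2_32 to llvm::has_single_bit, APInt::countTrailingOnes to countr_one) follow C++20 <bit> naming. A minimal standalone illustration using the standard counterparts, not part of the commit; the values are made up:

// Standalone sketch: the standard equivalents of the renamed LLVM helpers.
#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  uint32_t LoadSizeBits = 64;                // a power-of-2 load size
  assert(std::has_single_bit(LoadSizeBits)); // old name: isPowerOf2_32

  uint32_t MaskVal = 0x00FF;                 // low-byte mask from a G_AND
  assert(std::countr_one(MaskVal) == 8);     // old name: countTrailingOnes
  return 0;
}
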
@@ -1285,65 +1294,57 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
          LegalizerHelper::LegalizeResult::Legalized;
 }

-static std::optional<APFloat>
-constantFoldFpUnary(unsigned Opcode, LLT DstTy, const Register Op,
-                    const MachineRegisterInfo &MRI) {
-  const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
-  if (!MaybeCst)
-    return std::nullopt;
-
-  APFloat V = MaybeCst->getValueAPF();
-  switch (Opcode) {
+static APFloat constantFoldFpUnary(const MachineInstr &MI,
+                                   const MachineRegisterInfo &MRI,
+                                   const APFloat &Val) {
+  APFloat Result(Val);
+  switch (MI.getOpcode()) {
   default:
     llvm_unreachable("Unexpected opcode!");
   case TargetOpcode::G_FNEG: {
-    V.changeSign();
-    return V;
+    Result.changeSign();
+    return Result;
   }
   case TargetOpcode::G_FABS: {
-    V.clearSign();
-    return V;
+    Result.clearSign();
+    return Result;
+  }
+  case TargetOpcode::G_FPTRUNC: {
+    bool Unused;
+    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+    Result.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven,
+                   &Unused);
+    return Result;
   }
-  case TargetOpcode::G_FPTRUNC:
-    break;
   case TargetOpcode::G_FSQRT: {
     bool Unused;
-    V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
-    V = APFloat(sqrt(V.convertToDouble()));
+    Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
+                   &Unused);
+    Result = APFloat(sqrt(Result.convertToDouble()));
     break;
   }
   case TargetOpcode::G_FLOG2: {
     bool Unused;
-    V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
-    V = APFloat(log2(V.convertToDouble()));
+    Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
+                   &Unused);
+    Result = APFloat(log2(Result.convertToDouble()));
     break;
   }
   }

   // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
-  // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`,
-  // and `G_FLOG2` reach here.
+  // `buildFConstant` will assert on size mismatch. Only `G_FSQRT` and
+  // `G_FLOG2` reach here.
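
The folder above evaluates G_FSQRT and G_FLOG2 by converting the operand to IEEE double, computing on the host, and converting back to the source semantics. A standalone sketch of that round trip, not part of the commit; the values are illustrative:

// Standalone sketch: fold sqrt/log2 through host 'double', then narrow back.
#include <cassert>
#include <cmath>

int main() {
  float V = 16.0f;
  // G_FSQRT fold: widen to double, evaluate, narrow to the source type.
  float Sqrt = static_cast<float>(std::sqrt(static_cast<double>(V)));
  assert(Sqrt == 4.0f);
  // G_FLOG2 fold follows the same shape.
  float Log2 = static_cast<float>(std::log2(static_cast<double>(V)));
  assert(Log2 == 4.0f);
  return 0;
}
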
   bool Unused;
-  V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
-  return V;
+  Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
+  return Result;
 }

-bool CombinerHelper::matchCombineConstantFoldFpUnary(
-    MachineInstr &MI, std::optional<APFloat> &Cst) {
-  Register DstReg = MI.getOperand(0).getReg();
-  Register SrcReg = MI.getOperand(1).getReg();
-  LLT DstTy = MRI.getType(DstReg);
-  Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
-  return Cst.has_value();
-}
-
-void CombinerHelper::applyCombineConstantFoldFpUnary(
-    MachineInstr &MI, std::optional<APFloat> &Cst) {
-  assert(Cst && "Optional is unexpectedly empty!");
+void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
+                                                     const ConstantFP *Cst) {
   Builder.setInstrAndDebugLoc(MI);
-  MachineFunction &MF = Builder.getMF();
-  auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
-  Register DstReg = MI.getOperand(0).getReg();
-  Builder.buildFConstant(DstReg, *FPVal);
+  APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
+  const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
+  Builder.buildFConstant(MI.getOperand(0), *NewCst);
   MI.eraseFromParent();
 }

@@ -1621,6 +1622,41 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
   MI.eraseFromParent();
 }

+bool CombinerHelper::matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
+  // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+  // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
+  auto &Shl = cast<GenericMachineInstr>(MI);
+  Register DstReg = Shl.getReg(0);
+  Register SrcReg = Shl.getReg(1);
+  Register ShiftReg = Shl.getReg(2);
+  Register X, C1;
+
+  if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
+    return false;
+
+  if (!mi_match(SrcReg, MRI,
+                m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
+                                        m_GOr(m_Reg(X), m_Reg(C1))))))
+    return false;
+
+  APInt C1Val, C2Val;
+  if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
+      !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
+    return false;
+
+  auto *SrcDef = MRI.getVRegDef(SrcReg);
+  assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
+          SrcDef->getOpcode() == TargetOpcode::G_OR) &&
+         "Unexpected op");
+  LLT SrcTy = MRI.getType(SrcReg);
+  MatchInfo = [=](MachineIRBuilder &B) {
+    auto S1 = B.buildShl(SrcTy, X, ShiftReg);
+    auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
+    B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
+  };
+  return true;
+}
+
 bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
                                           unsigned &ShiftVal) {
   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
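
The new matchCommuteShift rewrite relies on shifts distributing over add and or in modular arithmetic. A standalone check of both identities, not part of the commit; the constants are arbitrary:

// Standalone sketch: shl distributes over add (mod 2^32) and over or.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0x1234, C1 = 7, C2 = 3;
  // (shl (add x, c1), c2) == (add (shl x, c2), c1 << c2)
  assert(((X + C1) << C2) == ((X << C2) + (C1 << C2)));
  // (shl (or x, c1), c2) == (or (shl x, c2), c1 << c2)
  assert(((X | C1) << C2) == ((X << C2) | (C1 << C2)));
  return 0;
}
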
@@ -1658,9 +1694,9 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
       !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
     return false;

-  // TODO: Should handle vector splat.
   Register RHS = MI.getOperand(2).getReg();
-  auto MaybeShiftAmtVal = getIConstantVRegValWithLookThrough(RHS, MRI);
+  MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
+  auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
   if (!MaybeShiftAmtVal)
     return false;

@@ -1675,12 +1711,13 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
       return false;
   }

-  int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue();
+  int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
   MatchData.Reg = ExtSrc;
   MatchData.Imm = ShiftAmt;

-  unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
-  return MinLeadingZeros >= ShiftAmt;
+  unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one();
+  unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
+  return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
 }

 void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
@@ -1763,6 +1800,15 @@ void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
   for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
     Register DstReg = MI.getOperand(Idx).getReg();
     Register SrcReg = Operands[Idx];
+
+    // This combine may run after RegBankSelect, so we need to be aware of
+    // register banks.
+    const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
+    if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
+      SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
+      MRI.setRegClassOrRegBank(SrcReg, DstCB);
+    }
+
     if (CanReuseInputDirectly)
       replaceRegWith(MRI, DstReg, SrcReg);
     else
@@ -2426,10 +2472,7 @@ bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
   return true;
 }

-bool CombinerHelper::eraseInst(MachineInstr &MI) {
-  MI.eraseFromParent();
-  return true;
-}
+void CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); }

 bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
                                     const MachineOperand &MOP2) {
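
matchCombineShlOfExtend above narrows (G_SHL (G_ZEXT x), amt) to (G_ZEXT (G_SHL x, amt)); the new ShiftAmt < SrcTySize guard keeps the narrow shift in range, while the known-bits check guarantees no set bit is shifted out. A standalone uint16 -> uint32 check, not part of the commit:

// Standalone sketch: shl-of-zext equals zext-of-shl when x has enough
// leading zeros that the narrow shift loses nothing.
#include <cassert>
#include <cstdint>

int main() {
  uint16_t X = 0x00FF; // at least 8 leading zeros in the narrow type
  unsigned Amt = 8;    // Amt <= leading zeros, and Amt < 16 (the guard)
  uint32_t Wide = uint32_t{X} << Amt;             // shl of zext
  uint32_t Narrow = uint32_t(uint16_t(X << Amt)); // zext of shl
  assert(Wide == Narrow);
  return 0;
}
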
@@ -2537,7 +2580,7 @@ bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
          MaybeCst->getSExtValue() == C;
 }

-bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
+void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
                                                      unsigned OpIdx) {
   assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
   Register OldReg = MI.getOperand(0).getReg();
@@ -2545,17 +2588,15 @@ bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
   assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
   MI.eraseFromParent();
   replaceRegWith(MRI, OldReg, Replacement);
-  return true;
 }

-bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
+void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
                                                  Register Replacement) {
   assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
   Register OldReg = MI.getOperand(0).getReg();
   assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
   MI.eraseFromParent();
   replaceRegWith(MRI, OldReg, Replacement);
-  return true;
 }

 bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
@@ -2590,36 +2631,32 @@ bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
   return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
 }

-bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
+void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
   assert(MI.getNumDefs() == 1 && "Expected only one def?");
   Builder.setInstr(MI);
   Builder.buildFConstant(MI.getOperand(0), C);
   MI.eraseFromParent();
-  return true;
 }

-bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
+void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
   assert(MI.getNumDefs() == 1 && "Expected only one def?");
   Builder.setInstr(MI);
   Builder.buildConstant(MI.getOperand(0), C);
   MI.eraseFromParent();
-  return true;
 }

-bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
+void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
   assert(MI.getNumDefs() == 1 && "Expected only one def?");
   Builder.setInstr(MI);
   Builder.buildConstant(MI.getOperand(0), C);
   MI.eraseFromParent();
-  return true;
 }

-bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
+void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
   assert(MI.getNumDefs() == 1 && "Expected only one def?");
   Builder.setInstr(MI);
   Builder.buildUndef(MI.getOperand(0));
   MI.eraseFromParent();
-  return true;
 }

 bool CombinerHelper::matchSimplifyAddToSub(
@@ -2750,9 +2787,7 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
   Register Y = RightHandInst->getOperand(1).getReg();
   LLT XTy = MRI.getType(X);
   LLT YTy = MRI.getType(Y);
-  if (XTy != YTy)
-    return false;
-  if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
+  if (!XTy.isValid() || XTy != YTy)
     return false;

   // Optional extra source register.
@@ -2779,6 +2814,9 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
     }
   }

+  if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
+    return false;
+
   // Record the steps to build the new instructions.
   //
   // Steps to build (logic x, y)
@@ -3227,7 +3265,7 @@ bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,

 /// \p SelectOperand is the operand in binary operator \p MI that is the select
 /// to fold.
-bool CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
+void CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
                                               const unsigned &SelectOperand) {
   Builder.setInstrAndDebugLoc(MI);

@@ -3263,8 +3301,6 @@ bool CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,

   Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
   MI.eraseFromParent();
-
-  return true;
 }

 std::optional<SmallVector<Register, 8>>
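
matchHoistLogicOpWithSameOpcodeHands hoists a logic op above two hands with the same opcode, e.g. (and (sext x), (sext y)) -> (sext (and x, y)); the hunks above only move its legality check after the hand inspection. A standalone check of the underlying identity, not part of the commit:

// Standalone sketch: bitwise ops commute with sign extension.
#include <cassert>
#include <cstdint>

int main() {
  int8_t X = -10, Y = 0x33;
  int32_t Hoisted = int32_t(int8_t(X & Y));   // sext (and x, y)
  int32_t Original = int32_t(X) & int32_t(Y); // and (sext x), (sext y)
  assert(Hoisted == Original);
  return 0;
}
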
@@ -3612,275 +3648,6 @@ bool CombinerHelper::matchLoadOrCombine(
   return true;
 }

-/// Check if the store \p Store is a truncstore that can be merged. That is,
-/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
-/// Register then it does not need to match and SrcVal is set to the source
-/// value found.
-/// On match, returns the start byte offset of the \p SrcVal that is being
-/// stored.
-static std::optional<int64_t>
-getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
-                        MachineRegisterInfo &MRI) {
-  Register TruncVal;
-  if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
-    return std::nullopt;
-
-  // The shift amount must be a constant multiple of the narrow type.
-  // It is translated to the offset address in the wide source value "y".
-  //
-  // x = G_LSHR y, ShiftAmtC
-  // s8 z = G_TRUNC x
-  // store z, ...
-  Register FoundSrcVal;
-  int64_t ShiftAmt;
-  if (!mi_match(TruncVal, MRI,
-                m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
-                         m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
-    if (!SrcVal.isValid() || TruncVal == SrcVal) {
-      if (!SrcVal.isValid())
-        SrcVal = TruncVal;
-      return 0; // If it's the lowest index store.
-    }
-    return std::nullopt;
-  }
-
-  unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
-  if (ShiftAmt % NarrowBits != 0)
-    return std::nullopt;
-  const unsigned Offset = ShiftAmt / NarrowBits;
-
-  if (SrcVal.isValid() && FoundSrcVal != SrcVal)
-    return std::nullopt;
-
-  if (!SrcVal.isValid())
-    SrcVal = FoundSrcVal;
-  else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
-    return std::nullopt;
-  return Offset;
-}
-
-/// Match a pattern where a wide type scalar value is stored by several narrow
-/// stores. Fold it into a single store or a BSWAP and a store if the targets
-/// supports it.
-///
-/// Assuming little endian target:
-/// i8 *p = ...
-/// i32 val = ...
-/// p[0] = (val >> 0) & 0xFF;
-/// p[1] = (val >> 8) & 0xFF;
-/// p[2] = (val >> 16) & 0xFF;
-/// p[3] = (val >> 24) & 0xFF;
-/// =>
-/// *((i32)p) = val;
-///
-/// i8 *p = ...
-/// i32 val = ...
-/// p[0] = (val >> 24) & 0xFF;
-/// p[1] = (val >> 16) & 0xFF;
-/// p[2] = (val >> 8) & 0xFF;
-/// p[3] = (val >> 0) & 0xFF;
-/// =>
-/// *((i32)p) = BSWAP(val);
-bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI,
-                                          MergeTruncStoresInfo &MatchInfo) {
-  auto &StoreMI = cast<GStore>(MI);
-  LLT MemTy = StoreMI.getMMO().getMemoryType();
-
-  // We only handle merging simple stores of 1-4 bytes.
-  if (!MemTy.isScalar())
-    return false;
-  switch (MemTy.getSizeInBits()) {
-  case 8:
-  case 16:
-  case 32:
-    break;
-  default:
-    return false;
-  }
-  if (!StoreMI.isSimple())
-    return false;
-
-  // We do a simple search for mergeable stores prior to this one.
-  // Any potential alias hazard along the way terminates the search.
-  SmallVector<GStore *> FoundStores;
-
-  // We're looking for:
-  // 1) a (store(trunc(...)))
-  // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
-  //    the partial value stored.
-  // 3) where the offsets form either a little or big-endian sequence.
-
-  auto &LastStore = StoreMI;
-
-  // The single base pointer that all stores must use.
-  Register BaseReg;
-  int64_t LastOffset;
-  if (!mi_match(LastStore.getPointerReg(), MRI,
-                m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
-    BaseReg = LastStore.getPointerReg();
-    LastOffset = 0;
-  }
-
-  GStore *LowestIdxStore = &LastStore;
-  int64_t LowestIdxOffset = LastOffset;
-
-  Register WideSrcVal;
-  auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, MRI);
-  if (!LowestShiftAmt)
-    return false; // Didn't match a trunc.
-  assert(WideSrcVal.isValid());
-
-  LLT WideStoreTy = MRI.getType(WideSrcVal);
-  // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
-  if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
-    return false;
-  const unsigned NumStoresRequired =
-      WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
-
-  SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
-  OffsetMap[*LowestShiftAmt] = LastOffset;
-  FoundStores.emplace_back(&LastStore);
-
-  // Search the block up for more stores.
-  // We use a search threshold of 10 instructions here because the combiner
-  // works top-down within a block, and we don't want to search an unbounded
-  // number of predecessor instructions trying to find matching stores.
-  // If we moved this optimization into a separate pass then we could probably
-  // use a more efficient search without having a hard-coded threshold.
-  const int MaxInstsToCheck = 10;
-  int NumInstsChecked = 0;
-  for (auto II = ++LastStore.getReverseIterator();
-       II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
-       ++II) {
-    NumInstsChecked++;
-    GStore *NewStore;
-    if ((NewStore = dyn_cast<GStore>(&*II))) {
-      if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
-        break;
-    } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
-      break;
-    } else {
-      continue; // This is a safe instruction we can look past.
-    }
-
-    Register NewBaseReg;
-    int64_t MemOffset;
-    // Check we're storing to the same base + some offset.
-    if (!mi_match(NewStore->getPointerReg(), MRI,
-                  m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
-      NewBaseReg = NewStore->getPointerReg();
-      MemOffset = 0;
-    }
-    if (BaseReg != NewBaseReg)
-      break;
-
-    auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, MRI);
-    if (!ShiftByteOffset)
-      break;
-    if (MemOffset < LowestIdxOffset) {
-      LowestIdxOffset = MemOffset;
-      LowestIdxStore = NewStore;
-    }
-
-    // Map the offset in the store and the offset in the combined value, and
-    // early return if it has been set before.
-    if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
-        OffsetMap[*ShiftByteOffset] != INT64_MAX)
-      break;
-    OffsetMap[*ShiftByteOffset] = MemOffset;
-
-    FoundStores.emplace_back(NewStore);
-    // Reset counter since we've found a matching inst.
-    NumInstsChecked = 0;
-    if (FoundStores.size() == NumStoresRequired)
-      break;
-  }
-
-  if (FoundStores.size() != NumStoresRequired) {
-    return false;
-  }
-
-  const auto &DL = LastStore.getMF()->getDataLayout();
-  auto &C = LastStore.getMF()->getFunction().getContext();
-  // Check that a store of the wide type is both allowed and fast on the target
-  unsigned Fast = 0;
-  bool Allowed = getTargetLowering().allowsMemoryAccess(
-      C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
-  if (!Allowed || !Fast)
-    return false;
-
-  // Check if the pieces of the value are going to the expected places in memory
-  // to merge the stores.
-  unsigned NarrowBits = MemTy.getScalarSizeInBits();
-  auto checkOffsets = [&](bool MatchLittleEndian) {
-    if (MatchLittleEndian) {
-      for (unsigned i = 0; i != NumStoresRequired; ++i)
-        if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
-          return false;
-    } else { // MatchBigEndian by reversing loop counter.
-      for (unsigned i = 0, j = NumStoresRequired - 1; i != NumStoresRequired;
-           ++i, --j)
-        if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
-          return false;
-    }
-    return true;
-  };
-
-  // Check if the offsets line up for the native data layout of this target.
-  bool NeedBswap = false;
-  bool NeedRotate = false;
-  if (!checkOffsets(DL.isLittleEndian())) {
-    // Special-case: check if byte offsets line up for the opposite endian.
-    if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
-      NeedBswap = true;
-    else if (NumStoresRequired == 2 && checkOffsets(DL.isBigEndian()))
-      NeedRotate = true;
-    else
-      return false;
-  }
-
-  if (NeedBswap &&
-      !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}))
-    return false;
-  if (NeedRotate &&
-      !isLegalOrBeforeLegalizer({TargetOpcode::G_ROTR, {WideStoreTy}}))
-    return false;
-
-  MatchInfo.NeedBSwap = NeedBswap;
-  MatchInfo.NeedRotate = NeedRotate;
-  MatchInfo.LowestIdxStore = LowestIdxStore;
-  MatchInfo.WideSrcVal = WideSrcVal;
-  MatchInfo.FoundStores = std::move(FoundStores);
-  return true;
-}
-
-void CombinerHelper::applyTruncStoreMerge(MachineInstr &MI,
-                                          MergeTruncStoresInfo &MatchInfo) {
-
-  Builder.setInstrAndDebugLoc(MI);
-  Register WideSrcVal = MatchInfo.WideSrcVal;
-  LLT WideStoreTy = MRI.getType(WideSrcVal);
-
-  if (MatchInfo.NeedBSwap) {
-    WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
-  } else if (MatchInfo.NeedRotate) {
-    assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
-           "Unexpected type for rotate");
-    auto RotAmt =
-        Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
-    WideSrcVal =
-        Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
-  }
-
-  Builder.buildStore(WideSrcVal, MatchInfo.LowestIdxStore->getPointerReg(),
-                     MatchInfo.LowestIdxStore->getMMO().getPointerInfo(),
-                     MatchInfo.LowestIdxStore->getMMO().getAlign());
-
-  // Erase the old stores.
-  for (auto *ST : MatchInfo.FoundStores)
-    ST->eraseFromParent();
-}
-
 bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
                                             MachineInstr *&ExtMI) {
   assert(MI.getOpcode() == TargetOpcode::G_PHI);
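
The truncstore-merge combine removed above folded per-byte stores of a wide value into one wide store, adding a bswap or rotate when the bytes arrived in the opposite order. A standalone check of the little-endian case, not part of the commit; like the combine's DataLayout query, the assert assumes a little-endian host:

// Standalone sketch: byte-by-byte stores of 'Val' produce the same memory
// image as one wide little-endian store.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t Val = 0x11223344;
  uint8_t ByteStores[4], WideStore[4];

  // p[i] = (val >> 8*i) & 0xFF -- the little-endian store sequence.
  for (int I = 0; I != 4; ++I)
    ByteStores[I] = uint8_t(Val >> (8 * I));

  // The merged form: *(i32 *)p = val (little-endian host assumed).
  std::memcpy(WideStore, &Val, 4);
  assert(std::memcmp(ByteStores, WideStore, 4) == 0);
  return 0;
}
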
@@ -4395,7 +4162,7 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
   if (static_cast<uint64_t>(LSBImm) >= Size)
     return false;

-  uint64_t Width = APInt(Size, AndImm).countTrailingOnes();
+  uint64_t Width = APInt(Size, AndImm).countr_one();
   MatchInfo = [=](MachineIRBuilder &B) {
     auto WidthCst = B.buildConstant(ExtractTy, Width);
     auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
@@ -4496,7 +4263,7 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd(

   // Calculate start position and width of the extract.
   const int64_t Pos = ShrAmt;
-  const int64_t Width = countTrailingOnes(UMask) - ShrAmt;
+  const int64_t Width = llvm::countr_one(UMask) - ShrAmt;

   // It's preferable to keep the shift, rather than form G_SBFX.
   // TODO: remove the G_AND via demanded bits analysis.
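
Both bitfield-extract matchers above derive the field width from the trailing ones of the AND mask, which is exactly what countr_one computes. A standalone sketch of the G_UBFX semantics being formed, not part of the commit:

// Standalone sketch: width = countr_one(mask), then extract (x >> pos) masked
// to that width.
#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xABCD1234;
  unsigned LSB = 4;
  uint32_t Mask = 0xFF;              // from (and (lshr x, 4), 0xff)
  int Width = std::countr_one(Mask); // 8-bit field
  assert(Width == 8);
  // G_UBFX(x, pos=4, width=8) semantics:
  uint32_t Field = (X >> LSB) & ((1u << Width) - 1);
  assert(Field == 0x23);
  return 0;
}
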
@@ -4695,6 +4462,62 @@ bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
   return false;
 }

+bool CombinerHelper::tryReassocBinOp(unsigned Opc, Register DstReg,
+                                     Register OpLHS, Register OpRHS,
+                                     BuildFnTy &MatchInfo) {
+  LLT OpRHSTy = MRI.getType(OpRHS);
+  MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
+
+  if (OpLHSDef->getOpcode() != Opc)
+    return false;
+
+  MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
+  Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
+  Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
+
+  // If the inner op is (X op C), pull the constant out so it can be folded with
+  // other constants in the expression tree. Folding is not guaranteed so we
+  // might have (C1 op C2). In that case do not pull a constant out because it
+  // won't help and can lead to infinite loops.
+  if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
+      !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
+    if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
+      // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
+      MatchInfo = [=](MachineIRBuilder &B) {
+        auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
+        B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
+      };
+      return true;
+    }
+    if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
+      // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
+      // iff (op x, c1) has one use
+      MatchInfo = [=](MachineIRBuilder &B) {
+        auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
+        B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
+      };
+      return true;
+    }
+  }
+
+  return false;
+}
+
+bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI,
+                                           BuildFnTy &MatchInfo) {
+  // We don't check if the reassociation will break a legal addressing mode
+  // here since pointer arithmetic is handled by G_PTR_ADD.
+  unsigned Opc = MI.getOpcode();
+  Register DstReg = MI.getOperand(0).getReg();
+  Register LHSReg = MI.getOperand(1).getReg();
+  Register RHSReg = MI.getOperand(2).getReg();
+
+  if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
+    return true;
+  if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
+    return true;
+  return false;
+}

 bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
   Register Op1 = MI.getOperand(1).getReg();
@@ -4766,7 +4589,7 @@ bool CombinerHelper::matchNarrowBinopFeedingAnd(
     return false;

   // No point in combining if there's nothing to truncate.
-  unsigned NarrowWidth = Mask.countTrailingOnes();
+  unsigned NarrowWidth = Mask.countr_one();
   if (NarrowWidth == WideTy.getSizeInBits())
     return false;
   LLT NarrowTy = LLT::scalar(NarrowWidth);
@@ -4956,7 +4779,7 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
   // Magic algorithm doesn't work for division by 1. We need to emit a select
   // at the end.
   // TODO: Use undef values for divisor of 1.
-  if (!Divisor.isOneValue()) {
+  if (!Divisor.isOne()) {
     UnsignedDivisionByConstantInfo magics =
         UnsignedDivisionByConstantInfo::get(Divisor);

@@ -5144,7 +4967,7 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
   auto *CI = cast<ConstantInt>(C);
   APInt Divisor = CI->getValue();

-  unsigned Shift = Divisor.countTrailingZeros();
+  unsigned Shift = Divisor.countr_zero();
   if (Shift) {
     Divisor.ashrInPlace(Shift);
     UseSRA = true;
@@ -6185,6 +6008,16 @@ bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
   return CmpInst::isEquality(Pred) && Y.isValid();
 }

+bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
+  Register ShiftReg = MI.getOperand(2).getReg();
+  LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
+  auto IsShiftTooBig = [&](const Constant *C) {
+    auto *CI = dyn_cast<ConstantInt>(C);
+    return CI && CI->uge(ResTy.getScalarSizeInBits());
+  };
+  return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
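
The new tryReassocBinOp depends on associativity, (x op c1) op c2 == x op (c1 op c2), to bring two constants together for folding, while matchShiftsTooBig flags constant shift amounts of at least the result width, which produce undefined results and so can be replaced by undef. A standalone check of the reassociation identity for add, not part of the commit:

// Standalone sketch: reassociating exposes the constant subexpression C1 + C2,
// which a compiler can fold; unsigned arithmetic makes the identity exact.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xDEADBEEF, C1 = 100, C2 = 23;
  // (Opc (Opc X, C1), C2) == (Opc X, (Opc C1, C2)) for associative Opc.
  assert(((X + C1) + C2) == (X + (C1 + C2)));
  return 0;
}
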