diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2023-09-02 21:17:18 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2023-12-08 17:34:50 +0000 |
commit | 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e (patch) | |
tree | 62f873df87c7c675557a179e0c4c83fe9f3087bc /contrib/llvm-project/llvm/lib/CodeGen/GlobalISel | |
parent | cf037972ea8863e2bab7461d77345367d2c1e054 (diff) | |
parent | 7fa27ce4a07f19b07799a767fc29416f3b625afb (diff) | |
download | src-06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e.tar.gz src-06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e.zip |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/GlobalISel')
18 files changed, 1051 insertions, 812 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp index 356d208fc881..e047996f9aa8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -217,10 +217,14 @@ void GISelCSEInfo::handleRemoveInst(MachineInstr *MI) { } void GISelCSEInfo::handleRecordedInsts() { + if (HandlingRecordedInstrs) + return; + HandlingRecordedInstrs = true; while (!TemporaryInsts.empty()) { auto *MI = TemporaryInsts.pop_back_val(); handleRecordedInst(MI); } + HandlingRecordedInstrs = false; } bool GISelCSEInfo::shouldCSE(unsigned Opc) const { @@ -392,9 +396,10 @@ GISelInstProfileBuilder::addNodeIDReg(Register Reg) const { addNodeIDRegType(Ty); if (const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(Reg)) { - if (const auto *RB = RCOrRB.dyn_cast<const RegisterBank *>()) + if (const auto *RB = dyn_cast_if_present<const RegisterBank *>(RCOrRB)) addNodeIDRegType(RB); - else if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>()) + else if (const auto *RC = + dyn_cast_if_present<const TargetRegisterClass *>(RCOrRB)) addNodeIDRegType(RC); } return *this; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 89872259cfca..28c33e2038e4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -846,7 +846,7 @@ void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy, unsigned NumValues = SplitVTs.size(); Align BaseAlign = DL.getPrefTypeAlign(RetTy); Type *RetPtrTy = RetTy->getPointerTo(DL.getAllocaAddrSpace()); - LLT OffsetLLTy = getLLTForType(*DL.getIntPtrType(RetPtrTy), DL); + LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetPtrTy), DL); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); @@ -876,8 +876,7 @@ void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy, unsigned NumValues = SplitVTs.size(); Align BaseAlign = DL.getPrefTypeAlign(RetTy); unsigned AS = DL.getAllocaAddrSpace(); - LLT OffsetLLTy = - getLLTForType(*DL.getIntPtrType(RetTy->getPointerTo(AS)), DL); + LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetTy->getPointerTo(AS)), DL); MachinePointerInfo PtrInfo(AS); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index af4bb1634746..cc7fb3ee1109 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -16,7 +16,7 @@ #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" -#include "llvm/CodeGen/LowLevelType.h" +#include "llvm/CodeGen/LowLevelTypeUtils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" @@ -399,7 +399,8 @@ namespace { /// Select a preference between two uses. CurrentUse is the current preference /// while *ForCandidate is attributes of the candidate under consideration. -PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse, +PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI, + PreferredTuple &CurrentUse, const LLT TyForCandidate, unsigned OpcodeForCandidate, MachineInstr *MIForCandidate) { @@ -425,8 +426,10 @@ PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse, return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; // Prefer sign extensions to zero extensions as sign-extensions tend to be - // more expensive. - if (CurrentUse.Ty == TyForCandidate) { + // more expensive. Don't do this if the load is already a zero-extend load + // though, otherwise we'll rewrite a zero-extend load into a sign-extend + // later. + if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) { if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT && OpcodeForCandidate == TargetOpcode::G_ZEXT) return CurrentUse; @@ -535,7 +538,7 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI, // For non power-of-2 types, they will very likely be legalized into multiple // loads. Don't bother trying to match them into extending loads. - if (!isPowerOf2_32(LoadValueTy.getSizeInBits())) + if (!llvm::has_single_bit<uint32_t>(LoadValueTy.getSizeInBits())) return false; // Find the preferred type aside from the any-extends (unless it's the only @@ -566,7 +569,7 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI, .Action != LegalizeActions::Legal) continue; } - Preferred = ChoosePreferredUse(Preferred, + Preferred = ChoosePreferredUse(MI, Preferred, MRI.getType(UseMI.getOperand(0).getReg()), UseMI.getOpcode(), &UseMI); } @@ -727,7 +730,7 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI, Register PtrReg = LoadMI->getPointerReg(); unsigned RegSize = RegTy.getSizeInBits(); uint64_t LoadSizeBits = LoadMI->getMemSizeInBits(); - unsigned MaskSizeBits = MaskVal.countTrailingOnes(); + unsigned MaskSizeBits = MaskVal.countr_one(); // The mask may not be larger than the in-memory type, as it might cover sign // extended bits @@ -1189,16 +1192,22 @@ void CombinerHelper::applyCombineDivRem(MachineInstr &MI, Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM; // Check which instruction is first in the block so we don't break def-use - // deps by "moving" the instruction incorrectly. - if (dominates(MI, *OtherMI)) + // deps by "moving" the instruction incorrectly. Also keep track of which + // instruction is first so we pick it's operands, avoiding use-before-def + // bugs. + MachineInstr *FirstInst; + if (dominates(MI, *OtherMI)) { Builder.setInstrAndDebugLoc(MI); - else + FirstInst = &MI; + } else { Builder.setInstrAndDebugLoc(*OtherMI); + FirstInst = OtherMI; + } Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM : TargetOpcode::G_UDIVREM, {DestDivReg, DestRemReg}, - {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()}); + { FirstInst->getOperand(1), FirstInst->getOperand(2) }); MI.eraseFromParent(); OtherMI->eraseFromParent(); } @@ -1285,65 +1294,57 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { LegalizerHelper::LegalizeResult::Legalized; } -static std::optional<APFloat> -constantFoldFpUnary(unsigned Opcode, LLT DstTy, const Register Op, - const MachineRegisterInfo &MRI) { - const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI); - if (!MaybeCst) - return std::nullopt; - - APFloat V = MaybeCst->getValueAPF(); - switch (Opcode) { +static APFloat constantFoldFpUnary(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + const APFloat &Val) { + APFloat Result(Val); + switch (MI.getOpcode()) { default: llvm_unreachable("Unexpected opcode!"); case TargetOpcode::G_FNEG: { - V.changeSign(); - return V; + Result.changeSign(); + return Result; } case TargetOpcode::G_FABS: { - V.clearSign(); - return V; + Result.clearSign(); + return Result; + } + case TargetOpcode::G_FPTRUNC: { + bool Unused; + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + Result.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, + &Unused); + return Result; } - case TargetOpcode::G_FPTRUNC: - break; case TargetOpcode::G_FSQRT: { bool Unused; - V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused); - V = APFloat(sqrt(V.convertToDouble())); + Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, + &Unused); + Result = APFloat(sqrt(Result.convertToDouble())); break; } case TargetOpcode::G_FLOG2: { bool Unused; - V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused); - V = APFloat(log2(V.convertToDouble())); + Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, + &Unused); + Result = APFloat(log2(Result.convertToDouble())); break; } } // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise, - // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`, - // and `G_FLOG2` reach here. + // `buildFConstant` will assert on size mismatch. Only `G_FSQRT`, and + // `G_FLOG2` reach here. bool Unused; - V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused); - return V; + Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused); + return Result; } -bool CombinerHelper::matchCombineConstantFoldFpUnary( - MachineInstr &MI, std::optional<APFloat> &Cst) { - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI); - return Cst.has_value(); -} - -void CombinerHelper::applyCombineConstantFoldFpUnary( - MachineInstr &MI, std::optional<APFloat> &Cst) { - assert(Cst && "Optional is unexpectedly empty!"); +void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI, + const ConstantFP *Cst) { Builder.setInstrAndDebugLoc(MI); - MachineFunction &MF = Builder.getMF(); - auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst); - Register DstReg = MI.getOperand(0).getReg(); - Builder.buildFConstant(DstReg, *FPVal); + APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue()); + const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded); + Builder.buildFConstant(MI.getOperand(0), *NewCst); MI.eraseFromParent(); } @@ -1621,6 +1622,41 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI, MI.eraseFromParent(); } +bool CombinerHelper::matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL"); + // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) + // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) + auto &Shl = cast<GenericMachineInstr>(MI); + Register DstReg = Shl.getReg(0); + Register SrcReg = Shl.getReg(1); + Register ShiftReg = Shl.getReg(2); + Register X, C1; + + if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize())) + return false; + + if (!mi_match(SrcReg, MRI, + m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)), + m_GOr(m_Reg(X), m_Reg(C1)))))) + return false; + + APInt C1Val, C2Val; + if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) || + !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val))) + return false; + + auto *SrcDef = MRI.getVRegDef(SrcReg); + assert((SrcDef->getOpcode() == TargetOpcode::G_ADD || + SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op"); + LLT SrcTy = MRI.getType(SrcReg); + MatchInfo = [=](MachineIRBuilder &B) { + auto S1 = B.buildShl(SrcTy, X, ShiftReg); + auto S2 = B.buildShl(SrcTy, C1, ShiftReg); + B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2}); + }; + return true; +} + bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) { assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); @@ -1658,9 +1694,9 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI, !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc)))) return false; - // TODO: Should handle vector splat. Register RHS = MI.getOperand(2).getReg(); - auto MaybeShiftAmtVal = getIConstantVRegValWithLookThrough(RHS, MRI); + MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS); + auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI); if (!MaybeShiftAmtVal) return false; @@ -1675,12 +1711,13 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI, return false; } - int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue(); + int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue(); MatchData.Reg = ExtSrc; MatchData.Imm = ShiftAmt; - unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes(); - return MinLeadingZeros >= ShiftAmt; + unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one(); + unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits(); + return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize; } void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI, @@ -1763,6 +1800,15 @@ void CombinerHelper::applyCombineUnmergeMergeToPlainValues( for (unsigned Idx = 0; Idx < NumElems; ++Idx) { Register DstReg = MI.getOperand(Idx).getReg(); Register SrcReg = Operands[Idx]; + + // This combine may run after RegBankSelect, so we need to be aware of + // register banks. + const auto &DstCB = MRI.getRegClassOrRegBank(DstReg); + if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) { + SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0); + MRI.setRegClassOrRegBank(SrcReg, DstCB); + } + if (CanReuseInputDirectly) replaceRegWith(MRI, DstReg, SrcReg); else @@ -2426,10 +2472,7 @@ bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) { return true; } -bool CombinerHelper::eraseInst(MachineInstr &MI) { - MI.eraseFromParent(); - return true; -} +void CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); } bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2) { @@ -2537,7 +2580,7 @@ bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) { MaybeCst->getSExtValue() == C; } -bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI, +void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx) { assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?"); Register OldReg = MI.getOperand(0).getReg(); @@ -2545,17 +2588,15 @@ bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI, assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?"); MI.eraseFromParent(); replaceRegWith(MRI, OldReg, Replacement); - return true; } -bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI, +void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement) { assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?"); Register OldReg = MI.getOperand(0).getReg(); assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?"); MI.eraseFromParent(); replaceRegWith(MRI, OldReg, Replacement); - return true; } bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) { @@ -2590,36 +2631,32 @@ bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB); } -bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) { +void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) { assert(MI.getNumDefs() == 1 && "Expected only one def?"); Builder.setInstr(MI); Builder.buildFConstant(MI.getOperand(0), C); MI.eraseFromParent(); - return true; } -bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) { +void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) { assert(MI.getNumDefs() == 1 && "Expected only one def?"); Builder.setInstr(MI); Builder.buildConstant(MI.getOperand(0), C); MI.eraseFromParent(); - return true; } -bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) { +void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) { assert(MI.getNumDefs() == 1 && "Expected only one def?"); Builder.setInstr(MI); Builder.buildConstant(MI.getOperand(0), C); MI.eraseFromParent(); - return true; } -bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) { +void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) { assert(MI.getNumDefs() == 1 && "Expected only one def?"); Builder.setInstr(MI); Builder.buildUndef(MI.getOperand(0)); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchSimplifyAddToSub( @@ -2750,9 +2787,7 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands( Register Y = RightHandInst->getOperand(1).getReg(); LLT XTy = MRI.getType(X); LLT YTy = MRI.getType(Y); - if (XTy != YTy) - return false; - if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}})) + if (!XTy.isValid() || XTy != YTy) return false; // Optional extra source register. @@ -2779,6 +2814,9 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands( } } + if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}})) + return false; + // Record the steps to build the new instructions. // // Steps to build (logic x, y) @@ -3227,7 +3265,7 @@ bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI, /// \p SelectOperand is the operand in binary operator \p MI that is the select /// to fold. -bool CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI, +void CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOperand) { Builder.setInstrAndDebugLoc(MI); @@ -3263,8 +3301,6 @@ bool CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI, Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags()); MI.eraseFromParent(); - - return true; } std::optional<SmallVector<Register, 8>> @@ -3612,275 +3648,6 @@ bool CombinerHelper::matchLoadOrCombine( return true; } -/// Check if the store \p Store is a truncstore that can be merged. That is, -/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty -/// Register then it does not need to match and SrcVal is set to the source -/// value found. -/// On match, returns the start byte offset of the \p SrcVal that is being -/// stored. -static std::optional<int64_t> -getTruncStoreByteOffset(GStore &Store, Register &SrcVal, - MachineRegisterInfo &MRI) { - Register TruncVal; - if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal)))) - return std::nullopt; - - // The shift amount must be a constant multiple of the narrow type. - // It is translated to the offset address in the wide source value "y". - // - // x = G_LSHR y, ShiftAmtC - // s8 z = G_TRUNC x - // store z, ... - Register FoundSrcVal; - int64_t ShiftAmt; - if (!mi_match(TruncVal, MRI, - m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)), - m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) { - if (!SrcVal.isValid() || TruncVal == SrcVal) { - if (!SrcVal.isValid()) - SrcVal = TruncVal; - return 0; // If it's the lowest index store. - } - return std::nullopt; - } - - unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits(); - if (ShiftAmt % NarrowBits!= 0) - return std::nullopt; - const unsigned Offset = ShiftAmt / NarrowBits; - - if (SrcVal.isValid() && FoundSrcVal != SrcVal) - return std::nullopt; - - if (!SrcVal.isValid()) - SrcVal = FoundSrcVal; - else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal)) - return std::nullopt; - return Offset; -} - -/// Match a pattern where a wide type scalar value is stored by several narrow -/// stores. Fold it into a single store or a BSWAP and a store if the targets -/// supports it. -/// -/// Assuming little endian target: -/// i8 *p = ... -/// i32 val = ... -/// p[0] = (val >> 0) & 0xFF; -/// p[1] = (val >> 8) & 0xFF; -/// p[2] = (val >> 16) & 0xFF; -/// p[3] = (val >> 24) & 0xFF; -/// => -/// *((i32)p) = val; -/// -/// i8 *p = ... -/// i32 val = ... -/// p[0] = (val >> 24) & 0xFF; -/// p[1] = (val >> 16) & 0xFF; -/// p[2] = (val >> 8) & 0xFF; -/// p[3] = (val >> 0) & 0xFF; -/// => -/// *((i32)p) = BSWAP(val); -bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI, - MergeTruncStoresInfo &MatchInfo) { - auto &StoreMI = cast<GStore>(MI); - LLT MemTy = StoreMI.getMMO().getMemoryType(); - - // We only handle merging simple stores of 1-4 bytes. - if (!MemTy.isScalar()) - return false; - switch (MemTy.getSizeInBits()) { - case 8: - case 16: - case 32: - break; - default: - return false; - } - if (!StoreMI.isSimple()) - return false; - - // We do a simple search for mergeable stores prior to this one. - // Any potential alias hazard along the way terminates the search. - SmallVector<GStore *> FoundStores; - - // We're looking for: - // 1) a (store(trunc(...))) - // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get - // the partial value stored. - // 3) where the offsets form either a little or big-endian sequence. - - auto &LastStore = StoreMI; - - // The single base pointer that all stores must use. - Register BaseReg; - int64_t LastOffset; - if (!mi_match(LastStore.getPointerReg(), MRI, - m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) { - BaseReg = LastStore.getPointerReg(); - LastOffset = 0; - } - - GStore *LowestIdxStore = &LastStore; - int64_t LowestIdxOffset = LastOffset; - - Register WideSrcVal; - auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, MRI); - if (!LowestShiftAmt) - return false; // Didn't match a trunc. - assert(WideSrcVal.isValid()); - - LLT WideStoreTy = MRI.getType(WideSrcVal); - // The wide type might not be a multiple of the memory type, e.g. s48 and s32. - if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0) - return false; - const unsigned NumStoresRequired = - WideStoreTy.getSizeInBits() / MemTy.getSizeInBits(); - - SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX); - OffsetMap[*LowestShiftAmt] = LastOffset; - FoundStores.emplace_back(&LastStore); - - // Search the block up for more stores. - // We use a search threshold of 10 instructions here because the combiner - // works top-down within a block, and we don't want to search an unbounded - // number of predecessor instructions trying to find matching stores. - // If we moved this optimization into a separate pass then we could probably - // use a more efficient search without having a hard-coded threshold. - const int MaxInstsToCheck = 10; - int NumInstsChecked = 0; - for (auto II = ++LastStore.getReverseIterator(); - II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck; - ++II) { - NumInstsChecked++; - GStore *NewStore; - if ((NewStore = dyn_cast<GStore>(&*II))) { - if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple()) - break; - } else if (II->isLoadFoldBarrier() || II->mayLoad()) { - break; - } else { - continue; // This is a safe instruction we can look past. - } - - Register NewBaseReg; - int64_t MemOffset; - // Check we're storing to the same base + some offset. - if (!mi_match(NewStore->getPointerReg(), MRI, - m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) { - NewBaseReg = NewStore->getPointerReg(); - MemOffset = 0; - } - if (BaseReg != NewBaseReg) - break; - - auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, MRI); - if (!ShiftByteOffset) - break; - if (MemOffset < LowestIdxOffset) { - LowestIdxOffset = MemOffset; - LowestIdxStore = NewStore; - } - - // Map the offset in the store and the offset in the combined value, and - // early return if it has been set before. - if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired || - OffsetMap[*ShiftByteOffset] != INT64_MAX) - break; - OffsetMap[*ShiftByteOffset] = MemOffset; - - FoundStores.emplace_back(NewStore); - // Reset counter since we've found a matching inst. - NumInstsChecked = 0; - if (FoundStores.size() == NumStoresRequired) - break; - } - - if (FoundStores.size() != NumStoresRequired) { - return false; - } - - const auto &DL = LastStore.getMF()->getDataLayout(); - auto &C = LastStore.getMF()->getFunction().getContext(); - // Check that a store of the wide type is both allowed and fast on the target - unsigned Fast = 0; - bool Allowed = getTargetLowering().allowsMemoryAccess( - C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast); - if (!Allowed || !Fast) - return false; - - // Check if the pieces of the value are going to the expected places in memory - // to merge the stores. - unsigned NarrowBits = MemTy.getScalarSizeInBits(); - auto checkOffsets = [&](bool MatchLittleEndian) { - if (MatchLittleEndian) { - for (unsigned i = 0; i != NumStoresRequired; ++i) - if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset) - return false; - } else { // MatchBigEndian by reversing loop counter. - for (unsigned i = 0, j = NumStoresRequired - 1; i != NumStoresRequired; - ++i, --j) - if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset) - return false; - } - return true; - }; - - // Check if the offsets line up for the native data layout of this target. - bool NeedBswap = false; - bool NeedRotate = false; - if (!checkOffsets(DL.isLittleEndian())) { - // Special-case: check if byte offsets line up for the opposite endian. - if (NarrowBits == 8 && checkOffsets(DL.isBigEndian())) - NeedBswap = true; - else if (NumStoresRequired == 2 && checkOffsets(DL.isBigEndian())) - NeedRotate = true; - else - return false; - } - - if (NeedBswap && - !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}})) - return false; - if (NeedRotate && - !isLegalOrBeforeLegalizer({TargetOpcode::G_ROTR, {WideStoreTy}})) - return false; - - MatchInfo.NeedBSwap = NeedBswap; - MatchInfo.NeedRotate = NeedRotate; - MatchInfo.LowestIdxStore = LowestIdxStore; - MatchInfo.WideSrcVal = WideSrcVal; - MatchInfo.FoundStores = std::move(FoundStores); - return true; -} - -void CombinerHelper::applyTruncStoreMerge(MachineInstr &MI, - MergeTruncStoresInfo &MatchInfo) { - - Builder.setInstrAndDebugLoc(MI); - Register WideSrcVal = MatchInfo.WideSrcVal; - LLT WideStoreTy = MRI.getType(WideSrcVal); - - if (MatchInfo.NeedBSwap) { - WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0); - } else if (MatchInfo.NeedRotate) { - assert(WideStoreTy.getSizeInBits() % 2 == 0 && - "Unexpected type for rotate"); - auto RotAmt = - Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2); - WideSrcVal = - Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0); - } - - Builder.buildStore(WideSrcVal, MatchInfo.LowestIdxStore->getPointerReg(), - MatchInfo.LowestIdxStore->getMMO().getPointerInfo(), - MatchInfo.LowestIdxStore->getMMO().getAlign()); - - // Erase the old stores. - for (auto *ST : MatchInfo.FoundStores) - ST->eraseFromParent(); -} - bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) { assert(MI.getOpcode() == TargetOpcode::G_PHI); @@ -4395,7 +4162,7 @@ bool CombinerHelper::matchBitfieldExtractFromAnd( if (static_cast<uint64_t>(LSBImm) >= Size) return false; - uint64_t Width = APInt(Size, AndImm).countTrailingOnes(); + uint64_t Width = APInt(Size, AndImm).countr_one(); MatchInfo = [=](MachineIRBuilder &B) { auto WidthCst = B.buildConstant(ExtractTy, Width); auto LSBCst = B.buildConstant(ExtractTy, LSBImm); @@ -4496,7 +4263,7 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd( // Calculate start position and width of the extract. const int64_t Pos = ShrAmt; - const int64_t Width = countTrailingOnes(UMask) - ShrAmt; + const int64_t Width = llvm::countr_one(UMask) - ShrAmt; // It's preferable to keep the shift, rather than form G_SBFX. // TODO: remove the G_AND via demanded bits analysis. @@ -4695,6 +4462,62 @@ bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI, return false; } +bool CombinerHelper::tryReassocBinOp(unsigned Opc, Register DstReg, + Register OpLHS, Register OpRHS, + BuildFnTy &MatchInfo) { + LLT OpRHSTy = MRI.getType(OpRHS); + MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS); + + if (OpLHSDef->getOpcode() != Opc) + return false; + + MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS); + Register OpLHSLHS = OpLHSDef->getOperand(1).getReg(); + Register OpLHSRHS = OpLHSDef->getOperand(2).getReg(); + + // If the inner op is (X op C), pull the constant out so it can be folded with + // other constants in the expression tree. Folding is not guaranteed so we + // might have (C1 op C2). In that case do not pull a constant out because it + // won't help and can lead to infinite loops. + if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) && + !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) { + if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) { + // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2)) + MatchInfo = [=](MachineIRBuilder &B) { + auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS}); + B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst}); + }; + return true; + } + if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) { + // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1) + // iff (op x, c1) has one use + MatchInfo = [=](MachineIRBuilder &B) { + auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS}); + B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS}); + }; + return true; + } + } + + return false; +} + +bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI, + BuildFnTy &MatchInfo) { + // We don't check if the reassociation will break a legal addressing mode + // here since pointer arithmetic is handled by G_PTR_ADD. + unsigned Opc = MI.getOpcode(); + Register DstReg = MI.getOperand(0).getReg(); + Register LHSReg = MI.getOperand(1).getReg(); + Register RHSReg = MI.getOperand(2).getReg(); + + if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo)) + return true; + if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo)) + return true; + return false; +} bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) { Register Op1 = MI.getOperand(1).getReg(); @@ -4766,7 +4589,7 @@ bool CombinerHelper::matchNarrowBinopFeedingAnd( return false; // No point in combining if there's nothing to truncate. - unsigned NarrowWidth = Mask.countTrailingOnes(); + unsigned NarrowWidth = Mask.countr_one(); if (NarrowWidth == WideTy.getSizeInBits()) return false; LLT NarrowTy = LLT::scalar(NarrowWidth); @@ -4956,7 +4779,7 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) { // Magic algorithm doesn't work for division by 1. We need to emit a select // at the end. // TODO: Use undef values for divisor of 1. - if (!Divisor.isOneValue()) { + if (!Divisor.isOne()) { UnsignedDivisionByConstantInfo magics = UnsignedDivisionByConstantInfo::get(Divisor); @@ -5144,7 +4967,7 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) { auto *CI = cast<ConstantInt>(C); APInt Divisor = CI->getValue(); - unsigned Shift = Divisor.countTrailingZeros(); + unsigned Shift = Divisor.countr_zero(); if (Shift) { Divisor.ashrInPlace(Shift); UseSRA = true; @@ -6185,6 +6008,16 @@ bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI, return CmpInst::isEquality(Pred) && Y.isValid(); } +bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) { + Register ShiftReg = MI.getOperand(2).getReg(); + LLT ResTy = MRI.getType(MI.getOperand(0).getReg()); + auto IsShiftTooBig = [&](const Constant *C) { + auto *CI = dyn_cast<ConstantInt>(C); + return CI && CI->uge(ResTy.getScalarSizeInBits()); + }; + return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig); +} + bool CombinerHelper::tryCombine(MachineInstr &MI) { if (tryCombineCopy(MI)) return true; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp new file mode 100644 index 000000000000..d747cbf5aadc --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp @@ -0,0 +1,68 @@ +//===- llvm/CodeGen/GlobalISel/GIMatchTableExecutor.cpp -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file implements the GIMatchTableExecutor class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +#define DEBUG_TYPE "gi-match-table-executor" + +using namespace llvm; + +GIMatchTableExecutor::MatcherState::MatcherState(unsigned MaxRenderers) + : Renderers(MaxRenderers) {} + +GIMatchTableExecutor::GIMatchTableExecutor() = default; + +bool GIMatchTableExecutor::isOperandImmEqual( + const MachineOperand &MO, int64_t Value, + const MachineRegisterInfo &MRI) const { + if (MO.isReg() && MO.getReg()) + if (auto VRegVal = getIConstantVRegValWithLookThrough(MO.getReg(), MRI)) + return VRegVal->Value.getSExtValue() == Value; + return false; +} + +bool GIMatchTableExecutor::isBaseWithConstantOffset( + const MachineOperand &Root, const MachineRegisterInfo &MRI) const { + if (!Root.isReg()) + return false; + + MachineInstr *RootI = MRI.getVRegDef(Root.getReg()); + if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD) + return false; + + MachineOperand &RHS = RootI->getOperand(2); + MachineInstr *RHSI = MRI.getVRegDef(RHS.getReg()); + if (RHSI->getOpcode() != TargetOpcode::G_CONSTANT) + return false; + + return true; +} + +bool GIMatchTableExecutor::isObviouslySafeToFold(MachineInstr &MI, + MachineInstr &IntoMI) const { + // Immediate neighbours are already folded. + if (MI.getParent() == IntoMI.getParent() && + std::next(MI.getIterator()) == IntoMI.getIterator()) + return true; + + // Convergent instructions cannot be moved in the CFG. + if (MI.isConvergent() && MI.getParent() != IntoMI.getParent()) + return false; + + return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() && + !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty(); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index bfbe7e1c3e55..363ffbfa90b5 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -115,7 +116,7 @@ void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1, computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth); // Only known if known in both the LHS and RHS. - Known = KnownBits::commonBits(Known, Known2); + Known = Known.intersectWith(Known2); } // Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is @@ -191,7 +192,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, Depth + 1); // Known bits are the values that are shared by every demanded element. - Known = KnownBits::commonBits(Known, Known2); + Known = Known.intersectWith(Known2); // If we don't know any bits, early out. if (Known.isUnknown()) @@ -235,10 +236,10 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, // For COPYs we don't do anything, don't increase the depth. computeKnownBitsImpl(SrcReg, Known2, DemandedElts, Depth + (Opcode != TargetOpcode::COPY)); - Known = KnownBits::commonBits(Known, Known2); + Known = Known.intersectWith(Known2); // If we reach a point where we don't know anything // just stop looking through the operands. - if (Known.One == 0 && Known.Zero == 0) + if (Known.isUnknown()) break; } else { // We know nothing. @@ -750,7 +751,7 @@ unsigned GISelKnownBits::computeNumSignBits(Register R, // Okay, we know that the sign bit in Mask is set. Use CLO to determine // the number of identical bits in the top of the input value. Mask <<= Mask.getBitWidth() - TyBits; - return std::max(FirstAnswer, Mask.countLeadingOnes()); + return std::max(FirstAnswer, Mask.countl_one()); } unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 7d811dc0ad8f..9a67a8d05a4d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/LowLevelType.h" +#include "llvm/CodeGen/LowLevelTypeUtils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -43,6 +44,7 @@ #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -74,7 +76,6 @@ #include "llvm/Support/CodeGen.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" @@ -300,7 +301,7 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U, Register Op0 = getOrCreateVReg(*U.getOperand(0)); Register Op1 = getOrCreateVReg(*U.getOperand(1)); Register Res = getOrCreateVReg(U); - uint16_t Flags = 0; + uint32_t Flags = 0; if (isa<Instruction>(U)) { const Instruction &I = cast<Instruction>(U); Flags = MachineInstr::copyFlagsFromInstruction(I); @@ -314,7 +315,7 @@ bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U, MachineIRBuilder &MIRBuilder) { Register Op0 = getOrCreateVReg(*U.getOperand(0)); Register Res = getOrCreateVReg(U); - uint16_t Flags = 0; + uint32_t Flags = 0; if (isa<Instruction>(U)) { const Instruction &I = cast<Instruction>(U); Flags = MachineInstr::copyFlagsFromInstruction(I); @@ -345,7 +346,7 @@ bool IRTranslator::translateCompare(const User &U, MIRBuilder.buildCopy( Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType()))); else { - uint16_t Flags = 0; + uint32_t Flags = 0; if (CI) Flags = MachineInstr::copyFlagsFromInstruction(*CI); MIRBuilder.buildFCmp(Pred, Res, Op0, Op1, Flags); @@ -844,8 +845,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, // For conditional branch lowering, we might try to do something silly like // emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so, // just re-use the existing condition vreg. - if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI && - CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) { + if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI && CI->isOne() && + CB.PredInfo.Pred == CmpInst::ICMP_EQ) { Cond = CondLHS; } else { Register CondRHS = getOrCreateVReg(*CB.CmpRHS); @@ -1018,7 +1019,7 @@ void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B, LLT MaskTy = SwitchOpTy; if (MaskTy.getSizeInBits() > PtrTy.getSizeInBits() || - !isPowerOf2_32(MaskTy.getSizeInBits())) + !llvm::has_single_bit<uint32_t>(MaskTy.getSizeInBits())) MaskTy = LLT::scalar(PtrTy.getSizeInBits()); else { // Ensure that the type will fit the mask value. @@ -1074,14 +1075,14 @@ void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB, // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. auto MaskTrailingZeros = - MIB.buildConstant(SwitchTy, countTrailingZeros(B.Mask)); + MIB.buildConstant(SwitchTy, llvm::countr_zero(B.Mask)); Cmp = MIB.buildICmp(ICmpInst::ICMP_EQ, LLT::scalar(1), Reg, MaskTrailingZeros) .getReg(0); } else if (PopCount == BB.Range) { // There is only one zero bit in the range, test for it directly. auto MaskTrailingOnes = - MIB.buildConstant(SwitchTy, countTrailingOnes(B.Mask)); + MIB.buildConstant(SwitchTy, llvm::countr_one(B.Mask)); Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Reg, MaskTrailingOnes) .getReg(0); } else { @@ -1294,7 +1295,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { AAMDNodes AAInfo = LI.getAAMetadata(); const Value *Ptr = LI.getPointerOperand(); - Type *OffsetIRTy = DL->getIntPtrType(Ptr->getType()); + Type *OffsetIRTy = DL->getIndexType(Ptr->getType()); LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL); if (CLI->supportSwiftError() && isSwiftError(Ptr)) { @@ -1342,7 +1343,7 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand()); Register Base = getOrCreateVReg(*SI.getPointerOperand()); - Type *OffsetIRTy = DL->getIntPtrType(SI.getPointerOperandType()); + Type *OffsetIRTy = DL->getIndexType(SI.getPointerOperandType()); LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL); if (CLI->supportSwiftError() && isSwiftError(SI.getPointerOperand())) { @@ -1438,7 +1439,7 @@ bool IRTranslator::translateSelect(const User &U, ArrayRef<Register> Op0Regs = getOrCreateVRegs(*U.getOperand(1)); ArrayRef<Register> Op1Regs = getOrCreateVRegs(*U.getOperand(2)); - uint16_t Flags = 0; + uint32_t Flags = 0; if (const SelectInst *SI = dyn_cast<SelectInst>(&U)) Flags = MachineInstr::copyFlagsFromInstruction(*SI); @@ -1468,8 +1469,14 @@ bool IRTranslator::translateBitCast(const User &U, MachineIRBuilder &MIRBuilder) { // If we're bitcasting to the source type, we can reuse the source vreg. if (getLLTForType(*U.getOperand(0)->getType(), *DL) == - getLLTForType(*U.getType(), *DL)) + getLLTForType(*U.getType(), *DL)) { + // If the source is a ConstantInt then it was probably created by + // ConstantHoisting and we should leave it alone. + if (isa<ConstantInt>(U.getOperand(0))) + return translateCast(TargetOpcode::G_CONSTANT_FOLD_BARRIER, U, + MIRBuilder); return translateCopy(U, *U.getOperand(0), MIRBuilder); + } return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder); } @@ -1488,7 +1495,7 @@ bool IRTranslator::translateGetElementPtr(const User &U, Register BaseReg = getOrCreateVReg(Op0); Type *PtrIRTy = Op0.getType(); LLT PtrTy = getLLTForType(*PtrIRTy, *DL); - Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy); + Type *OffsetIRTy = DL->getIndexType(PtrIRTy); LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL); // Normalize Vector GEP - all scalar operands should be converted to the @@ -1513,7 +1520,7 @@ bool IRTranslator::translateGetElementPtr(const User &U, .getReg(0); PtrIRTy = FixedVectorType::get(PtrIRTy, VectorWidth); PtrTy = getLLTForType(*PtrIRTy, *DL); - OffsetIRTy = DL->getIntPtrType(PtrIRTy); + OffsetIRTy = DL->getIndexType(PtrIRTy); OffsetTy = getLLTForType(*OffsetIRTy, *DL); } @@ -1759,6 +1766,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { return TargetOpcode::G_FLOG2; case Intrinsic::log10: return TargetOpcode::G_FLOG10; + case Intrinsic::ldexp: + return TargetOpcode::G_FLDEXP; case Intrinsic::nearbyint: return TargetOpcode::G_FNEARBYINT; case Intrinsic::pow: @@ -1851,6 +1860,8 @@ static unsigned getConstrainedOpcode(Intrinsic::ID ID) { return TargetOpcode::G_STRICT_FMA; case Intrinsic::experimental_constrained_sqrt: return TargetOpcode::G_STRICT_FSQRT; + case Intrinsic::experimental_constrained_ldexp: + return TargetOpcode::G_STRICT_FLDEXP; default: return 0; } @@ -1864,7 +1875,7 @@ bool IRTranslator::translateConstrainedFPIntrinsic( if (!Opcode) return false; - unsigned Flags = MachineInstr::copyFlagsFromInstruction(FPI); + uint32_t Flags = MachineInstr::copyFlagsFromInstruction(FPI); if (EB == fp::ExceptionBehavior::ebIgnore) Flags |= MachineInstr::NoFPExcept; @@ -1879,6 +1890,60 @@ bool IRTranslator::translateConstrainedFPIntrinsic( return true; } +std::optional<MCRegister> IRTranslator::getArgPhysReg(Argument &Arg) { + auto VRegs = getOrCreateVRegs(Arg); + if (VRegs.size() != 1) + return std::nullopt; + + // Arguments are lowered as a copy of a livein physical register. + auto *VRegDef = MF->getRegInfo().getVRegDef(VRegs[0]); + if (!VRegDef || !VRegDef->isCopy()) + return std::nullopt; + return VRegDef->getOperand(1).getReg().asMCReg(); +} + +bool IRTranslator::translateIfEntryValueArgument(const DbgValueInst &DebugInst, + MachineIRBuilder &MIRBuilder) { + auto *Arg = dyn_cast<Argument>(DebugInst.getValue()); + if (!Arg) + return false; + + const DIExpression *Expr = DebugInst.getExpression(); + if (!Expr->isEntryValue()) + return false; + + std::optional<MCRegister> PhysReg = getArgPhysReg(*Arg); + if (!PhysReg) { + LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but " + "couldn't find a physical register\n" + << DebugInst << "\n"); + return true; + } + + MIRBuilder.buildDirectDbgValue(*PhysReg, DebugInst.getVariable(), + DebugInst.getExpression()); + return true; +} + +bool IRTranslator::translateIfEntryValueArgument( + const DbgDeclareInst &DebugInst) { + auto *Arg = dyn_cast<Argument>(DebugInst.getAddress()); + if (!Arg) + return false; + + const DIExpression *Expr = DebugInst.getExpression(); + if (!Expr->isEntryValue()) + return false; + + std::optional<MCRegister> PhysReg = getArgPhysReg(*Arg); + if (!PhysReg) + return false; + + MF->setVariableDbgInfo(DebugInst.getVariable(), Expr, *PhysReg, + DebugInst.getDebugLoc()); + return true; +} + bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder) { if (auto *MI = dyn_cast<AnyMemIntrinsic>(&CI)) { @@ -1945,12 +2010,16 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, // instructions (in fact, they get ignored if they *do* exist). MF->setVariableDbgInfo(DI.getVariable(), DI.getExpression(), getOrCreateFrameIndex(*AI), DI.getDebugLoc()); - } else { - // A dbg.declare describes the address of a source variable, so lower it - // into an indirect DBG_VALUE. - MIRBuilder.buildIndirectDbgValue(getOrCreateVReg(*Address), - DI.getVariable(), DI.getExpression()); + return true; } + + if (translateIfEntryValueArgument(DI)) + return true; + + // A dbg.declare describes the address of a source variable, so lower it + // into an indirect DBG_VALUE. + MIRBuilder.buildIndirectDbgValue(getOrCreateVReg(*Address), + DI.getVariable(), DI.getExpression()); return true; } case Intrinsic::dbg_label: { @@ -1991,16 +2060,32 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, // DI cannot produce a valid DBG_VALUE, so produce an undef DBG_VALUE to // terminate any prior location. MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression()); - } else if (const auto *CI = dyn_cast<Constant>(V)) { + return true; + } + if (const auto *CI = dyn_cast<Constant>(V)) { MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression()); - } else { - for (Register Reg : getOrCreateVRegs(*V)) { - // FIXME: This does not handle register-indirect values at offset 0. The - // direct/indirect thing shouldn't really be handled by something as - // implicit as reg+noreg vs reg+imm in the first place, but it seems - // pretty baked in right now. - MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression()); - } + return true; + } + if (auto *AI = dyn_cast<AllocaInst>(V); + AI && AI->isStaticAlloca() && DI.getExpression()->startsWithDeref()) { + // If the value is an alloca and the expression starts with a + // dereference, track a stack slot instead of a register, as registers + // may be clobbered. + auto ExprOperands = DI.getExpression()->getElements(); + auto *ExprDerefRemoved = + DIExpression::get(AI->getContext(), ExprOperands.drop_front()); + MIRBuilder.buildFIDbgValue(getOrCreateFrameIndex(*AI), DI.getVariable(), + ExprDerefRemoved); + return true; + } + if (translateIfEntryValueArgument(DI, MIRBuilder)) + return true; + for (Register Reg : getOrCreateVRegs(*V)) { + // FIXME: This does not handle register-indirect values at offset 0. The + // direct/indirect thing shouldn't really be handled by something as + // implicit as reg+noreg vs reg+imm in the first place, but it seems + // pretty baked in right now. + MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression()); } return true; } @@ -2090,6 +2175,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, getOrCreateVReg(*CI.getArgOperand(0)), MachineInstr::copyFlagsFromInstruction(CI)); return true; + case Intrinsic::frexp: { + ArrayRef<Register> VRegs = getOrCreateVRegs(CI); + MIRBuilder.buildFFrexp(VRegs[0], VRegs[1], + getOrCreateVReg(*CI.getArgOperand(0)), + MachineInstr::copyFlagsFromInstruction(CI)); + return true; + } case Intrinsic::memcpy_inline: return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE); case Intrinsic::memcpy: @@ -2296,7 +2388,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, return CLI->lowerCall(MIRBuilder, Info); } case Intrinsic::fptrunc_round: { - unsigned Flags = MachineInstr::copyFlagsFromInstruction(CI); + uint32_t Flags = MachineInstr::copyFlagsFromInstruction(CI); // Convert the metadata argument to a constant integer Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(1))->getMetadata(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp index e0357c50e555..3925611f1485 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp @@ -391,10 +391,12 @@ bool InlineAsmLowering::lowerInlineAsm( Inst.addReg(SourceRegs[0]); } else { // Otherwise, this outputs to a register (directly for C_Register / - // C_RegisterClass. Find a register that we can use. + // C_RegisterClass/C_Other. assert(OpInfo.ConstraintType == TargetLowering::C_Register || - OpInfo.ConstraintType == TargetLowering::C_RegisterClass); + OpInfo.ConstraintType == TargetLowering::C_RegisterClass || + OpInfo.ConstraintType == TargetLowering::C_Other); + // Find a register that we can use. if (OpInfo.Regs.empty()) { LLVM_DEBUG(dbgs() << "Couldn't allocate output register for constraint\n"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp index f780050ca3f1..9bbef11067ae 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/config.h" @@ -104,7 +105,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { CodeGenCoverage CoverageInfo; assert(ISel && "Cannot work without InstructionSelector"); - ISel->setupMF(MF, KB, CoverageInfo, PSI, BFI); + ISel->setupMF(MF, KB, &CoverageInfo, PSI, BFI); // An optimization remark emitter. Used to report failures. MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); @@ -165,12 +166,12 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { continue; } - // Eliminate hints. - if (isPreISelGenericOptimizationHint(MI.getOpcode())) { - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); + // Eliminate hints or G_CONSTANT_FOLD_BARRIER. + if (isPreISelGenericOptimizationHint(MI.getOpcode()) || + MI.getOpcode() == TargetOpcode::G_CONSTANT_FOLD_BARRIER) { + auto [DstReg, SrcReg] = MI.getFirst2Regs(); - // At this point, the destination register class of the hint may have + // At this point, the destination register class of the op may have // been decided. // // Propagate that through to the source register. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 8959d215ecd1..c48591cc2f02 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -5,64 +5,12 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -/// \file -/// This file implements the InstructionSelector class. -// -//===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" -#include "llvm/CodeGen/GlobalISel/Utils.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" - -#define DEBUG_TYPE "instructionselector" - -using namespace llvm; - -InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers) - : Renderers(MaxRenderers) {} - -InstructionSelector::InstructionSelector() = default; - -bool InstructionSelector::isOperandImmEqual( - const MachineOperand &MO, int64_t Value, - const MachineRegisterInfo &MRI) const { - if (MO.isReg() && MO.getReg()) - if (auto VRegVal = getIConstantVRegValWithLookThrough(MO.getReg(), MRI)) - return VRegVal->Value.getSExtValue() == Value; - return false; -} - -bool InstructionSelector::isBaseWithConstantOffset( - const MachineOperand &Root, const MachineRegisterInfo &MRI) const { - if (!Root.isReg()) - return false; - - MachineInstr *RootI = MRI.getVRegDef(Root.getReg()); - if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD) - return false; - - MachineOperand &RHS = RootI->getOperand(2); - MachineInstr *RHSI = MRI.getVRegDef(RHS.getReg()); - if (RHSI->getOpcode() != TargetOpcode::G_CONSTANT) - return false; - - return true; -} -bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI, - MachineInstr &IntoMI) const { - // Immediate neighbours are already folded. - if (MI.getParent() == IntoMI.getParent() && - std::next(MI.getIterator()) == IntoMI.getIterator()) - return true; +namespace llvm { - // Convergent instructions cannot be moved in the CFG. - if (MI.isConvergent() && MI.getParent() != IntoMI.getParent()) - return false; +// vtable anchor +InstructionSelector::~InstructionSelector() = default; - return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() && - !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty(); -} +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp index 54a82cac95d5..2c77ed8b0600 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp @@ -164,7 +164,8 @@ LegalityPredicate LegalityPredicates::sizeNotMultipleOf(unsigned TypeIdx, LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) { return [=](const LegalityQuery &Query) { const LLT QueryTy = Query.Types[TypeIdx]; - return QueryTy.isScalar() && !isPowerOf2_32(QueryTy.getSizeInBits()); + return QueryTy.isScalar() && + !llvm::has_single_bit<uint32_t>(QueryTy.getSizeInBits()); }; } @@ -184,14 +185,16 @@ LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0, LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) { return [=](const LegalityQuery &Query) { - return !isPowerOf2_32(Query.MMODescrs[MMOIdx].MemoryTy.getSizeInBytes()); + return !llvm::has_single_bit<uint32_t>( + Query.MMODescrs[MMOIdx].MemoryTy.getSizeInBytes()); }; } LegalityPredicate LegalityPredicates::memSizeNotByteSizePow2(unsigned MMOIdx) { return [=](const LegalityQuery &Query) { const LLT MemTy = Query.MMODescrs[MMOIdx].MemoryTy; - return !MemTy.isByteSized() || !isPowerOf2_32(MemTy.getSizeInBytes()); + return !MemTy.isByteSized() || + !llvm::has_single_bit<uint32_t>(MemTy.getSizeInBytes()); }; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index 1a13f39c100c..aecbe0b7604c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/GISelWorkList.h" #include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" @@ -75,6 +76,7 @@ INITIALIZE_PASS_BEGIN(Legalizer, DEBUG_TYPE, false) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) INITIALIZE_PASS_END(Legalizer, DEBUG_TYPE, "Legalize the Machine IR a function's Machine IR", false, false) @@ -85,6 +87,8 @@ void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetPassConfig>(); AU.addRequired<GISelCSEAnalysisWrapperPass>(); AU.addPreserved<GISelCSEAnalysisWrapperPass>(); + AU.addRequired<GISelKnownBitsAnalysis>(); + AU.addPreserved<GISelKnownBitsAnalysis>(); getSelectionDAGFallbackAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -173,7 +177,8 @@ Legalizer::MFResult Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, ArrayRef<GISelChangeObserver *> AuxObservers, LostDebugLocObserver &LocObserver, - MachineIRBuilder &MIRBuilder) { + MachineIRBuilder &MIRBuilder, + GISelKnownBits *KB) { MIRBuilder.setMF(MF); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -212,7 +217,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, // Now install the observer as the delegate to MF. // This will keep all the observers notified about new insertions/deletions. RAIIMFObsDelInstaller Installer(MF, WrapperObserver); - LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder); + LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder, KB); LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI); bool Changed = false; SmallVector<MachineInstr *, 128> RetryList; @@ -314,8 +319,6 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper(); MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); - const size_t NumBlocks = MF.size(); - std::unique_ptr<MachineIRBuilder> MIRBuilder; GISelCSEInfo *CSEInfo = nullptr; bool EnableCSE = EnableCSEInLegalizer.getNumOccurrences() @@ -338,25 +341,18 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { if (VerifyDebugLocs > DebugLocVerifyLevel::None) AuxObservers.push_back(&LocObserver); + // This allows Known Bits Analysis in the legalizer. + GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); + const LegalizerInfo &LI = *MF.getSubtarget().getLegalizerInfo(); - MFResult Result = - legalizeMachineFunction(MF, LI, AuxObservers, LocObserver, *MIRBuilder); + MFResult Result = legalizeMachineFunction(MF, LI, AuxObservers, LocObserver, + *MIRBuilder, KB); if (Result.FailedOn) { reportGISelFailure(MF, TPC, MORE, "gisel-legalize", "unable to legalize instruction", *Result.FailedOn); return false; } - // For now don't support if new blocks are inserted - we would need to fix the - // outer loop for that. - if (MF.size() != NumBlocks) { - MachineOptimizationRemarkMissed R("gisel-legalize", "GISelFailure", - MF.getFunction().getSubprogram(), - /*MBB=*/nullptr); - R << "inserting blocks is not supported yet"; - reportGISelFailure(MF, TPC, MORE, R); - return false; - } if (LocObserver.getNumLostDebugLocs()) { MachineOptimizationRemarkMissed R("gisel-legalize", "LostDebugLoc", diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 8a1fce2d3d65..f0da0d88140f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -15,12 +15,14 @@ #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetFrameLowering.h" @@ -102,13 +104,13 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF, MachineIRBuilder &Builder) : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()), LI(*MF.getSubtarget().getLegalizerInfo()), - TLI(*MF.getSubtarget().getTargetLowering()) { } + TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {} LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI, GISelChangeObserver &Observer, - MachineIRBuilder &B) - : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI), - TLI(*MF.getSubtarget().getTargetLowering()) { } + MachineIRBuilder &B, GISelKnownBits *KB) + : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI), + TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {} LegalizerHelper::LegalizeResult LegalizerHelper::legalizeInstrStep(MachineInstr &MI, @@ -540,6 +542,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { RTLIBCASE(LOG_F); case TargetOpcode::G_FLOG2: RTLIBCASE(LOG2_F); + case TargetOpcode::G_FLDEXP: + RTLIBCASE(LDEXP_F); case TargetOpcode::G_FCEIL: RTLIBCASE(CEIL_F); case TargetOpcode::G_FFLOOR: @@ -824,6 +828,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { case TargetOpcode::G_FLOG10: case TargetOpcode::G_FLOG: case TargetOpcode::G_FLOG2: + case TargetOpcode::G_FLDEXP: case TargetOpcode::G_FEXP: case TargetOpcode::G_FEXP2: case TargetOpcode::G_FCEIL: @@ -1411,6 +1416,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT); Observer.changedInstr(MI); return Legalized; + case TargetOpcode::G_FLDEXP: + case TargetOpcode::G_STRICT_FLDEXP: + return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy); } } @@ -1504,13 +1512,11 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, if (TypeIdx != 1) return UnableToLegalize; - Register DstReg = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg); + auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs(); if (DstTy.isVector()) return UnableToLegalize; - Register Src1 = MI.getOperand(1).getReg(); - LLT SrcTy = MRI.getType(Src1); + LLT SrcTy = MRI.getType(Src1Reg); const int DstSize = DstTy.getSizeInBits(); const int SrcSize = SrcTy.getSizeInBits(); const int WideSize = WideTy.getSizeInBits(); @@ -1522,7 +1528,7 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, if (WideSize >= DstSize) { // Directly pack the bits in the target type. - Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0); + Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0); for (unsigned I = 2; I != NumOps; ++I) { const unsigned Offset = (I - 1) * PartSize; @@ -1753,11 +1759,7 @@ LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LegalizerHelper::LegalizeResult LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - - LLT DstTy = MRI.getType(DstReg); + auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); unsigned Offset = MI.getOperand(2).getImm(); if (TypeIdx == 0) { @@ -1978,10 +1980,7 @@ LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx, } bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO; - Register Result = MI.getOperand(0).getReg(); - Register OriginalOverflow = MI.getOperand(1).getReg(); - Register LHS = MI.getOperand(2).getReg(); - Register RHS = MI.getOperand(3).getReg(); + auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs(); LLT SrcTy = MRI.getType(LHS); LLT OverflowTy = MRI.getType(OriginalOverflow); unsigned SrcBitWidth = SrcTy.getScalarSizeInBits(); @@ -2560,12 +2559,41 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_FPOWI: { - if (TypeIdx != 0) - return UnableToLegalize; + case TargetOpcode::G_FPOWI: + case TargetOpcode::G_FLDEXP: + case TargetOpcode::G_STRICT_FLDEXP: { + if (TypeIdx == 0) { + if (MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP) + return UnableToLegalize; + + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT); + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + Observer.changedInstr(MI); + return Legalized; + } + + if (TypeIdx == 1) { + // For some reason SelectionDAG tries to promote to a libcall without + // actually changing the integer type for promotion. + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); + Observer.changedInstr(MI); + return Legalized; + } + + return UnableToLegalize; + } + case TargetOpcode::G_FFREXP: { Observer.changingInstr(MI); - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT); - widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + + if (TypeIdx == 0) { + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT); + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + } else { + widenScalarDst(MI, WideTy, 1); + } + Observer.changedInstr(MI); return Legalized; } @@ -2631,12 +2659,34 @@ static void getUnmergePieces(SmallVectorImpl<Register> &Pieces, } LegalizerHelper::LegalizeResult -LegalizerHelper::lowerBitcast(MachineInstr &MI) { +LegalizerHelper::lowerFConstant(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(Dst); - LLT SrcTy = MRI.getType(Src); + MachineFunction &MF = MIRBuilder.getMF(); + const DataLayout &DL = MIRBuilder.getDataLayout(); + + unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace(); + LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace)); + Align Alignment = Align(DL.getABITypeAlign( + getFloatTypeForLLT(MF.getFunction().getContext(), MRI.getType(Dst)))); + + auto Addr = MIRBuilder.buildConstantPool( + AddrPtrTy, MF.getConstantPool()->getConstantPoolIndex( + MI.getOperand(1).getFPImm(), Alignment)); + + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad, + MRI.getType(Dst), Alignment); + + MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Addr, *MMO); + MI.eraseFromParent(); + + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerBitcast(MachineInstr &MI) { + auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); if (SrcTy.isVector()) { LLT SrcEltTy = SrcTy.getElementType(); SmallVector<Register, 8> SrcRegs; @@ -2732,11 +2782,7 @@ LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, if (TypeIdx != 1) return UnableToLegalize; - Register Dst = MI.getOperand(0).getReg(); - Register SrcVec = MI.getOperand(1).getReg(); - Register Idx = MI.getOperand(2).getReg(); - LLT SrcVecTy = MRI.getType(SrcVec); - LLT IdxTy = MRI.getType(Idx); + auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs(); LLT SrcEltTy = SrcVecTy.getElementType(); unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1; @@ -2872,13 +2918,9 @@ LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, if (TypeIdx != 0) return UnableToLegalize; - Register Dst = MI.getOperand(0).getReg(); - Register SrcVec = MI.getOperand(1).getReg(); - Register Val = MI.getOperand(2).getReg(); - Register Idx = MI.getOperand(3).getReg(); - - LLT VecTy = MRI.getType(Dst); - LLT IdxTy = MRI.getType(Idx); + auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] = + MI.getFirst4RegLLTs(); + LLT VecTy = DstTy; LLT VecEltTy = VecTy.getElementType(); LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy; @@ -3004,7 +3046,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) { if (!isPowerOf2_32(MemSizeInBits)) { // This load needs splitting into power of 2 sized loads. - LargeSplitSize = PowerOf2Floor(MemSizeInBits); + LargeSplitSize = llvm::bit_floor(MemSizeInBits); SmallSplitSize = MemSizeInBits - LargeSplitSize; } else { // This is already a power of 2, but we still need to split this in half. @@ -3122,7 +3164,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) { uint64_t LargeSplitSize, SmallSplitSize; if (!isPowerOf2_32(MemSizeInBits)) { - LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits()); + LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits()); SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize; } else { auto &Ctx = MF.getFunction().getContext(); @@ -3250,6 +3292,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { switch(MI.getOpcode()) { default: return UnableToLegalize; + case TargetOpcode::G_FCONSTANT: + return lowerFConstant(MI); case TargetOpcode::G_BITCAST: return lowerBitcast(MI); case TargetOpcode::G_SREM: @@ -3274,10 +3318,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { case TargetOpcode::G_UMULO: { // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the // result. - Register Res = MI.getOperand(0).getReg(); - Register Overflow = MI.getOperand(1).getReg(); - Register LHS = MI.getOperand(2).getReg(); - Register RHS = MI.getOperand(3).getReg(); + auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs(); LLT Ty = MRI.getType(Res); unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO @@ -3308,7 +3349,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return Legalized; } case TargetOpcode::G_FNEG: { - Register Res = MI.getOperand(0).getReg(); + auto [Res, SubByReg] = MI.getFirst2Regs(); LLT Ty = MRI.getType(Res); // TODO: Handle vector types once we are able to @@ -3317,23 +3358,16 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return UnableToLegalize; auto SignMask = MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits())); - Register SubByReg = MI.getOperand(1).getReg(); MIRBuilder.buildXor(Res, SubByReg, SignMask); MI.eraseFromParent(); return Legalized; } case TargetOpcode::G_FSUB: case TargetOpcode::G_STRICT_FSUB: { - Register Res = MI.getOperand(0).getReg(); + auto [Res, LHS, RHS] = MI.getFirst3Regs(); LLT Ty = MRI.getType(Res); // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)). - // First, check if G_FNEG is marked as Lower. If so, we may - // end up with an infinite loop as G_FSUB is used to legalize G_FNEG. - if (LI.getAction({G_FNEG, {Ty}}).Action == Lower) - return UnableToLegalize; - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); auto Neg = MIRBuilder.buildFNeg(Ty, RHS); if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB) @@ -3357,11 +3391,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return Legalized; } case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { - Register OldValRes = MI.getOperand(0).getReg(); - Register SuccessRes = MI.getOperand(1).getReg(); - Register Addr = MI.getOperand(2).getReg(); - Register CmpVal = MI.getOperand(3).getReg(); - Register NewVal = MI.getOperand(4).getReg(); + auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs(); MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal, **MI.memoperands_begin()); MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal); @@ -3381,10 +3411,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { case TargetOpcode::G_CTPOP: return lowerBitCount(MI); case G_UADDO: { - Register Res = MI.getOperand(0).getReg(); - Register CarryOut = MI.getOperand(1).getReg(); - Register LHS = MI.getOperand(2).getReg(); - Register RHS = MI.getOperand(3).getReg(); + auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs(); MIRBuilder.buildAdd(Res, LHS, RHS); MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS); @@ -3393,11 +3420,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return Legalized; } case G_UADDE: { - Register Res = MI.getOperand(0).getReg(); - Register CarryOut = MI.getOperand(1).getReg(); - Register LHS = MI.getOperand(2).getReg(); - Register RHS = MI.getOperand(3).getReg(); - Register CarryIn = MI.getOperand(4).getReg(); + auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs(); LLT Ty = MRI.getType(Res); auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS); @@ -3409,10 +3432,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return Legalized; } case G_USUBO: { - Register Res = MI.getOperand(0).getReg(); - Register BorrowOut = MI.getOperand(1).getReg(); - Register LHS = MI.getOperand(2).getReg(); - Register RHS = MI.getOperand(3).getReg(); + auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs(); MIRBuilder.buildSub(Res, LHS, RHS); MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS); @@ -3421,11 +3441,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return Legalized; } case G_USUBE: { - Register Res = MI.getOperand(0).getReg(); - Register BorrowOut = MI.getOperand(1).getReg(); - Register LHS = MI.getOperand(2).getReg(); - Register RHS = MI.getOperand(3).getReg(); - Register BorrowIn = MI.getOperand(4).getReg(); + auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs(); const LLT CondTy = MRI.getType(BorrowOut); const LLT Ty = MRI.getType(Res); @@ -3470,8 +3486,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { assert(MI.getOperand(2).isImm() && "Expected immediate"); int64_t SizeInBits = MI.getOperand(2).getImm(); - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); + auto [DstReg, SrcReg] = MI.getFirst2Regs(); LLT DstTy = MRI.getType(DstReg); Register TmpRes = MRI.createGenericVirtualRegister(DstTy); @@ -3869,9 +3884,7 @@ LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI, LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { - Register DstReg = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); // Requires compatible types. Otherwise user of DstReg did not perform unmerge // that should have been artifact combined. Most likely instruction that uses // DstReg has to do more/fewer elements legalization compatible with NarrowTy. @@ -3958,8 +3971,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowVecTy) { - Register DstReg = MI.getOperand(0).getReg(); - Register SrcVec = MI.getOperand(1).getReg(); + auto [DstReg, SrcVec] = MI.getFirst2Regs(); Register InsertVal; bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT; @@ -4159,6 +4171,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FLOG: case G_FLOG2: case G_FLOG10: + case G_FLDEXP: case G_FNEARBYINT: case G_FCEIL: case G_FFLOOR: @@ -4234,6 +4247,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_STRICT_FSUB: case G_STRICT_FMUL: case G_STRICT_FMA: + case G_STRICT_FLDEXP: + case G_FFREXP: return fewerElementsVectorMultiEltType(GMI, NumElts); case G_ICMP: case G_FCMP: @@ -4278,13 +4293,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle( if (TypeIdx != 0) return UnableToLegalize; - Register DstReg = MI.getOperand(0).getReg(); - Register Src1Reg = MI.getOperand(1).getReg(); - Register Src2Reg = MI.getOperand(2).getReg(); + auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] = + MI.getFirst3RegLLTs(); ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); - LLT DstTy = MRI.getType(DstReg); - LLT Src1Ty = MRI.getType(Src1Reg); - LLT Src2Ty = MRI.getType(Src2Reg); // The shuffle should be canonicalized by now. if (DstTy != Src1Ty) return UnableToLegalize; @@ -4474,10 +4485,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions( // The semantics of the normal non-sequential reductions allow us to freely // re-associate the operation. - Register SrcReg = MI.getOperand(1).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - Register DstReg = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg); + auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); if (NarrowTy.isVector() && (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0)) @@ -4865,6 +4873,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_EXTRACT_VECTOR_ELT: case TargetOpcode::G_EXTRACT: if (TypeIdx != 1) return UnableToLegalize; @@ -4873,6 +4882,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_INSERT: + case TargetOpcode::G_INSERT_VECTOR_ELT: case TargetOpcode::G_FREEZE: case TargetOpcode::G_FNEG: case TargetOpcode::G_FABS: @@ -4887,10 +4897,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_SELECT: { - Register DstReg = MI.getOperand(0).getReg(); - Register CondReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT CondTy = MRI.getType(CondReg); + auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs(); if (TypeIdx == 1) { if (!CondTy.isScalar() || DstTy.getElementCount() != MoreTy.getElementCount()) @@ -4943,28 +4950,50 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_FPTRUNC: + case TargetOpcode::G_FPEXT: { + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + LLT SrcTy = LLT::fixed_vector( + MoreTy.getNumElements(), + MRI.getType(MI.getOperand(1).getReg()).getElementType()); + moreElementsVectorSrc(MI, SrcTy, 1); + moreElementsVectorDst(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; + } default: return UnableToLegalize; } } -/// Expand source vectors to the size of destination vector. -static LegalizerHelper::LegalizeResult -equalizeVectorShuffleLengths(MachineInstr &MI, MachineIRBuilder &MIRBuilder) { - MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); - - LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); +LegalizerHelper::LegalizeResult +LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) { + auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); unsigned MaskNumElts = Mask.size(); unsigned SrcNumElts = SrcTy.getNumElements(); - Register DstReg = MI.getOperand(0).getReg(); LLT DestEltTy = DstTy.getElementType(); - // TODO: Normalize the shuffle vector since mask and vector length don't - // match. - if (MaskNumElts <= SrcNumElts) { - return LegalizerHelper::LegalizeResult::UnableToLegalize; + if (MaskNumElts == SrcNumElts) + return Legalized; + + if (MaskNumElts < SrcNumElts) { + // Extend mask to match new destination vector size with + // undef values. + SmallVector<int, 16> NewMask(Mask); + for (unsigned I = MaskNumElts; I < SrcNumElts; ++I) + NewMask.push_back(-1); + + moreElementsVectorDst(MI, SrcTy, 0); + MIRBuilder.setInstrAndDebugLoc(MI); + MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(), + MI.getOperand(1).getReg(), + MI.getOperand(2).getReg(), NewMask); + MI.eraseFromParent(); + + return Legalized; } unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts); @@ -5014,19 +5043,14 @@ equalizeVectorShuffleLengths(MachineInstr &MI, MachineIRBuilder &MIRBuilder) { LegalizerHelper::LegalizeResult LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI, unsigned int TypeIdx, LLT MoreTy) { - Register DstReg = MI.getOperand(0).getReg(); - Register Src1Reg = MI.getOperand(1).getReg(); - Register Src2Reg = MI.getOperand(2).getReg(); + auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs(); ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); - LLT DstTy = MRI.getType(DstReg); - LLT Src1Ty = MRI.getType(Src1Reg); - LLT Src2Ty = MRI.getType(Src2Reg); unsigned NumElts = DstTy.getNumElements(); unsigned WidenNumElts = MoreTy.getNumElements(); if (DstTy.isVector() && Src1Ty.isVector() && - DstTy.getNumElements() > Src1Ty.getNumElements()) { - return equalizeVectorShuffleLengths(MI, MIRBuilder); + DstTy.getNumElements() != Src1Ty.getNumElements()) { + return equalizeVectorShuffleLengths(MI); } if (TypeIdx != 0) @@ -5218,9 +5242,7 @@ LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) { - Register DstReg = MI.getOperand(0).getReg(); - Register Src1 = MI.getOperand(1).getReg(); - Register Src2 = MI.getOperand(2).getReg(); + auto [DstReg, Src1, Src2] = MI.getFirst3Regs(); LLT Ty = MRI.getType(DstReg); if (Ty.isVector()) @@ -5471,8 +5493,7 @@ LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, if (TypeIdx != 0) return UnableToLegalize; - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); + auto [DstReg, SrcReg] = MI.getFirst2Regs(); LLT DstTy = MRI.getType(DstReg); if (DstTy.isVector()) @@ -5539,10 +5560,7 @@ LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, if (TypeIdx != 1) return UnableToLegalize; - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(SrcReg); + auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); unsigned NarrowSize = NarrowTy.getSizeInBits(); if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) { @@ -5575,10 +5593,7 @@ LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, if (TypeIdx != 1) return UnableToLegalize; - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(SrcReg); + auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); unsigned NarrowSize = NarrowTy.getSizeInBits(); if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) { @@ -5611,9 +5626,7 @@ LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, if (TypeIdx != 1) return UnableToLegalize; - Register DstReg = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); unsigned NarrowSize = NarrowTy.getSizeInBits(); if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) { @@ -5631,6 +5644,31 @@ LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, } LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 1) + return UnableToLegalize; + + MachineIRBuilder &B = MIRBuilder; + Register ExpReg = MI.getOperand(2).getReg(); + LLT ExpTy = MRI.getType(ExpReg); + + unsigned ClampSize = NarrowTy.getScalarSizeInBits(); + + // Clamp the exponent to the range of the target type. + auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize)); + auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp); + auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize)); + auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp); + + auto Trunc = B.buildTrunc(NarrowTy, Clamp); + Observer.changingInstr(MI); + MI.getOperand(2).setReg(Trunc.getReg(0)); + Observer.changedInstr(MI); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerBitCount(MachineInstr &MI) { unsigned Opc = MI.getOpcode(); const auto &TII = MIRBuilder.getTII(); @@ -5649,10 +5687,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { return Legalized; } case TargetOpcode::G_CTLZ: { - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(SrcReg); + auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); unsigned Len = SrcTy.getSizeInBits(); if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) { @@ -5699,10 +5734,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { return Legalized; } case TargetOpcode::G_CTTZ: { - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(SrcReg); + auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); unsigned Len = SrcTy.getSizeInBits(); if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) { @@ -5808,10 +5840,7 @@ static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, LegalizerHelper::LegalizeResult LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - Register X = MI.getOperand(1).getReg(); - Register Y = MI.getOperand(2).getReg(); - Register Z = MI.getOperand(3).getReg(); + auto [Dst, X, Y, Z] = MI.getFirst4Regs(); LLT Ty = MRI.getType(Dst); LLT ShTy = MRI.getType(Z); @@ -5850,10 +5879,7 @@ LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - Register X = MI.getOperand(1).getReg(); - Register Y = MI.getOperand(2).getReg(); - Register Z = MI.getOperand(3).getReg(); + auto [Dst, X, Y, Z] = MI.getFirst4Regs(); LLT Ty = MRI.getType(Dst); LLT ShTy = MRI.getType(Z); @@ -5932,10 +5958,7 @@ LegalizerHelper::lowerFunnelShift(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - Register Amt = MI.getOperand(2).getReg(); - LLT AmtTy = MRI.getType(Amt); + auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs(); auto Zero = MIRBuilder.buildConstant(AmtTy, 0); bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL; unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL; @@ -5946,12 +5969,7 @@ LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) { } LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - Register Amt = MI.getOperand(2).getReg(); - LLT DstTy = MRI.getType(Dst); - LLT SrcTy = MRI.getType(Src); - LLT AmtTy = MRI.getType(Amt); + auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs(); unsigned EltSizeInBits = DstTy.getScalarSizeInBits(); bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL; @@ -6021,8 +6039,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) { // representation. LegalizerHelper::LegalizeResult LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); + auto [Dst, Src] = MI.getFirst2Regs(); const LLT S64 = LLT::scalar(64); const LLT S32 = LLT::scalar(32); const LLT S1 = LLT::scalar(1); @@ -6077,10 +6094,7 @@ LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) { } LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(Dst); - LLT SrcTy = MRI.getType(Src); + auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); if (SrcTy == LLT::scalar(1)) { auto True = MIRBuilder.buildFConstant(DstTy, 1.0); @@ -6105,10 +6119,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) { } LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(Dst); - LLT SrcTy = MRI.getType(Src); + auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); const LLT S64 = LLT::scalar(64); const LLT S32 = LLT::scalar(32); @@ -6151,10 +6162,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) { } LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(Dst); - LLT SrcTy = MRI.getType(Src); + auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); const LLT S64 = LLT::scalar(64); const LLT S32 = LLT::scalar(32); @@ -6194,10 +6202,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) { } LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(Dst); - LLT SrcTy = MRI.getType(Src); + auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); const LLT S64 = LLT::scalar(64); const LLT S32 = LLT::scalar(32); @@ -6263,17 +6268,27 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) { // f64 -> f16 conversion using round-to-nearest-even rounding mode. LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); + const LLT S1 = LLT::scalar(1); + const LLT S32 = LLT::scalar(32); + + auto [Dst, Src] = MI.getFirst2Regs(); + assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) && + MRI.getType(Src).getScalarType() == LLT::scalar(64)); if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly. return UnableToLegalize; + if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) { + unsigned Flags = MI.getFlags(); + auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags); + MIRBuilder.buildFPTrunc(Dst, Src32, Flags); + MI.eraseFromParent(); + return Legalized; + } + const unsigned ExpMask = 0x7ff; const unsigned ExpBiasf64 = 1023; const unsigned ExpBiasf16 = 15; - const LLT S32 = LLT::scalar(32); - const LLT S1 = LLT::scalar(1); auto Unmerge = MIRBuilder.buildUnmerge(S32, Src); Register U = Unmerge.getReg(0); @@ -6368,11 +6383,7 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - - LLT DstTy = MRI.getType(Dst); - LLT SrcTy = MRI.getType(Src); + auto [DstTy, SrcTy] = MI.getFirst2LLTs(); const LLT S64 = LLT::scalar(64); const LLT S16 = LLT::scalar(16); @@ -6385,9 +6396,7 @@ LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) { // TODO: If RHS is a constant SelectionDAGBuilder expands this into a // multiplication tree. LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - Register Src0 = MI.getOperand(1).getReg(); - Register Src1 = MI.getOperand(2).getReg(); + auto [Dst, Src0, Src1] = MI.getFirst3Regs(); LLT Ty = MRI.getType(Dst); auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1); @@ -6412,9 +6421,7 @@ static CmpInst::Predicate minMaxToCompare(unsigned Opc) { } LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - Register Src0 = MI.getOperand(1).getReg(); - Register Src1 = MI.getOperand(2).getReg(); + auto [Dst, Src0, Src1] = MI.getFirst3Regs(); const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode()); LLT CmpType = MRI.getType(Dst).changeElementSize(1); @@ -6428,13 +6435,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerFCopySign(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - Register Src0 = MI.getOperand(1).getReg(); - Register Src1 = MI.getOperand(2).getReg(); - - const LLT Src0Ty = MRI.getType(Src0); - const LLT Src1Ty = MRI.getType(Src1); - + auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs(); const int Src0Size = Src0Ty.getScalarSizeInBits(); const int Src1Size = Src1Ty.getScalarSizeInBits(); @@ -6475,9 +6476,7 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) { unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ? TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE; - Register Dst = MI.getOperand(0).getReg(); - Register Src0 = MI.getOperand(1).getReg(); - Register Src1 = MI.getOperand(2).getReg(); + auto [Dst, Src0, Src1] = MI.getFirst3Regs(); LLT Ty = MRI.getType(Dst); if (!MI.getFlag(MachineInstr::FmNoNans)) { @@ -6516,8 +6515,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) { - Register DstReg = MI.getOperand(0).getReg(); - Register X = MI.getOperand(1).getReg(); + auto [DstReg, X] = MI.getFirst2Regs(); const unsigned Flags = MI.getFlags(); const LLT Ty = MRI.getType(DstReg); const LLT CondTy = Ty.changeElementSize(1); @@ -6547,10 +6545,8 @@ LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) { return Legalized; } -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerFFloor(MachineInstr &MI) { - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); +LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) { + auto [DstReg, SrcReg] = MI.getFirst2Regs(); unsigned Flags = MI.getFlags(); LLT Ty = MRI.getType(DstReg); const LLT CondTy = Ty.changeElementSize(1); @@ -6577,11 +6573,8 @@ LegalizerHelper::lowerFFloor(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerMergeValues(MachineInstr &MI) { const unsigned NumOps = MI.getNumOperands(); - Register DstReg = MI.getOperand(0).getReg(); - Register Src0Reg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(Src0Reg); - unsigned PartSize = SrcTy.getSizeInBits(); + auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs(); + unsigned PartSize = Src0Ty.getSizeInBits(); LLT WideTy = LLT::scalar(DstTy.getSizeInBits()); Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0); @@ -6729,11 +6722,8 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { - Register DstReg = MI.getOperand(0).getReg(); - Register Src0Reg = MI.getOperand(1).getReg(); - Register Src1Reg = MI.getOperand(2).getReg(); - LLT Src0Ty = MRI.getType(Src0Reg); - LLT DstTy = MRI.getType(DstReg); + auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] = + MI.getFirst3RegLLTs(); LLT IdxTy = LLT::scalar(32); ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); @@ -6822,13 +6812,9 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerExtract(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); + auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); unsigned Offset = MI.getOperand(2).getImm(); - LLT DstTy = MRI.getType(Dst); - LLT SrcTy = MRI.getType(Src); - // Extract sub-vector or one element if (SrcTy.isVector()) { unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits(); @@ -6837,7 +6823,7 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) { if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) && (Offset + DstSize <= SrcTy.getSizeInBits())) { // Unmerge and allow access to each Src element for the artifact combiner. - auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), Src); + auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg); // Take element(s) we need to extract and copy it (merge them). SmallVector<Register, 8> SubVectorElts; @@ -6846,9 +6832,9 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) { SubVectorElts.push_back(Unmerge.getReg(Idx)); } if (SubVectorElts.size() == 1) - MIRBuilder.buildCopy(Dst, SubVectorElts[0]); + MIRBuilder.buildCopy(DstReg, SubVectorElts[0]); else - MIRBuilder.buildMergeLikeInstr(Dst, SubVectorElts); + MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts); MI.eraseFromParent(); return Legalized; @@ -6861,15 +6847,15 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) { LLT SrcIntTy = SrcTy; if (!SrcTy.isScalar()) { SrcIntTy = LLT::scalar(SrcTy.getSizeInBits()); - Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0); + SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0); } if (Offset == 0) - MIRBuilder.buildTrunc(Dst, Src); + MIRBuilder.buildTrunc(DstReg, SrcReg); else { auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset); - auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt); - MIRBuilder.buildTrunc(Dst, Shr); + auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt); + MIRBuilder.buildTrunc(DstReg, Shr); } MI.eraseFromParent(); @@ -6880,9 +6866,7 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) { } LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - Register InsertSrc = MI.getOperand(2).getReg(); + auto [Dst, Src, InsertSrc] = MI.getFirst3Regs(); uint64_t Offset = MI.getOperand(3).getImm(); LLT DstTy = MRI.getType(Src); @@ -6972,14 +6956,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) { - Register Dst0 = MI.getOperand(0).getReg(); - Register Dst1 = MI.getOperand(1).getReg(); - Register LHS = MI.getOperand(2).getReg(); - Register RHS = MI.getOperand(3).getReg(); + auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] = + MI.getFirst4RegLLTs(); const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO; - LLT Ty = MRI.getType(Dst0); - LLT BoolTy = MRI.getType(Dst1); + LLT Ty = Dst0Ty; + LLT BoolTy = Dst1Ty; if (IsAdd) MIRBuilder.buildAdd(Dst0, LHS, RHS); @@ -7008,9 +6990,7 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) { - Register Res = MI.getOperand(0).getReg(); - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); + auto [Res, LHS, RHS] = MI.getFirst3Regs(); LLT Ty = MRI.getType(Res); bool IsSigned; bool IsAdd; @@ -7085,9 +7065,7 @@ LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) { - Register Res = MI.getOperand(0).getReg(); - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); + auto [Res, LHS, RHS] = MI.getFirst3Regs(); LLT Ty = MRI.getType(Res); LLT BoolTy = Ty.changeElementSize(1); bool IsSigned; @@ -7157,9 +7135,7 @@ LegalizerHelper::lowerShlSat(MachineInstr &MI) { MI.getOpcode() == TargetOpcode::G_USHLSAT) && "Expected shlsat opcode!"); bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT; - Register Res = MI.getOperand(0).getReg(); - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); + auto [Res, LHS, RHS] = MI.getFirst3Regs(); LLT Ty = MRI.getType(Res); LLT BoolTy = Ty.changeElementSize(1); @@ -7185,10 +7161,8 @@ LegalizerHelper::lowerShlSat(MachineInstr &MI) { return Legalized; } -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerBswap(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); +LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) { + auto [Dst, Src] = MI.getFirst2Regs(); const LLT Ty = MRI.getType(Src); unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8; unsigned BaseShiftAmt = (SizeInBytes - 1) * 8; @@ -7233,8 +7207,7 @@ static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, LegalizerHelper::LegalizeResult LegalizerHelper::lowerBitreverse(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); + auto [Dst, Src] = MI.getFirst2Regs(); const LLT Ty = MRI.getType(Src); unsigned Size = Ty.getSizeInBits(); @@ -7312,23 +7285,23 @@ LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) { - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(SrcReg); - uint64_t Mask = MI.getOperand(2).getImm(); + auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); + FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm()); - if (Mask == 0) { + if (Mask == fcNone) { MIRBuilder.buildConstant(DstReg, 0); MI.eraseFromParent(); return Legalized; } - if ((Mask & fcAllFlags) == fcAllFlags) { + if (Mask == fcAllFlags) { MIRBuilder.buildConstant(DstReg, 1); MI.eraseFromParent(); return Legalized; } + // TODO: Try inverting the test with getInvertedFPClassTest like the DAG + // version + unsigned BitSize = SrcTy.getScalarSizeInBits(); const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType()); @@ -7345,7 +7318,7 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) { APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf; APInt QNaNBitMask = APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1); - APInt InvertionMask = APInt::getAllOnesValue(DstTy.getScalarSizeInBits()); + APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits()); auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit); auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask); @@ -7358,8 +7331,10 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) { MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs); auto Res = MIRBuilder.buildConstant(DstTy, 0); + // Clang doesn't support capture of structured bindings: + LLT DstTyCopy = DstTy; const auto appendToRes = [&](MachineInstrBuilder ToAppend) { - Res = MIRBuilder.buildOr(DstTy, Res, ToAppend); + Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend); }; // Tests that involve more than one class should be processed first. @@ -7382,8 +7357,20 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) { Mask &= ~fcNegFinite; } + if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) { + // fcZero | fcSubnormal => test all exponent bits are 0 + // TODO: Handle sign bit specific cases + // TODO: Handle inverted case + if (PartialCheck == (fcZero | fcSubnormal)) { + auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC); + appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, + ExpBits, ZeroC)); + Mask &= ~PartialCheck; + } + } + // Check for individual classes. - if (unsigned PartialCheck = Mask & fcZero) { + if (FPClassTest PartialCheck = Mask & fcZero) { if (PartialCheck == fcPosZero) appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, AsInt, ZeroC)); @@ -7395,7 +7382,21 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) { AsInt, SignBitC)); } - if (unsigned PartialCheck = Mask & fcInf) { + if (FPClassTest PartialCheck = Mask & fcSubnormal) { + // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set) + // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set) + auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs; + auto OneC = MIRBuilder.buildConstant(IntTy, 1); + auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC); + auto SubnormalRes = + MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne, + MIRBuilder.buildConstant(IntTy, AllOneMantissa)); + if (PartialCheck == fcNegSubnormal) + SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign); + appendToRes(SubnormalRes); + } + + if (FPClassTest PartialCheck = Mask & fcInf) { if (PartialCheck == fcPosInf) appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, AsInt, InfC)); @@ -7410,7 +7411,7 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) { } } - if (unsigned PartialCheck = Mask & fcNan) { + if (FPClassTest PartialCheck = Mask & fcNan) { auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask); if (PartialCheck == fcNan) { // isnan(V) ==> abs(V) u> int(inf) @@ -7431,21 +7432,7 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) { } } - if (unsigned PartialCheck = Mask & fcSubnormal) { - // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set) - // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set) - auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs; - auto OneC = MIRBuilder.buildConstant(IntTy, 1); - auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC); - auto SubnormalRes = - MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne, - MIRBuilder.buildConstant(IntTy, AllOneMantissa)); - if (PartialCheck == fcNegSubnormal) - SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign); - appendToRes(SubnormalRes); - } - - if (unsigned PartialCheck = Mask & fcNormal) { + if (FPClassTest PartialCheck = Mask & fcNormal) { // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u< // (max_exp-1)) APInt ExpLSB = ExpMask & ~(ExpMask.shl(1)); @@ -7472,12 +7459,8 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) { // Implement vector G_SELECT in terms of XOR, AND, OR. - Register DstReg = MI.getOperand(0).getReg(); - Register MaskReg = MI.getOperand(1).getReg(); - Register Op1Reg = MI.getOperand(2).getReg(); - Register Op2Reg = MI.getOperand(3).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT MaskTy = MRI.getType(MaskReg); + auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] = + MI.getFirst4RegLLTs(); if (!DstTy.isVector()) return UnableToLegalize; @@ -7591,7 +7574,7 @@ LegalizerHelper::lowerVectorReduction(MachineInstr &MI) { Observer.changedInstr(MI); return Legalized; } - return UnableToLegalize;; + return UnableToLegalize; } static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { @@ -7638,7 +7621,7 @@ static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps, // SDAGisms map cleanly to GISel concepts. if (NewTy.isVector()) NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32); - NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1)); + NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1)); unsigned NewTySize = NewTy.getSizeInBytes(); assert(NewTySize > 0 && "Could not find appropriate type"); @@ -7826,9 +7809,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE); - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - Register Len = MI.getOperand(2).getReg(); + auto [Dst, Src, Len] = MI.getFirst3Regs(); const auto *MMOIt = MI.memoperands_begin(); const MachineMemOperand *MemOp = *MMOIt; @@ -8091,9 +8072,7 @@ LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { Align DstAlign = MemOp->getBaseAlign(); Align SrcAlign; - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - Register Len = MI.getOperand(2).getReg(); + auto [Dst, Src, Len] = MI.getFirst3Regs(); if (Opc != TargetOpcode::G_MEMSET) { assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 4b6c3a156709..1f2e481c63e0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -21,7 +22,6 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/LowLevelTypeImpl.h" #include <algorithm> using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp index 7c6eac8c8ce0..49f40495d6fc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp @@ -10,6 +10,8 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" @@ -18,7 +20,7 @@ #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/Utils.h" -#include "llvm/CodeGen/LowLevelType.h" +#include "llvm/CodeGen/LowLevelTypeUtils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -305,7 +307,7 @@ bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) { const auto &DL = MF->getFunction().getParent()->getDataLayout(); bool AnyMerged = false; do { - unsigned NumPow2 = PowerOf2Floor(StoresToMerge.size()); + unsigned NumPow2 = llvm::bit_floor(StoresToMerge.size()); unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedValue(); // Compute the biggest store we can generate to handle the number of stores. unsigned MergeSizeBits; @@ -400,7 +402,9 @@ bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) { auto NewStore = Builder.buildStore(WideReg, FirstStore->getPointerReg(), *WideMMO); (void) NewStore; - LLVM_DEBUG(dbgs() << "Created merged store: " << *NewStore); + LLVM_DEBUG(dbgs() << "Merged " << Stores.size() + << " stores into merged store: " << *NewStore); + LLVM_DEBUG(for (auto *MI : Stores) dbgs() << " " << *MI;); NumStoresMerged += Stores.size(); MachineOptimizationRemarkEmitter MORE(*MF, nullptr); @@ -445,20 +449,19 @@ bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &C) { for (auto AliasInfo : reverse(C.PotentialAliases)) { MachineInstr *PotentialAliasOp = AliasInfo.first; unsigned PreCheckedIdx = AliasInfo.second; - if (static_cast<unsigned>(Idx) > PreCheckedIdx) { - // Need to check this alias. - if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI, - AA)) { - LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp - << " detected\n"); - return true; - } - } else { + if (static_cast<unsigned>(Idx) < PreCheckedIdx) { // Once our store index is lower than the index associated with the // potential alias, we know that we've already checked for this alias // and all of the earlier potential aliases too. return false; } + // Need to check this alias. + if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI, + AA)) { + LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp + << " detected\n"); + return true; + } } return false; }; @@ -616,11 +619,304 @@ bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) { return Changed; } +/// Check if the store \p Store is a truncstore that can be merged. That is, +/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty +/// Register then it does not need to match and SrcVal is set to the source +/// value found. +/// On match, returns the start byte offset of the \p SrcVal that is being +/// stored. +static std::optional<int64_t> +getTruncStoreByteOffset(GStore &Store, Register &SrcVal, + MachineRegisterInfo &MRI) { + Register TruncVal; + if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal)))) + return std::nullopt; + + // The shift amount must be a constant multiple of the narrow type. + // It is translated to the offset address in the wide source value "y". + // + // x = G_LSHR y, ShiftAmtC + // s8 z = G_TRUNC x + // store z, ... + Register FoundSrcVal; + int64_t ShiftAmt; + if (!mi_match(TruncVal, MRI, + m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)), + m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) { + if (!SrcVal.isValid() || TruncVal == SrcVal) { + if (!SrcVal.isValid()) + SrcVal = TruncVal; + return 0; // If it's the lowest index store. + } + return std::nullopt; + } + + unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits(); + if (ShiftAmt % NarrowBits != 0) + return std::nullopt; + const unsigned Offset = ShiftAmt / NarrowBits; + + if (SrcVal.isValid() && FoundSrcVal != SrcVal) + return std::nullopt; + + if (!SrcVal.isValid()) + SrcVal = FoundSrcVal; + else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal)) + return std::nullopt; + return Offset; +} + +/// Match a pattern where a wide type scalar value is stored by several narrow +/// stores. Fold it into a single store or a BSWAP and a store if the targets +/// supports it. +/// +/// Assuming little endian target: +/// i8 *p = ... +/// i32 val = ... +/// p[0] = (val >> 0) & 0xFF; +/// p[1] = (val >> 8) & 0xFF; +/// p[2] = (val >> 16) & 0xFF; +/// p[3] = (val >> 24) & 0xFF; +/// => +/// *((i32)p) = val; +/// +/// i8 *p = ... +/// i32 val = ... +/// p[0] = (val >> 24) & 0xFF; +/// p[1] = (val >> 16) & 0xFF; +/// p[2] = (val >> 8) & 0xFF; +/// p[3] = (val >> 0) & 0xFF; +/// => +/// *((i32)p) = BSWAP(val); +bool LoadStoreOpt::mergeTruncStore(GStore &StoreMI, + SmallPtrSetImpl<GStore *> &DeletedStores) { + LLT MemTy = StoreMI.getMMO().getMemoryType(); + + // We only handle merging simple stores of 1-4 bytes. + if (!MemTy.isScalar()) + return false; + switch (MemTy.getSizeInBits()) { + case 8: + case 16: + case 32: + break; + default: + return false; + } + if (!StoreMI.isSimple()) + return false; + + // We do a simple search for mergeable stores prior to this one. + // Any potential alias hazard along the way terminates the search. + SmallVector<GStore *> FoundStores; + + // We're looking for: + // 1) a (store(trunc(...))) + // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get + // the partial value stored. + // 3) where the offsets form either a little or big-endian sequence. + + auto &LastStore = StoreMI; + + // The single base pointer that all stores must use. + Register BaseReg; + int64_t LastOffset; + if (!mi_match(LastStore.getPointerReg(), *MRI, + m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) { + BaseReg = LastStore.getPointerReg(); + LastOffset = 0; + } + + GStore *LowestIdxStore = &LastStore; + int64_t LowestIdxOffset = LastOffset; + + Register WideSrcVal; + auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, *MRI); + if (!LowestShiftAmt) + return false; // Didn't match a trunc. + assert(WideSrcVal.isValid()); + + LLT WideStoreTy = MRI->getType(WideSrcVal); + // The wide type might not be a multiple of the memory type, e.g. s48 and s32. + if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0) + return false; + const unsigned NumStoresRequired = + WideStoreTy.getSizeInBits() / MemTy.getSizeInBits(); + + SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX); + OffsetMap[*LowestShiftAmt] = LastOffset; + FoundStores.emplace_back(&LastStore); + + const int MaxInstsToCheck = 10; + int NumInstsChecked = 0; + for (auto II = ++LastStore.getReverseIterator(); + II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck; + ++II) { + NumInstsChecked++; + GStore *NewStore; + if ((NewStore = dyn_cast<GStore>(&*II))) { + if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple()) + break; + } else if (II->isLoadFoldBarrier() || II->mayLoad()) { + break; + } else { + continue; // This is a safe instruction we can look past. + } + + Register NewBaseReg; + int64_t MemOffset; + // Check we're storing to the same base + some offset. + if (!mi_match(NewStore->getPointerReg(), *MRI, + m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) { + NewBaseReg = NewStore->getPointerReg(); + MemOffset = 0; + } + if (BaseReg != NewBaseReg) + break; + + auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, *MRI); + if (!ShiftByteOffset) + break; + if (MemOffset < LowestIdxOffset) { + LowestIdxOffset = MemOffset; + LowestIdxStore = NewStore; + } + + // Map the offset in the store and the offset in the combined value, and + // early return if it has been set before. + if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired || + OffsetMap[*ShiftByteOffset] != INT64_MAX) + break; + OffsetMap[*ShiftByteOffset] = MemOffset; + + FoundStores.emplace_back(NewStore); + // Reset counter since we've found a matching inst. + NumInstsChecked = 0; + if (FoundStores.size() == NumStoresRequired) + break; + } + + if (FoundStores.size() != NumStoresRequired) { + if (FoundStores.size() == 1) + return false; + // We didn't find enough stores to merge into the size of the original + // source value, but we may be able to generate a smaller store if we + // truncate the source value. + WideStoreTy = LLT::scalar(FoundStores.size() * MemTy.getScalarSizeInBits()); + } + + unsigned NumStoresFound = FoundStores.size(); + + const auto &DL = LastStore.getMF()->getDataLayout(); + auto &C = LastStore.getMF()->getFunction().getContext(); + // Check that a store of the wide type is both allowed and fast on the target + unsigned Fast = 0; + bool Allowed = TLI->allowsMemoryAccess( + C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast); + if (!Allowed || !Fast) + return false; + + // Check if the pieces of the value are going to the expected places in memory + // to merge the stores. + unsigned NarrowBits = MemTy.getScalarSizeInBits(); + auto checkOffsets = [&](bool MatchLittleEndian) { + if (MatchLittleEndian) { + for (unsigned i = 0; i != NumStoresFound; ++i) + if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset) + return false; + } else { // MatchBigEndian by reversing loop counter. + for (unsigned i = 0, j = NumStoresFound - 1; i != NumStoresFound; + ++i, --j) + if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset) + return false; + } + return true; + }; + + // Check if the offsets line up for the native data layout of this target. + bool NeedBswap = false; + bool NeedRotate = false; + if (!checkOffsets(DL.isLittleEndian())) { + // Special-case: check if byte offsets line up for the opposite endian. + if (NarrowBits == 8 && checkOffsets(DL.isBigEndian())) + NeedBswap = true; + else if (NumStoresFound == 2 && checkOffsets(DL.isBigEndian())) + NeedRotate = true; + else + return false; + } + + if (NeedBswap && + !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}, *MF)) + return false; + if (NeedRotate && + !isLegalOrBeforeLegalizer( + {TargetOpcode::G_ROTR, {WideStoreTy, WideStoreTy}}, *MF)) + return false; + + Builder.setInstrAndDebugLoc(StoreMI); + + if (WideStoreTy != MRI->getType(WideSrcVal)) + WideSrcVal = Builder.buildTrunc(WideStoreTy, WideSrcVal).getReg(0); + + if (NeedBswap) { + WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0); + } else if (NeedRotate) { + assert(WideStoreTy.getSizeInBits() % 2 == 0 && + "Unexpected type for rotate"); + auto RotAmt = + Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2); + WideSrcVal = + Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0); + } + + Builder.buildStore(WideSrcVal, LowestIdxStore->getPointerReg(), + LowestIdxStore->getMMO().getPointerInfo(), + LowestIdxStore->getMMO().getAlign()); + + // Erase the old stores. + for (auto *ST : FoundStores) { + ST->eraseFromParent(); + DeletedStores.insert(ST); + } + return true; +} + +bool LoadStoreOpt::mergeTruncStoresBlock(MachineBasicBlock &BB) { + bool Changed = false; + SmallVector<GStore *, 16> Stores; + SmallPtrSet<GStore *, 8> DeletedStores; + // Walk up the block so we can see the most eligible stores. + for (MachineInstr &MI : llvm::reverse(BB)) + if (auto *StoreMI = dyn_cast<GStore>(&MI)) + Stores.emplace_back(StoreMI); + + for (auto *StoreMI : Stores) { + if (DeletedStores.count(StoreMI)) + continue; + if (mergeTruncStore(*StoreMI, DeletedStores)) + Changed = true; + } + return Changed; +} + bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) { bool Changed = false; - for (auto &BB : MF) { + for (auto &BB : MF){ Changed |= mergeBlockStores(BB); + Changed |= mergeTruncStoresBlock(BB); + } + + // Erase all dead instructions left over by the merging. + if (Changed) { + for (auto &BB : MF) { + for (auto &I : make_early_inc_range(make_range(BB.rbegin(), BB.rend()))) { + if (isTriviallyDead(I, *MRI)) + I.eraseFromParent(); + } + } } + return Changed; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp index bf4dcc2c2459..55984423e5bc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -54,7 +54,7 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def, MachineInstr &MIUse = *MOUse.getParent(); InsertMBB = MIUse.getParent(); if (MIUse.isPHI()) - InsertMBB = MIUse.getOperand(MIUse.getOperandNo(&MOUse) + 1).getMBB(); + InsertMBB = MIUse.getOperand(MOUse.getOperandNo() + 1).getMBB(); return InsertMBB == Def.getParent(); } @@ -99,7 +99,7 @@ bool Localizer::localizeInterBlock(MachineFunction &MF, MachineBasicBlock *InsertMBB; LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent(); dbgs() << "Checking use: " << MIUse - << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n'); + << " #Opd: " << MOUse.getOperandNo() << '\n'); if (isLocalUse(MOUse, MI, InsertMBB)) { // Even if we're in the same block, if the block is very large we could // still have many long live ranges. Try to do intra-block localization diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 9100e064f30f..962b54ec5d6b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -80,11 +80,11 @@ MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI, assert( cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) && "Expected inlined-at fields to agree"); - return buildInstr(TargetOpcode::DBG_VALUE) - .addFrameIndex(FI) - .addImm(0) - .addMetadata(Variable) - .addMetadata(Expr); + return insertInstr(buildInstrNoInsert(TargetOpcode::DBG_VALUE) + .addFrameIndex(FI) + .addImm(0) + .addMetadata(Variable) + .addMetadata(Expr)); } MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, @@ -164,6 +164,15 @@ MachineInstrBuilder MachineIRBuilder::buildGlobalValue(const DstOp &Res, return MIB; } +MachineInstrBuilder MachineIRBuilder::buildConstantPool(const DstOp &Res, + unsigned Idx) { + assert(Res.getLLTTy(*getMRI()).isPointer() && "invalid operand type"); + auto MIB = buildInstr(TargetOpcode::G_CONSTANT_POOL); + Res.addDefToMIB(*getMRI(), MIB); + MIB.addConstantPoolIndex(Idx); + return MIB; +} + MachineInstrBuilder MachineIRBuilder::buildJumpTable(const LLT PtrTy, unsigned JTI) { return buildInstr(TargetOpcode::G_JUMP_TABLE, {PtrTy}, {}) @@ -229,17 +238,25 @@ MachineIRBuilder::buildPadVectorWithUndefElements(const DstOp &Res, LLT ResTy = Res.getLLTTy(*getMRI()); LLT Op0Ty = Op0.getLLTTy(*getMRI()); - assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type"); - assert((ResTy.getElementType() == Op0Ty.getElementType()) && - "Different vector element types"); - assert((ResTy.getNumElements() > Op0Ty.getNumElements()) && - "Op0 has more elements"); + assert(ResTy.isVector() && "Res non vector type"); - auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0); SmallVector<Register, 8> Regs; - for (auto Op : Unmerge.getInstr()->defs()) - Regs.push_back(Op.getReg()); - Register Undef = buildUndef(Op0Ty.getElementType()).getReg(0); + if (Op0Ty.isVector()) { + assert((ResTy.getElementType() == Op0Ty.getElementType()) && + "Different vector element types"); + assert((ResTy.getNumElements() > Op0Ty.getNumElements()) && + "Op0 has more elements"); + auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0); + + for (auto Op : Unmerge.getInstr()->defs()) + Regs.push_back(Op.getReg()); + } else { + assert((ResTy.getSizeInBits() > Op0Ty.getSizeInBits()) && + "Op0 has more size"); + Regs.push_back(Op0.getReg()); + } + Register Undef = + buildUndef(Op0Ty.isVector() ? Op0Ty.getElementType() : Op0Ty).getReg(0); unsigned NumberOfPadElts = ResTy.getNumElements() - Regs.size(); for (unsigned i = 0; i < NumberOfPadElts; ++i) Regs.push_back(Undef); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 080f3ca540f2..885a1056b2ea 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -69,8 +69,8 @@ INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) -RegBankSelect::RegBankSelect(Mode RunningMode) - : MachineFunctionPass(ID), OptMode(RunningMode) { +RegBankSelect::RegBankSelect(char &PassID, Mode RunningMode) + : MachineFunctionPass(PassID), OptMode(RunningMode) { if (RegBankSelectMode.getNumOccurrences() != 0) { OptMode = RegBankSelectMode; if (RegBankSelectMode != RunningMode) @@ -162,8 +162,10 @@ bool RegBankSelect::repairReg( MI = MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY) .addDef(Dst) .addUse(Src); - LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst) - << '\n'); + LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << ':' + << printRegClassOrBank(Src, *MRI, TRI) + << " to: " << printReg(Dst) << ':' + << printRegClassOrBank(Dst, *MRI, TRI) << '\n'); } else { // TODO: Support with G_IMPLICIT_DEF + G_INSERT sequence or G_EXTRACT // sequence. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 07448548c295..080600d3cc98 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -230,10 +230,7 @@ bool llvm::isTriviallyDead(const MachineInstr &MI, return false; // Instructions without side-effects are dead iff they only define dead vregs. - for (const auto &MO : MI.operands()) { - if (!MO.isReg() || !MO.isDef()) - continue; - + for (const auto &MO : MI.all_defs()) { Register Reg = MO.getReg(); if (Reg.isPhysical() || !MRI.use_nodbg_empty(Reg)) return false; @@ -711,14 +708,14 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, Align llvm::inferAlignFromPtrInfo(MachineFunction &MF, const MachinePointerInfo &MPO) { - auto PSV = MPO.V.dyn_cast<const PseudoSourceValue *>(); + auto PSV = dyn_cast_if_present<const PseudoSourceValue *>(MPO.V); if (auto FSPV = dyn_cast_or_null<FixedStackPseudoSourceValue>(PSV)) { MachineFrameInfo &MFI = MF.getFrameInfo(); return commonAlignment(MFI.getObjectAlign(FSPV->getFrameIndex()), MPO.Offset); } - if (const Value *V = MPO.V.dyn_cast<const Value *>()) { + if (const Value *V = dyn_cast_if_present<const Value *>(MPO.V)) { const Module *M = MF.getFunction().getParent(); return V->getPointerAlignment(M->getDataLayout()); } @@ -797,7 +794,7 @@ llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) { auto MaybeCst = getIConstantVRegVal(R, MRI); if (!MaybeCst) return std::nullopt; - return MaybeCst->countLeadingZeros(); + return MaybeCst->countl_zero(); }; if (Ty.isVector()) { // Try to constant fold each element. |