author    Dimitry Andric <dim@FreeBSD.org>  2023-09-02 21:17:18 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2023-12-08 17:34:50 +0000
commit    06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e (patch)
tree      62f873df87c7c675557a179e0c4c83fe9f3087bc /contrib/llvm-project/llvm/lib/CodeGen/GlobalISel
parent    cf037972ea8863e2bab7461d77345367d2c1e054 (diff)
parent    7fa27ce4a07f19b07799a767fc29416f3b625afb (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/GlobalISel')
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp                9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp           5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp       535
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp  68
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp        11
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp         154
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp      6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp     13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp   60
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp     9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp             28
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp      571
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp          2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp         322
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp              4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp      45
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp         10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp                 11
18 files changed, 1051 insertions, 812 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
index 356d208fc881..e047996f9aa8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -217,10 +217,14 @@ void GISelCSEInfo::handleRemoveInst(MachineInstr *MI) {
}
void GISelCSEInfo::handleRecordedInsts() {
+ if (HandlingRecordedInstrs)
+ return;
+ HandlingRecordedInstrs = true;
while (!TemporaryInsts.empty()) {
auto *MI = TemporaryInsts.pop_back_val();
handleRecordedInst(MI);
}
+ HandlingRecordedInstrs = false;
}
bool GISelCSEInfo::shouldCSE(unsigned Opc) const {
@@ -392,9 +396,10 @@ GISelInstProfileBuilder::addNodeIDReg(Register Reg) const {
addNodeIDRegType(Ty);
if (const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(Reg)) {
- if (const auto *RB = RCOrRB.dyn_cast<const RegisterBank *>())
+ if (const auto *RB = dyn_cast_if_present<const RegisterBank *>(RCOrRB))
addNodeIDRegType(RB);
- else if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
+ else if (const auto *RC =
+ dyn_cast_if_present<const TargetRegisterClass *>(RCOrRB))
addNodeIDRegType(RC);
}
return *this;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 89872259cfca..28c33e2038e4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -846,7 +846,7 @@ void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
unsigned NumValues = SplitVTs.size();
Align BaseAlign = DL.getPrefTypeAlign(RetTy);
Type *RetPtrTy = RetTy->getPointerTo(DL.getAllocaAddrSpace());
- LLT OffsetLLTy = getLLTForType(*DL.getIntPtrType(RetPtrTy), DL);
+ LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetPtrTy), DL);
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
@@ -876,8 +876,7 @@ void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy,
unsigned NumValues = SplitVTs.size();
Align BaseAlign = DL.getPrefTypeAlign(RetTy);
unsigned AS = DL.getAllocaAddrSpace();
- LLT OffsetLLTy =
- getLLTForType(*DL.getIntPtrType(RetTy->getPointerTo(AS)), DL);
+ LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetTy->getPointerTo(AS)), DL);
MachinePointerInfo PtrInfo(AS);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index af4bb1634746..cc7fb3ee1109 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -16,7 +16,7 @@
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -399,7 +399,8 @@ namespace {
/// Select a preference between two uses. CurrentUse is the current preference
/// while *ForCandidate holds the attributes of the candidate under consideration.
-PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
+PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
+ PreferredTuple &CurrentUse,
const LLT TyForCandidate,
unsigned OpcodeForCandidate,
MachineInstr *MIForCandidate) {
@@ -425,8 +426,10 @@ PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
// Prefer sign extensions to zero extensions as sign-extensions tend to be
- // more expensive.
- if (CurrentUse.Ty == TyForCandidate) {
+ // more expensive. Don't do this if the load is already a zero-extend load
+ // though, otherwise we'll rewrite a zero-extend load into a sign-extend
+ // later.
+ if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
OpcodeForCandidate == TargetOpcode::G_ZEXT)
return CurrentUse;
@@ -535,7 +538,7 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
// For non power-of-2 types, they will very likely be legalized into multiple
// loads. Don't bother trying to match them into extending loads.
- if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
+ if (!llvm::has_single_bit<uint32_t>(LoadValueTy.getSizeInBits()))
return false;
// Find the preferred type aside from the any-extends (unless it's the only
@@ -566,7 +569,7 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
.Action != LegalizeActions::Legal)
continue;
}
- Preferred = ChoosePreferredUse(Preferred,
+ Preferred = ChoosePreferredUse(MI, Preferred,
MRI.getType(UseMI.getOperand(0).getReg()),
UseMI.getOpcode(), &UseMI);
}
@@ -727,7 +730,7 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
Register PtrReg = LoadMI->getPointerReg();
unsigned RegSize = RegTy.getSizeInBits();
uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
- unsigned MaskSizeBits = MaskVal.countTrailingOnes();
+ unsigned MaskSizeBits = MaskVal.countr_one();
// The mask may not be larger than the in-memory type, as it might cover sign
// extended bits
@@ -1189,16 +1192,22 @@ void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
// Check which instruction is first in the block so we don't break def-use
- // deps by "moving" the instruction incorrectly.
- if (dominates(MI, *OtherMI))
+ // deps by "moving" the instruction incorrectly. Also keep track of which
+ // instruction is first so we pick its operands, avoiding use-before-def
+ // bugs.
+ MachineInstr *FirstInst;
+ if (dominates(MI, *OtherMI)) {
Builder.setInstrAndDebugLoc(MI);
- else
+ FirstInst = &MI;
+ } else {
Builder.setInstrAndDebugLoc(*OtherMI);
+ FirstInst = OtherMI;
+ }
Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
: TargetOpcode::G_UDIVREM,
{DestDivReg, DestRemReg},
- {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
+ {FirstInst->getOperand(1), FirstInst->getOperand(2)});
MI.eraseFromParent();
OtherMI->eraseFromParent();
}
@@ -1285,65 +1294,57 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
LegalizerHelper::LegalizeResult::Legalized;
}
-static std::optional<APFloat>
-constantFoldFpUnary(unsigned Opcode, LLT DstTy, const Register Op,
- const MachineRegisterInfo &MRI) {
- const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
- if (!MaybeCst)
- return std::nullopt;
-
- APFloat V = MaybeCst->getValueAPF();
- switch (Opcode) {
+static APFloat constantFoldFpUnary(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const APFloat &Val) {
+ APFloat Result(Val);
+ switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode!");
case TargetOpcode::G_FNEG: {
- V.changeSign();
- return V;
+ Result.changeSign();
+ return Result;
}
case TargetOpcode::G_FABS: {
- V.clearSign();
- return V;
+ Result.clearSign();
+ return Result;
+ }
+ case TargetOpcode::G_FPTRUNC: {
+ bool Unused;
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Result.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven,
+ &Unused);
+ return Result;
}
- case TargetOpcode::G_FPTRUNC:
- break;
case TargetOpcode::G_FSQRT: {
bool Unused;
- V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
- V = APFloat(sqrt(V.convertToDouble()));
+ Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
+ &Unused);
+ Result = APFloat(sqrt(Result.convertToDouble()));
break;
}
case TargetOpcode::G_FLOG2: {
bool Unused;
- V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
- V = APFloat(log2(V.convertToDouble()));
+ Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
+ &Unused);
+ Result = APFloat(log2(Result.convertToDouble()));
break;
}
}
// Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
- // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`,
- // and `G_FLOG2` reach here.
+ // `buildFConstant` will assert on size mismatch. Only `G_FSQRT` and
+ // `G_FLOG2` reach here.
bool Unused;
- V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
- return V;
+ Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
+ return Result;
}
-bool CombinerHelper::matchCombineConstantFoldFpUnary(
- MachineInstr &MI, std::optional<APFloat> &Cst) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
- return Cst.has_value();
-}
-
-void CombinerHelper::applyCombineConstantFoldFpUnary(
- MachineInstr &MI, std::optional<APFloat> &Cst) {
- assert(Cst && "Optional is unexpectedly empty!");
+void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
+ const ConstantFP *Cst) {
Builder.setInstrAndDebugLoc(MI);
- MachineFunction &MF = Builder.getMF();
- auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
- Register DstReg = MI.getOperand(0).getReg();
- Builder.buildFConstant(DstReg, *FPVal);
+ APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
+ const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
+ Builder.buildFConstant(MI.getOperand(0), *NewCst);
MI.eraseFromParent();
}
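
As a standalone illustration of what the reworked fold computes (plain C++ doubles here, not the APFloat path above): each unary opcode is evaluated on the constant operand and the result is re-emitted as a G_FCONSTANT.

    #include <cassert>
    #include <cmath>

    // Plain doubles stand in for APFloat; G_FSQRT/G_FLOG2 above go through
    // an IEEEdouble conversion before converting back to the source semantics.
    int main() {
      double V = 4.0;
      assert(-V == -4.0);           // G_FNEG: flip the sign
      assert(std::fabs(-V) == 4.0); // G_FABS: clear the sign
      assert(std::sqrt(V) == 2.0);  // G_FSQRT
      assert(std::log2(V) == 2.0);  // G_FLOG2
      return 0;
    }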
@@ -1621,6 +1622,41 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
MI.eraseFromParent();
}
+bool CombinerHelper::matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
+ // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+ // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
+ auto &Shl = cast<GenericMachineInstr>(MI);
+ Register DstReg = Shl.getReg(0);
+ Register SrcReg = Shl.getReg(1);
+ Register ShiftReg = Shl.getReg(2);
+ Register X, C1;
+
+ if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
+ return false;
+
+ if (!mi_match(SrcReg, MRI,
+ m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
+ m_GOr(m_Reg(X), m_Reg(C1))))))
+ return false;
+
+ APInt C1Val, C2Val;
+ if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
+ !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
+ return false;
+
+ auto *SrcDef = MRI.getVRegDef(SrcReg);
+ assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
+ SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
+ LLT SrcTy = MRI.getType(SrcReg);
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto S1 = B.buildShl(SrcTy, X, ShiftReg);
+ auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
+ B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
+ };
+ return true;
+}
+
bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
unsigned &ShiftVal) {
assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
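
The commuted-shift rewrite in matchCommuteShift is justified by shl distributing over add (in wrapping arithmetic) and over or; a minimal standalone check of those identities (plain C++, not LLVM code):

    #include <cassert>
    #include <cstdint>

    // (x + c1) << c2 == (x << c2) + (c1 << c2)  in wrapping 32-bit arithmetic
    // (x | c1) << c2 == (x << c2) | (c1 << c2)  always
    int main() {
      for (uint32_t X : {0u, 1u, 0xDEADBEEFu, 0xFFFFFFFFu})
        for (uint32_t C1 : {0u, 7u, 0x80000000u})
          for (uint32_t C2 : {0u, 1u, 31u}) {
            assert(((X + C1) << C2) == ((X << C2) + (C1 << C2)));
            assert(((X | C1) << C2) == ((X << C2) | (C1 << C2)));
          }
      return 0;
    }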
@@ -1658,9 +1694,9 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
!mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
return false;
- // TODO: Should handle vector splat.
Register RHS = MI.getOperand(2).getReg();
- auto MaybeShiftAmtVal = getIConstantVRegValWithLookThrough(RHS, MRI);
+ MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
+ auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
if (!MaybeShiftAmtVal)
return false;
@@ -1675,12 +1711,13 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
return false;
}
- int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue();
+ int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
MatchData.Reg = ExtSrc;
MatchData.Imm = ShiftAmt;
- unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
- return MinLeadingZeros >= ShiftAmt;
+ unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one();
+ unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
+ return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
}
void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
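
The new `ShiftAmt < SrcTySize` bound guards the narrowing of `shl (zext x), C` into `zext (shl x, C)`: the inner shift now happens in the source type, so C must stay below the source width. A standalone sketch of the safe case (plain C++, s8 source widened to s32):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t X = 0x01;  // leading zeros >= shift amount, as the combine requires
      unsigned C = 3;    // C < 8, so the shift is still valid on the narrow type
      assert((uint32_t(X) << C) == uint32_t(uint8_t(X << C)));
      // With C >= 8 the narrow shift would exceed the s8 width (poison in GMIR),
      // which is exactly what the added check rejects.
      return 0;
    }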
@@ -1763,6 +1800,15 @@ void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
Register DstReg = MI.getOperand(Idx).getReg();
Register SrcReg = Operands[Idx];
+
+ // This combine may run after RegBankSelect, so we need to be aware of
+ // register banks.
+ const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
+ if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
+ SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
+ MRI.setRegClassOrRegBank(SrcReg, DstCB);
+ }
+
if (CanReuseInputDirectly)
replaceRegWith(MRI, DstReg, SrcReg);
else
@@ -2426,10 +2472,7 @@ bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
return true;
}
-bool CombinerHelper::eraseInst(MachineInstr &MI) {
- MI.eraseFromParent();
- return true;
-}
+void CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); }
bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
const MachineOperand &MOP2) {
@@ -2537,7 +2580,7 @@ bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
MaybeCst->getSExtValue() == C;
}
-bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
+void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
unsigned OpIdx) {
assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
Register OldReg = MI.getOperand(0).getReg();
@@ -2545,17 +2588,15 @@ bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
MI.eraseFromParent();
replaceRegWith(MRI, OldReg, Replacement);
- return true;
}
-bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
+void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
Register Replacement) {
assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
Register OldReg = MI.getOperand(0).getReg();
assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
MI.eraseFromParent();
replaceRegWith(MRI, OldReg, Replacement);
- return true;
}
bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
@@ -2590,36 +2631,32 @@ bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
}
-bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
+void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
Builder.buildFConstant(MI.getOperand(0), C);
MI.eraseFromParent();
- return true;
}
-bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
+void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
Builder.buildConstant(MI.getOperand(0), C);
MI.eraseFromParent();
- return true;
}
-bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
+void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
Builder.buildConstant(MI.getOperand(0), C);
MI.eraseFromParent();
- return true;
}
-bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
+void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
Builder.buildUndef(MI.getOperand(0));
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::matchSimplifyAddToSub(
@@ -2750,9 +2787,7 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
Register Y = RightHandInst->getOperand(1).getReg();
LLT XTy = MRI.getType(X);
LLT YTy = MRI.getType(Y);
- if (XTy != YTy)
- return false;
- if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
+ if (!XTy.isValid() || XTy != YTy)
return false;
// Optional extra source register.
@@ -2779,6 +2814,9 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
}
}
+ if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
+ return false;
+
// Record the steps to build the new instructions.
//
// Steps to build (logic x, y)
@@ -3227,7 +3265,7 @@ bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
/// \p SelectOperand is the operand in binary operator \p MI that is the select
/// to fold.
-bool CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
+void CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
const unsigned &SelectOperand) {
Builder.setInstrAndDebugLoc(MI);
@@ -3263,8 +3301,6 @@ bool CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
MI.eraseFromParent();
-
- return true;
}
std::optional<SmallVector<Register, 8>>
@@ -3612,275 +3648,6 @@ bool CombinerHelper::matchLoadOrCombine(
return true;
}
-/// Check if the store \p Store is a truncstore that can be merged. That is,
-/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
-/// Register then it does not need to match and SrcVal is set to the source
-/// value found.
-/// On match, returns the start byte offset of the \p SrcVal that is being
-/// stored.
-static std::optional<int64_t>
-getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
- MachineRegisterInfo &MRI) {
- Register TruncVal;
- if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
- return std::nullopt;
-
- // The shift amount must be a constant multiple of the narrow type.
- // It is translated to the offset address in the wide source value "y".
- //
- // x = G_LSHR y, ShiftAmtC
- // s8 z = G_TRUNC x
- // store z, ...
- Register FoundSrcVal;
- int64_t ShiftAmt;
- if (!mi_match(TruncVal, MRI,
- m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
- m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
- if (!SrcVal.isValid() || TruncVal == SrcVal) {
- if (!SrcVal.isValid())
- SrcVal = TruncVal;
- return 0; // If it's the lowest index store.
- }
- return std::nullopt;
- }
-
- unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
- if (ShiftAmt % NarrowBits != 0)
- return std::nullopt;
- const unsigned Offset = ShiftAmt / NarrowBits;
-
- if (SrcVal.isValid() && FoundSrcVal != SrcVal)
- return std::nullopt;
-
- if (!SrcVal.isValid())
- SrcVal = FoundSrcVal;
- else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
- return std::nullopt;
- return Offset;
-}
-
-/// Match a pattern where a wide type scalar value is stored by several narrow
-/// stores. Fold it into a single store or a BSWAP and a store if the targets
-/// supports it.
-///
-/// Assuming little endian target:
-/// i8 *p = ...
-/// i32 val = ...
-/// p[0] = (val >> 0) & 0xFF;
-/// p[1] = (val >> 8) & 0xFF;
-/// p[2] = (val >> 16) & 0xFF;
-/// p[3] = (val >> 24) & 0xFF;
-/// =>
-/// *((i32)p) = val;
-///
-/// i8 *p = ...
-/// i32 val = ...
-/// p[0] = (val >> 24) & 0xFF;
-/// p[1] = (val >> 16) & 0xFF;
-/// p[2] = (val >> 8) & 0xFF;
-/// p[3] = (val >> 0) & 0xFF;
-/// =>
-/// *((i32)p) = BSWAP(val);
-bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI,
- MergeTruncStoresInfo &MatchInfo) {
- auto &StoreMI = cast<GStore>(MI);
- LLT MemTy = StoreMI.getMMO().getMemoryType();
-
- // We only handle merging simple stores of 1-4 bytes.
- if (!MemTy.isScalar())
- return false;
- switch (MemTy.getSizeInBits()) {
- case 8:
- case 16:
- case 32:
- break;
- default:
- return false;
- }
- if (!StoreMI.isSimple())
- return false;
-
- // We do a simple search for mergeable stores prior to this one.
- // Any potential alias hazard along the way terminates the search.
- SmallVector<GStore *> FoundStores;
-
- // We're looking for:
- // 1) a (store(trunc(...)))
- // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
- // the partial value stored.
- // 3) where the offsets form either a little or big-endian sequence.
-
- auto &LastStore = StoreMI;
-
- // The single base pointer that all stores must use.
- Register BaseReg;
- int64_t LastOffset;
- if (!mi_match(LastStore.getPointerReg(), MRI,
- m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
- BaseReg = LastStore.getPointerReg();
- LastOffset = 0;
- }
-
- GStore *LowestIdxStore = &LastStore;
- int64_t LowestIdxOffset = LastOffset;
-
- Register WideSrcVal;
- auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, MRI);
- if (!LowestShiftAmt)
- return false; // Didn't match a trunc.
- assert(WideSrcVal.isValid());
-
- LLT WideStoreTy = MRI.getType(WideSrcVal);
- // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
- if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
- return false;
- const unsigned NumStoresRequired =
- WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
-
- SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
- OffsetMap[*LowestShiftAmt] = LastOffset;
- FoundStores.emplace_back(&LastStore);
-
- // Search the block up for more stores.
- // We use a search threshold of 10 instructions here because the combiner
- // works top-down within a block, and we don't want to search an unbounded
- // number of predecessor instructions trying to find matching stores.
- // If we moved this optimization into a separate pass then we could probably
- // use a more efficient search without having a hard-coded threshold.
- const int MaxInstsToCheck = 10;
- int NumInstsChecked = 0;
- for (auto II = ++LastStore.getReverseIterator();
- II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
- ++II) {
- NumInstsChecked++;
- GStore *NewStore;
- if ((NewStore = dyn_cast<GStore>(&*II))) {
- if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
- break;
- } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
- break;
- } else {
- continue; // This is a safe instruction we can look past.
- }
-
- Register NewBaseReg;
- int64_t MemOffset;
- // Check we're storing to the same base + some offset.
- if (!mi_match(NewStore->getPointerReg(), MRI,
- m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
- NewBaseReg = NewStore->getPointerReg();
- MemOffset = 0;
- }
- if (BaseReg != NewBaseReg)
- break;
-
- auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, MRI);
- if (!ShiftByteOffset)
- break;
- if (MemOffset < LowestIdxOffset) {
- LowestIdxOffset = MemOffset;
- LowestIdxStore = NewStore;
- }
-
- // Map the offset in the store and the offset in the combined value, and
- // early return if it has been set before.
- if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
- OffsetMap[*ShiftByteOffset] != INT64_MAX)
- break;
- OffsetMap[*ShiftByteOffset] = MemOffset;
-
- FoundStores.emplace_back(NewStore);
- // Reset counter since we've found a matching inst.
- NumInstsChecked = 0;
- if (FoundStores.size() == NumStoresRequired)
- break;
- }
-
- if (FoundStores.size() != NumStoresRequired) {
- return false;
- }
-
- const auto &DL = LastStore.getMF()->getDataLayout();
- auto &C = LastStore.getMF()->getFunction().getContext();
- // Check that a store of the wide type is both allowed and fast on the target
- unsigned Fast = 0;
- bool Allowed = getTargetLowering().allowsMemoryAccess(
- C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
- if (!Allowed || !Fast)
- return false;
-
- // Check if the pieces of the value are going to the expected places in memory
- // to merge the stores.
- unsigned NarrowBits = MemTy.getScalarSizeInBits();
- auto checkOffsets = [&](bool MatchLittleEndian) {
- if (MatchLittleEndian) {
- for (unsigned i = 0; i != NumStoresRequired; ++i)
- if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
- return false;
- } else { // MatchBigEndian by reversing loop counter.
- for (unsigned i = 0, j = NumStoresRequired - 1; i != NumStoresRequired;
- ++i, --j)
- if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
- return false;
- }
- return true;
- };
-
- // Check if the offsets line up for the native data layout of this target.
- bool NeedBswap = false;
- bool NeedRotate = false;
- if (!checkOffsets(DL.isLittleEndian())) {
- // Special-case: check if byte offsets line up for the opposite endian.
- if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
- NeedBswap = true;
- else if (NumStoresRequired == 2 && checkOffsets(DL.isBigEndian()))
- NeedRotate = true;
- else
- return false;
- }
-
- if (NeedBswap &&
- !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}))
- return false;
- if (NeedRotate &&
- !isLegalOrBeforeLegalizer({TargetOpcode::G_ROTR, {WideStoreTy}}))
- return false;
-
- MatchInfo.NeedBSwap = NeedBswap;
- MatchInfo.NeedRotate = NeedRotate;
- MatchInfo.LowestIdxStore = LowestIdxStore;
- MatchInfo.WideSrcVal = WideSrcVal;
- MatchInfo.FoundStores = std::move(FoundStores);
- return true;
-}
-
-void CombinerHelper::applyTruncStoreMerge(MachineInstr &MI,
- MergeTruncStoresInfo &MatchInfo) {
-
- Builder.setInstrAndDebugLoc(MI);
- Register WideSrcVal = MatchInfo.WideSrcVal;
- LLT WideStoreTy = MRI.getType(WideSrcVal);
-
- if (MatchInfo.NeedBSwap) {
- WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
- } else if (MatchInfo.NeedRotate) {
- assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
- "Unexpected type for rotate");
- auto RotAmt =
- Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
- WideSrcVal =
- Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
- }
-
- Builder.buildStore(WideSrcVal, MatchInfo.LowestIdxStore->getPointerReg(),
- MatchInfo.LowestIdxStore->getMMO().getPointerInfo(),
- MatchInfo.LowestIdxStore->getMMO().getAlign());
-
- // Erase the old stores.
- for (auto *ST : MatchInfo.FoundStores)
- ST->eraseFromParent();
-}
-
bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
MachineInstr *&ExtMI) {
assert(MI.getOpcode() == TargetOpcode::G_PHI);
@@ -4395,7 +4162,7 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
if (static_cast<uint64_t>(LSBImm) >= Size)
return false;
- uint64_t Width = APInt(Size, AndImm).countTrailingOnes();
+ uint64_t Width = APInt(Size, AndImm).countr_one();
MatchInfo = [=](MachineIRBuilder &B) {
auto WidthCst = B.buildConstant(ExtractTy, Width);
auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
@@ -4496,7 +4263,7 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd(
// Calculate start position and width of the extract.
const int64_t Pos = ShrAmt;
- const int64_t Width = countTrailingOnes(UMask) - ShrAmt;
+ const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
// It's preferable to keep the shift, rather than form G_SBFX.
// TODO: remove the G_AND via demanded bits analysis.
@@ -4695,6 +4462,62 @@ bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
return false;
}
+bool CombinerHelper::tryReassocBinOp(unsigned Opc, Register DstReg,
+ Register OpLHS, Register OpRHS,
+ BuildFnTy &MatchInfo) {
+ LLT OpRHSTy = MRI.getType(OpRHS);
+ MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
+
+ if (OpLHSDef->getOpcode() != Opc)
+ return false;
+
+ MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
+ Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
+ Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
+
+ // If the inner op is (X op C), pull the constant out so it can be folded with
+ // other constants in the expression tree. Folding is not guaranteed so we
+ // might have (C1 op C2). In that case do not pull a constant out because it
+ // won't help and can lead to infinite loops.
+ if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
+ !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
+ if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
+ // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
+ B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
+ };
+ return true;
+ }
+ if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
+ // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
+ // iff (op x, c1) has one use
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
+ B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
+ };
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ // We don't check if the reassociation will break a legal addressing mode
+ // here since pointer arithmetic is handled by G_PTR_ADD.
+ unsigned Opc = MI.getOpcode();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+
+ if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
+ return true;
+ if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
+ return true;
+ return false;
+}
bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
Register Op1 = MI.getOperand(1).getReg();
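
For an associative, commutative opcode the rewrite in tryReassocBinOp preserves the value while exposing the two constants to later folding; a standalone check of the G_ADD case (plain C++, wrapping arithmetic):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 0xCAFEBABE;
      uint32_t C1 = 100, C2 = 0xFFFFFF9Cu; // C2 == -100 in two's complement
      // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
      assert(((X + C1) + C2) == (X + (C1 + C2)));
      return 0;
    }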
@@ -4766,7 +4589,7 @@ bool CombinerHelper::matchNarrowBinopFeedingAnd(
return false;
// No point in combining if there's nothing to truncate.
- unsigned NarrowWidth = Mask.countTrailingOnes();
+ unsigned NarrowWidth = Mask.countr_one();
if (NarrowWidth == WideTy.getSizeInBits())
return false;
LLT NarrowTy = LLT::scalar(NarrowWidth);
@@ -4956,7 +4779,7 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
// Magic algorithm doesn't work for division by 1. We need to emit a select
// at the end.
// TODO: Use undef values for divisor of 1.
- if (!Divisor.isOneValue()) {
+ if (!Divisor.isOne()) {
UnsignedDivisionByConstantInfo magics =
UnsignedDivisionByConstantInfo::get(Divisor);
@@ -5144,7 +4967,7 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
auto *CI = cast<ConstantInt>(C);
APInt Divisor = CI->getValue();
- unsigned Shift = Divisor.countTrailingZeros();
+ unsigned Shift = Divisor.countr_zero();
if (Shift) {
Divisor.ashrInPlace(Shift);
UseSRA = true;
@@ -6185,6 +6008,16 @@ bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
return CmpInst::isEquality(Pred) && Y.isValid();
}
+bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
+ Register ShiftReg = MI.getOperand(2).getReg();
+ LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
+ auto IsShiftTooBig = [&](const Constant *C) {
+ auto *CI = dyn_cast<ConstantInt>(C);
+ return CI && CI->uge(ResTy.getScalarSizeInBits());
+ };
+ return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
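
A minimal sketch of the predicate matchShiftsTooBig applies (hypothetical standalone helper, not the LLVM API): a constant shift amount of at least the scalar width yields poison, so a matching combine can replace the result with an implicit def.

    #include <cassert>
    #include <cstdint>

    // Mirrors CI->uge(ResTy.getScalarSizeInBits()) from the matcher above.
    static bool isShiftTooBig(uint64_t ShiftAmt, unsigned ScalarSizeInBits) {
      return ShiftAmt >= ScalarSizeInBits;
    }

    int main() {
      assert(!isShiftTooBig(31, 32)); // s32 shifted by 31: still defined
      assert(isShiftTooBig(32, 32));  // s32 shifted by 32: combine fires
      return 0;
    }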
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
new file mode 100644
index 000000000000..d747cbf5aadc
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
@@ -0,0 +1,68 @@
+//===- llvm/CodeGen/GlobalISel/GIMatchTableExecutor.cpp -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file implements the GIMatchTableExecutor class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define DEBUG_TYPE "gi-match-table-executor"
+
+using namespace llvm;
+
+GIMatchTableExecutor::MatcherState::MatcherState(unsigned MaxRenderers)
+ : Renderers(MaxRenderers) {}
+
+GIMatchTableExecutor::GIMatchTableExecutor() = default;
+
+bool GIMatchTableExecutor::isOperandImmEqual(
+ const MachineOperand &MO, int64_t Value,
+ const MachineRegisterInfo &MRI) const {
+ if (MO.isReg() && MO.getReg())
+ if (auto VRegVal = getIConstantVRegValWithLookThrough(MO.getReg(), MRI))
+ return VRegVal->Value.getSExtValue() == Value;
+ return false;
+}
+
+bool GIMatchTableExecutor::isBaseWithConstantOffset(
+ const MachineOperand &Root, const MachineRegisterInfo &MRI) const {
+ if (!Root.isReg())
+ return false;
+
+ MachineInstr *RootI = MRI.getVRegDef(Root.getReg());
+ if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
+ return false;
+
+ MachineOperand &RHS = RootI->getOperand(2);
+ MachineInstr *RHSI = MRI.getVRegDef(RHS.getReg());
+ if (RHSI->getOpcode() != TargetOpcode::G_CONSTANT)
+ return false;
+
+ return true;
+}
+
+bool GIMatchTableExecutor::isObviouslySafeToFold(MachineInstr &MI,
+ MachineInstr &IntoMI) const {
+ // Immediate neighbours are already folded.
+ if (MI.getParent() == IntoMI.getParent() &&
+ std::next(MI.getIterator()) == IntoMI.getIterator())
+ return true;
+
+ // Convergent instructions cannot be moved in the CFG.
+ if (MI.isConvergent() && MI.getParent() != IntoMI.getParent())
+ return false;
+
+ return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
+ !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index bfbe7e1c3e55..363ffbfa90b5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -115,7 +116,7 @@ void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1,
computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);
// Only known if known in both the LHS and RHS.
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
// Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is
@@ -191,7 +192,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Depth + 1);
// Known bits are the values that are shared by every demanded element.
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
// If we don't know any bits, early out.
if (Known.isUnknown())
@@ -235,10 +236,10 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
// For COPYs we don't do anything, don't increase the depth.
computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
Depth + (Opcode != TargetOpcode::COPY));
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
// If we reach a point where we don't know anything
// just stop looking through the operands.
- if (Known.One == 0 && Known.Zero == 0)
+ if (Known.isUnknown())
break;
} else {
// We know nothing.
@@ -750,7 +751,7 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
// Okay, we know that the sign bit in Mask is set. Use CLO to determine
// the number of identical bits in the top of the input value.
Mask <<= Mask.getBitWidth() - TyBits;
- return std::max(FirstAnswer, Mask.countLeadingOnes());
+ return std::max(FirstAnswer, Mask.countl_one());
}
unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) {
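
The three `intersectWith` calls above all rely on the same rule; a standalone mini-model of it (hypothetical struct, not LLVM's KnownBits class): a bit is known in the intersection only if both inputs know it, with the same value.

    #include <cassert>
    #include <cstdint>

    struct MiniKnownBits {
      uint32_t Zero, One; // bit set => that bit is known 0 / known 1
      MiniKnownBits intersectWith(const MiniKnownBits &RHS) const {
        return {Zero & RHS.Zero, One & RHS.One};
      }
    };

    int main() {
      MiniKnownBits A{0xFFFF0000u, 0x000000FFu}; // top half known 0, low byte known 1
      MiniKnownBits B{0xFF000000u, 0x0000000Fu};
      MiniKnownBits C = A.intersectWith(B);
      assert(C.Zero == 0xFF000000u && C.One == 0x0000000Fu);
      return 0;
    }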
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 7d811dc0ad8f..9a67a8d05a4d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -43,6 +44,7 @@
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -74,7 +76,6 @@
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -300,7 +301,7 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
Register Op0 = getOrCreateVReg(*U.getOperand(0));
Register Op1 = getOrCreateVReg(*U.getOperand(1));
Register Res = getOrCreateVReg(U);
- uint16_t Flags = 0;
+ uint32_t Flags = 0;
if (isa<Instruction>(U)) {
const Instruction &I = cast<Instruction>(U);
Flags = MachineInstr::copyFlagsFromInstruction(I);
@@ -314,7 +315,7 @@ bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U,
MachineIRBuilder &MIRBuilder) {
Register Op0 = getOrCreateVReg(*U.getOperand(0));
Register Res = getOrCreateVReg(U);
- uint16_t Flags = 0;
+ uint32_t Flags = 0;
if (isa<Instruction>(U)) {
const Instruction &I = cast<Instruction>(U);
Flags = MachineInstr::copyFlagsFromInstruction(I);
@@ -345,7 +346,7 @@ bool IRTranslator::translateCompare(const User &U,
MIRBuilder.buildCopy(
Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType())));
else {
- uint16_t Flags = 0;
+ uint32_t Flags = 0;
if (CI)
Flags = MachineInstr::copyFlagsFromInstruction(*CI);
MIRBuilder.buildFCmp(Pred, Res, Op0, Op1, Flags);
@@ -844,8 +845,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
// For conditional branch lowering, we might try to do something silly like
// emit a G_ICMP to compare an existing G_ICMP i1 result with true. If so,
// just re-use the existing condition vreg.
- if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI &&
- CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
+ if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI && CI->isOne() &&
+ CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
Cond = CondLHS;
} else {
Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
@@ -1018,7 +1019,7 @@ void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
LLT MaskTy = SwitchOpTy;
if (MaskTy.getSizeInBits() > PtrTy.getSizeInBits() ||
- !isPowerOf2_32(MaskTy.getSizeInBits()))
+ !llvm::has_single_bit<uint32_t>(MaskTy.getSizeInBits()))
MaskTy = LLT::scalar(PtrTy.getSizeInBits());
else {
// Ensure that the type will fit the mask value.
@@ -1074,14 +1075,14 @@ void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB,
// Testing for a single bit; just compare the shift count with what it
// would need to be to shift a 1 bit in that position.
auto MaskTrailingZeros =
- MIB.buildConstant(SwitchTy, countTrailingZeros(B.Mask));
+ MIB.buildConstant(SwitchTy, llvm::countr_zero(B.Mask));
Cmp =
MIB.buildICmp(ICmpInst::ICMP_EQ, LLT::scalar(1), Reg, MaskTrailingZeros)
.getReg(0);
} else if (PopCount == BB.Range) {
// There is only one zero bit in the range, test for it directly.
auto MaskTrailingOnes =
- MIB.buildConstant(SwitchTy, countTrailingOnes(B.Mask));
+ MIB.buildConstant(SwitchTy, llvm::countr_one(B.Mask));
Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Reg, MaskTrailingOnes)
.getReg(0);
} else {
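
The single-bit fast path above replaces the generic `(1 << Reg) & Mask` test with a straight equality on the shift count; a standalone check of that equivalence (plain C++20 for std::countr_zero, not LLVM code):

    #include <bit>
    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Mask = 0x10; // popcount == 1, bit 4
      for (uint32_t Reg = 0; Reg < 32; ++Reg)
        assert((((1u << Reg) & Mask) != 0) ==
               (Reg == (uint32_t)std::countr_zero(Mask)));
      return 0;
    }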
@@ -1294,7 +1295,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
AAMDNodes AAInfo = LI.getAAMetadata();
const Value *Ptr = LI.getPointerOperand();
- Type *OffsetIRTy = DL->getIntPtrType(Ptr->getType());
+ Type *OffsetIRTy = DL->getIndexType(Ptr->getType());
LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
if (CLI->supportSwiftError() && isSwiftError(Ptr)) {
@@ -1342,7 +1343,7 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand());
Register Base = getOrCreateVReg(*SI.getPointerOperand());
- Type *OffsetIRTy = DL->getIntPtrType(SI.getPointerOperandType());
+ Type *OffsetIRTy = DL->getIndexType(SI.getPointerOperandType());
LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
if (CLI->supportSwiftError() && isSwiftError(SI.getPointerOperand())) {
@@ -1438,7 +1439,7 @@ bool IRTranslator::translateSelect(const User &U,
ArrayRef<Register> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
ArrayRef<Register> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
- uint16_t Flags = 0;
+ uint32_t Flags = 0;
if (const SelectInst *SI = dyn_cast<SelectInst>(&U))
Flags = MachineInstr::copyFlagsFromInstruction(*SI);
@@ -1468,8 +1469,14 @@ bool IRTranslator::translateBitCast(const User &U,
MachineIRBuilder &MIRBuilder) {
// If we're bitcasting to the source type, we can reuse the source vreg.
if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
- getLLTForType(*U.getType(), *DL))
+ getLLTForType(*U.getType(), *DL)) {
+ // If the source is a ConstantInt then it was probably created by
+ // ConstantHoisting and we should leave it alone.
+ if (isa<ConstantInt>(U.getOperand(0)))
+ return translateCast(TargetOpcode::G_CONSTANT_FOLD_BARRIER, U,
+ MIRBuilder);
return translateCopy(U, *U.getOperand(0), MIRBuilder);
+ }
return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
}
@@ -1488,7 +1495,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
Register BaseReg = getOrCreateVReg(Op0);
Type *PtrIRTy = Op0.getType();
LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
- Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy);
+ Type *OffsetIRTy = DL->getIndexType(PtrIRTy);
LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
// Normalize Vector GEP - all scalar operands should be converted to the
@@ -1513,7 +1520,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
.getReg(0);
PtrIRTy = FixedVectorType::get(PtrIRTy, VectorWidth);
PtrTy = getLLTForType(*PtrIRTy, *DL);
- OffsetIRTy = DL->getIntPtrType(PtrIRTy);
+ OffsetIRTy = DL->getIndexType(PtrIRTy);
OffsetTy = getLLTForType(*OffsetIRTy, *DL);
}
@@ -1759,6 +1766,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_FLOG2;
case Intrinsic::log10:
return TargetOpcode::G_FLOG10;
+ case Intrinsic::ldexp:
+ return TargetOpcode::G_FLDEXP;
case Intrinsic::nearbyint:
return TargetOpcode::G_FNEARBYINT;
case Intrinsic::pow:
@@ -1851,6 +1860,8 @@ static unsigned getConstrainedOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_STRICT_FMA;
case Intrinsic::experimental_constrained_sqrt:
return TargetOpcode::G_STRICT_FSQRT;
+ case Intrinsic::experimental_constrained_ldexp:
+ return TargetOpcode::G_STRICT_FLDEXP;
default:
return 0;
}
@@ -1864,7 +1875,7 @@ bool IRTranslator::translateConstrainedFPIntrinsic(
if (!Opcode)
return false;
- unsigned Flags = MachineInstr::copyFlagsFromInstruction(FPI);
+ uint32_t Flags = MachineInstr::copyFlagsFromInstruction(FPI);
if (EB == fp::ExceptionBehavior::ebIgnore)
Flags |= MachineInstr::NoFPExcept;
@@ -1879,6 +1890,60 @@ bool IRTranslator::translateConstrainedFPIntrinsic(
return true;
}
+std::optional<MCRegister> IRTranslator::getArgPhysReg(Argument &Arg) {
+ auto VRegs = getOrCreateVRegs(Arg);
+ if (VRegs.size() != 1)
+ return std::nullopt;
+
+ // Arguments are lowered as a copy of a livein physical register.
+ auto *VRegDef = MF->getRegInfo().getVRegDef(VRegs[0]);
+ if (!VRegDef || !VRegDef->isCopy())
+ return std::nullopt;
+ return VRegDef->getOperand(1).getReg().asMCReg();
+}
+
+bool IRTranslator::translateIfEntryValueArgument(const DbgValueInst &DebugInst,
+ MachineIRBuilder &MIRBuilder) {
+ auto *Arg = dyn_cast<Argument>(DebugInst.getValue());
+ if (!Arg)
+ return false;
+
+ const DIExpression *Expr = DebugInst.getExpression();
+ if (!Expr->isEntryValue())
+ return false;
+
+ std::optional<MCRegister> PhysReg = getArgPhysReg(*Arg);
+ if (!PhysReg) {
+ LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but "
+ "couldn't find a physical register\n"
+ << DebugInst << "\n");
+ return true;
+ }
+
+ MIRBuilder.buildDirectDbgValue(*PhysReg, DebugInst.getVariable(),
+ DebugInst.getExpression());
+ return true;
+}
+
+bool IRTranslator::translateIfEntryValueArgument(
+ const DbgDeclareInst &DebugInst) {
+ auto *Arg = dyn_cast<Argument>(DebugInst.getAddress());
+ if (!Arg)
+ return false;
+
+ const DIExpression *Expr = DebugInst.getExpression();
+ if (!Expr->isEntryValue())
+ return false;
+
+ std::optional<MCRegister> PhysReg = getArgPhysReg(*Arg);
+ if (!PhysReg)
+ return false;
+
+ MF->setVariableDbgInfo(DebugInst.getVariable(), Expr, *PhysReg,
+ DebugInst.getDebugLoc());
+ return true;
+}
+
bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
MachineIRBuilder &MIRBuilder) {
if (auto *MI = dyn_cast<AnyMemIntrinsic>(&CI)) {
@@ -1945,12 +2010,16 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// instructions (in fact, they get ignored if they *do* exist).
MF->setVariableDbgInfo(DI.getVariable(), DI.getExpression(),
getOrCreateFrameIndex(*AI), DI.getDebugLoc());
- } else {
- // A dbg.declare describes the address of a source variable, so lower it
- // into an indirect DBG_VALUE.
- MIRBuilder.buildIndirectDbgValue(getOrCreateVReg(*Address),
- DI.getVariable(), DI.getExpression());
+ return true;
}
+
+ if (translateIfEntryValueArgument(DI))
+ return true;
+
+ // A dbg.declare describes the address of a source variable, so lower it
+ // into an indirect DBG_VALUE.
+ MIRBuilder.buildIndirectDbgValue(getOrCreateVReg(*Address),
+ DI.getVariable(), DI.getExpression());
return true;
}
case Intrinsic::dbg_label: {
@@ -1991,16 +2060,32 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// DI cannot produce a valid DBG_VALUE, so produce an undef DBG_VALUE to
// terminate any prior location.
MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression());
- } else if (const auto *CI = dyn_cast<Constant>(V)) {
+ return true;
+ }
+ if (const auto *CI = dyn_cast<Constant>(V)) {
MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
- } else {
- for (Register Reg : getOrCreateVRegs(*V)) {
- // FIXME: This does not handle register-indirect values at offset 0. The
- // direct/indirect thing shouldn't really be handled by something as
- // implicit as reg+noreg vs reg+imm in the first place, but it seems
- // pretty baked in right now.
- MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression());
- }
+ return true;
+ }
+ if (auto *AI = dyn_cast<AllocaInst>(V);
+ AI && AI->isStaticAlloca() && DI.getExpression()->startsWithDeref()) {
+ // If the value is an alloca and the expression starts with a
+ // dereference, track a stack slot instead of a register, as registers
+ // may be clobbered.
+ auto ExprOperands = DI.getExpression()->getElements();
+ auto *ExprDerefRemoved =
+ DIExpression::get(AI->getContext(), ExprOperands.drop_front());
+ MIRBuilder.buildFIDbgValue(getOrCreateFrameIndex(*AI), DI.getVariable(),
+ ExprDerefRemoved);
+ return true;
+ }
+ if (translateIfEntryValueArgument(DI, MIRBuilder))
+ return true;
+ for (Register Reg : getOrCreateVRegs(*V)) {
+ // FIXME: This does not handle register-indirect values at offset 0. The
+ // direct/indirect thing shouldn't really be handled by something as
+ // implicit as reg+noreg vs reg+imm in the first place, but it seems
+ // pretty baked in right now.
+ MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression());
}
return true;
}
@@ -2090,6 +2175,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
getOrCreateVReg(*CI.getArgOperand(0)),
MachineInstr::copyFlagsFromInstruction(CI));
return true;
+ case Intrinsic::frexp: {
+ ArrayRef<Register> VRegs = getOrCreateVRegs(CI);
+ MIRBuilder.buildFFrexp(VRegs[0], VRegs[1],
+ getOrCreateVReg(*CI.getArgOperand(0)),
+ MachineInstr::copyFlagsFromInstruction(CI));
+ return true;
+ }
case Intrinsic::memcpy_inline:
return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE);
case Intrinsic::memcpy:
@@ -2296,7 +2388,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return CLI->lowerCall(MIRBuilder, Info);
}
case Intrinsic::fptrunc_round: {
- unsigned Flags = MachineInstr::copyFlagsFromInstruction(CI);
+ uint32_t Flags = MachineInstr::copyFlagsFromInstruction(CI);
// Convert the metadata argument to a constant integer
Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(1))->getMetadata();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index e0357c50e555..3925611f1485 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -391,10 +391,12 @@ bool InlineAsmLowering::lowerInlineAsm(
Inst.addReg(SourceRegs[0]);
} else {
// Otherwise, this outputs to a register (directly for C_Register /
- // C_RegisterClass. Find a register that we can use.
+ // C_RegisterClass/C_Other.
assert(OpInfo.ConstraintType == TargetLowering::C_Register ||
- OpInfo.ConstraintType == TargetLowering::C_RegisterClass);
+ OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+ OpInfo.ConstraintType == TargetLowering::C_Other);
+ // Find a register that we can use.
if (OpInfo.Regs.empty()) {
LLVM_DEBUG(dbgs()
<< "Couldn't allocate output register for constraint\n");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index f780050ca3f1..9bbef11067ae 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/config.h"
@@ -104,7 +105,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
CodeGenCoverage CoverageInfo;
assert(ISel && "Cannot work without InstructionSelector");
- ISel->setupMF(MF, KB, CoverageInfo, PSI, BFI);
+ ISel->setupMF(MF, KB, &CoverageInfo, PSI, BFI);
// An optimization remark emitter. Used to report failures.
MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
@@ -165,12 +166,12 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- // Eliminate hints.
- if (isPreISelGenericOptimizationHint(MI.getOpcode())) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
+ // Eliminate hints or G_CONSTANT_FOLD_BARRIER.
+ if (isPreISelGenericOptimizationHint(MI.getOpcode()) ||
+ MI.getOpcode() == TargetOpcode::G_CONSTANT_FOLD_BARRIER) {
+ auto [DstReg, SrcReg] = MI.getFirst2Regs();
- // At this point, the destination register class of the hint may have
+ // At this point, the destination register class of the op may have
// been decided.
//
// Propagate that through to the source register.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 8959d215ecd1..c48591cc2f02 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -5,64 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This file implements the InstructionSelector class.
-//
-//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-#define DEBUG_TYPE "instructionselector"
-
-using namespace llvm;
-
-InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers)
- : Renderers(MaxRenderers) {}
-
-InstructionSelector::InstructionSelector() = default;
-
-bool InstructionSelector::isOperandImmEqual(
- const MachineOperand &MO, int64_t Value,
- const MachineRegisterInfo &MRI) const {
- if (MO.isReg() && MO.getReg())
- if (auto VRegVal = getIConstantVRegValWithLookThrough(MO.getReg(), MRI))
- return VRegVal->Value.getSExtValue() == Value;
- return false;
-}
-
-bool InstructionSelector::isBaseWithConstantOffset(
- const MachineOperand &Root, const MachineRegisterInfo &MRI) const {
- if (!Root.isReg())
- return false;
-
- MachineInstr *RootI = MRI.getVRegDef(Root.getReg());
- if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
- return false;
-
- MachineOperand &RHS = RootI->getOperand(2);
- MachineInstr *RHSI = MRI.getVRegDef(RHS.getReg());
- if (RHSI->getOpcode() != TargetOpcode::G_CONSTANT)
- return false;
-
- return true;
-}
-bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI,
- MachineInstr &IntoMI) const {
- // Immediate neighbours are already folded.
- if (MI.getParent() == IntoMI.getParent() &&
- std::next(MI.getIterator()) == IntoMI.getIterator())
- return true;
+namespace llvm {
- // Convergent instructions cannot be moved in the CFG.
- if (MI.isConvergent() && MI.getParent() != IntoMI.getParent())
- return false;
+// vtable anchor
+InstructionSelector::~InstructionSelector() = default;
- return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
- !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty();
-}
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index 54a82cac95d5..2c77ed8b0600 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -164,7 +164,8 @@ LegalityPredicate LegalityPredicates::sizeNotMultipleOf(unsigned TypeIdx,
LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
const LLT QueryTy = Query.Types[TypeIdx];
- return QueryTy.isScalar() && !isPowerOf2_32(QueryTy.getSizeInBits());
+ return QueryTy.isScalar() &&
+ !llvm::has_single_bit<uint32_t>(QueryTy.getSizeInBits());
};
}
@@ -184,14 +185,16 @@ LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0,
LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
return [=](const LegalityQuery &Query) {
- return !isPowerOf2_32(Query.MMODescrs[MMOIdx].MemoryTy.getSizeInBytes());
+ return !llvm::has_single_bit<uint32_t>(
+ Query.MMODescrs[MMOIdx].MemoryTy.getSizeInBytes());
};
}
LegalityPredicate LegalityPredicates::memSizeNotByteSizePow2(unsigned MMOIdx) {
return [=](const LegalityQuery &Query) {
const LLT MemTy = Query.MMODescrs[MMOIdx].MemoryTy;
- return !MemTy.isByteSized() || !isPowerOf2_32(MemTy.getSizeInBytes());
+ return !MemTy.isByteSized() ||
+ !llvm::has_single_bit<uint32_t>(MemTy.getSizeInBytes());
};
}
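The replacement leans on the C++20 bit facility; a runnable sketch of the equivalence being relied on, using std::has_single_bit as the standard counterpart of llvm::has_single_bit:

  #include <bit>
  #include <cassert>

  int main() {
    // True exactly when one bit is set, so 0 is correctly not a power of two,
    // matching the old isPowerOf2_32 semantics for the cases used here.
    assert(std::has_single_bit(64u));
    assert(!std::has_single_bit(48u));
    assert(!std::has_single_bit(0u));
  }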
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index 1a13f39c100c..aecbe0b7604c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
#include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
@@ -75,6 +76,7 @@ INITIALIZE_PASS_BEGIN(Legalizer, DEBUG_TYPE,
false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(Legalizer, DEBUG_TYPE,
"Legalize the Machine IR a function's Machine IR", false,
false)
@@ -85,6 +87,8 @@ void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
AU.addRequired<GISelCSEAnalysisWrapperPass>();
AU.addPreserved<GISelCSEAnalysisWrapperPass>();
+ AU.addRequired<GISelKnownBitsAnalysis>();
+ AU.addPreserved<GISelKnownBitsAnalysis>();
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -173,7 +177,8 @@ Legalizer::MFResult
Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
ArrayRef<GISelChangeObserver *> AuxObservers,
LostDebugLocObserver &LocObserver,
- MachineIRBuilder &MIRBuilder) {
+ MachineIRBuilder &MIRBuilder,
+ GISelKnownBits *KB) {
MIRBuilder.setMF(MF);
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -212,7 +217,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
// Now install the observer as the delegate to MF.
// This will keep all the observers notified about new insertions/deletions.
RAIIMFObsDelInstaller Installer(MF, WrapperObserver);
- LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder);
+ LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder, KB);
LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI);
bool Changed = false;
SmallVector<MachineInstr *, 128> RetryList;
@@ -314,8 +319,6 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
- const size_t NumBlocks = MF.size();
-
std::unique_ptr<MachineIRBuilder> MIRBuilder;
GISelCSEInfo *CSEInfo = nullptr;
bool EnableCSE = EnableCSEInLegalizer.getNumOccurrences()
@@ -338,25 +341,18 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
if (VerifyDebugLocs > DebugLocVerifyLevel::None)
AuxObservers.push_back(&LocObserver);
+ // This allows Known Bits Analysis in the legalizer.
+ GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+
const LegalizerInfo &LI = *MF.getSubtarget().getLegalizerInfo();
- MFResult Result =
- legalizeMachineFunction(MF, LI, AuxObservers, LocObserver, *MIRBuilder);
+ MFResult Result = legalizeMachineFunction(MF, LI, AuxObservers, LocObserver,
+ *MIRBuilder, KB);
if (Result.FailedOn) {
reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
"unable to legalize instruction", *Result.FailedOn);
return false;
}
- // For now don't support if new blocks are inserted - we would need to fix the
- // outer loop for that.
- if (MF.size() != NumBlocks) {
- MachineOptimizationRemarkMissed R("gisel-legalize", "GISelFailure",
- MF.getFunction().getSubprogram(),
- /*MBB=*/nullptr);
- R << "inserting blocks is not supported yet";
- reportGISelFailure(MF, TPC, MORE, R);
- return false;
- }
if (LocObserver.getNumLostDebugLocs()) {
MachineOptimizationRemarkMissed R("gisel-legalize", "LostDebugLoc",
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 8a1fce2d3d65..f0da0d88140f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -15,12 +15,14 @@
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
@@ -102,13 +104,13 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF,
MachineIRBuilder &Builder)
: MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
LI(*MF.getSubtarget().getLegalizerInfo()),
- TLI(*MF.getSubtarget().getTargetLowering()) { }
+ TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
GISelChangeObserver &Observer,
- MachineIRBuilder &B)
- : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
- TLI(*MF.getSubtarget().getTargetLowering()) { }
+ MachineIRBuilder &B, GISelKnownBits *KB)
+ : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
+ TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
@@ -540,6 +542,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(LOG_F);
case TargetOpcode::G_FLOG2:
RTLIBCASE(LOG2_F);
+ case TargetOpcode::G_FLDEXP:
+ RTLIBCASE(LDEXP_F);
case TargetOpcode::G_FCEIL:
RTLIBCASE(CEIL_F);
case TargetOpcode::G_FFLOOR:
@@ -824,6 +828,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
case TargetOpcode::G_FLOG10:
case TargetOpcode::G_FLOG:
case TargetOpcode::G_FLOG2:
+ case TargetOpcode::G_FLDEXP:
case TargetOpcode::G_FEXP:
case TargetOpcode::G_FEXP2:
case TargetOpcode::G_FCEIL:
@@ -1411,6 +1416,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
Observer.changedInstr(MI);
return Legalized;
+ case TargetOpcode::G_FLDEXP:
+ case TargetOpcode::G_STRICT_FLDEXP:
+ return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
}
}
@@ -1504,13 +1512,11 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 1)
return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
+ auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
if (DstTy.isVector())
return UnableToLegalize;
- Register Src1 = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(Src1);
+ LLT SrcTy = MRI.getType(Src1Reg);
const int DstSize = DstTy.getSizeInBits();
const int SrcSize = SrcTy.getSizeInBits();
const int WideSize = WideTy.getSizeInBits();
@@ -1522,7 +1528,7 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
if (WideSize >= DstSize) {
// Directly pack the bits in the target type.
- Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
+ Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
for (unsigned I = 2; I != NumOps; ++I) {
const unsigned Offset = (I - 1) * PartSize;
@@ -1753,11 +1759,7 @@ LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
LLT WideTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
-
- LLT DstTy = MRI.getType(DstReg);
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned Offset = MI.getOperand(2).getImm();
if (TypeIdx == 0) {
@@ -1978,10 +1980,7 @@ LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
}
bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
- Register Result = MI.getOperand(0).getReg();
- Register OriginalOverflow = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
+ auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
LLT SrcTy = MRI.getType(LHS);
LLT OverflowTy = MRI.getType(OriginalOverflow);
unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
@@ -2560,12 +2559,41 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
Observer.changedInstr(MI);
return Legalized;
- case TargetOpcode::G_FPOWI: {
- if (TypeIdx != 0)
- return UnableToLegalize;
+ case TargetOpcode::G_FPOWI:
+ case TargetOpcode::G_FLDEXP:
+ case TargetOpcode::G_STRICT_FLDEXP: {
+ if (TypeIdx == 0) {
+ if (MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ if (TypeIdx == 1) {
+ // For some reason SelectionDAG tries to promote to a libcall without
+ // actually changing the integer type for promotion.
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+ }
+ case TargetOpcode::G_FFREXP: {
Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
- widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+
+ if (TypeIdx == 0) {
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ } else {
+ widenScalarDst(MI, WideTy, 1);
+ }
+
Observer.changedInstr(MI);
return Legalized;
}
@@ -2631,12 +2659,34 @@ static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerBitcast(MachineInstr &MI) {
+LegalizerHelper::lowerFConstant(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
+ MachineFunction &MF = MIRBuilder.getMF();
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+
+ unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
+ LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
+ Align Alignment = Align(DL.getABITypeAlign(
+ getFloatTypeForLLT(MF.getFunction().getContext(), MRI.getType(Dst))));
+
+ auto Addr = MIRBuilder.buildConstantPool(
+ AddrPtrTy, MF.getConstantPool()->getConstantPoolIndex(
+ MI.getOperand(1).getFPImm(), Alignment));
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
+ MRI.getType(Dst), Alignment);
+
+ MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Addr, *MMO);
+ MI.eraseFromParent();
+
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerBitcast(MachineInstr &MI) {
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
if (SrcTy.isVector()) {
LLT SrcEltTy = SrcTy.getElementType();
SmallVector<Register, 8> SrcRegs;
@@ -2732,11 +2782,7 @@ LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 1)
return UnableToLegalize;
- Register Dst = MI.getOperand(0).getReg();
- Register SrcVec = MI.getOperand(1).getReg();
- Register Idx = MI.getOperand(2).getReg();
- LLT SrcVecTy = MRI.getType(SrcVec);
- LLT IdxTy = MRI.getType(Idx);
+ auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
LLT SrcEltTy = SrcVecTy.getElementType();
unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
@@ -2872,13 +2918,9 @@ LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 0)
return UnableToLegalize;
- Register Dst = MI.getOperand(0).getReg();
- Register SrcVec = MI.getOperand(1).getReg();
- Register Val = MI.getOperand(2).getReg();
- Register Idx = MI.getOperand(3).getReg();
-
- LLT VecTy = MRI.getType(Dst);
- LLT IdxTy = MRI.getType(Idx);
+ auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
+ MI.getFirst4RegLLTs();
+ LLT VecTy = DstTy;
LLT VecEltTy = VecTy.getElementType();
LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
@@ -3004,7 +3046,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
if (!isPowerOf2_32(MemSizeInBits)) {
// This load needs splitting into power of 2 sized loads.
- LargeSplitSize = PowerOf2Floor(MemSizeInBits);
+ LargeSplitSize = llvm::bit_floor(MemSizeInBits);
SmallSplitSize = MemSizeInBits - LargeSplitSize;
} else {
// This is already a power of 2, but we still need to split this in half.
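A worked instance of the split computed above, assuming a 48-bit memory access (a sketch using the C++20 std::bit_floor that llvm::bit_floor mirrors):

  #include <bit>
  #include <cassert>
  #include <cstdint>

  int main() {
    uint64_t MemSizeInBits = 48;                               // not a power of 2
    uint64_t LargeSplitSize = std::bit_floor(MemSizeInBits);   // 32
    uint64_t SmallSplitSize = MemSizeInBits - LargeSplitSize;  // 16
    assert(LargeSplitSize == 32 && SmallSplitSize == 16);
  }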
@@ -3122,7 +3164,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
uint64_t LargeSplitSize, SmallSplitSize;
if (!isPowerOf2_32(MemSizeInBits)) {
- LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
+ LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
} else {
auto &Ctx = MF.getFunction().getContext();
@@ -3250,6 +3292,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
switch(MI.getOpcode()) {
default:
return UnableToLegalize;
+ case TargetOpcode::G_FCONSTANT:
+ return lowerFConstant(MI);
case TargetOpcode::G_BITCAST:
return lowerBitcast(MI);
case TargetOpcode::G_SREM:
@@ -3274,10 +3318,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case TargetOpcode::G_UMULO: {
// Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
// result.
- Register Res = MI.getOperand(0).getReg();
- Register Overflow = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
+ auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
LLT Ty = MRI.getType(Res);
unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
@@ -3308,7 +3349,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return Legalized;
}
case TargetOpcode::G_FNEG: {
- Register Res = MI.getOperand(0).getReg();
+ auto [Res, SubByReg] = MI.getFirst2Regs();
LLT Ty = MRI.getType(Res);
// TODO: Handle vector types once we are able to
@@ -3317,23 +3358,16 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return UnableToLegalize;
auto SignMask =
MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
- Register SubByReg = MI.getOperand(1).getReg();
MIRBuilder.buildXor(Res, SubByReg, SignMask);
MI.eraseFromParent();
return Legalized;
}
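A scalar model of this lowering for IEEE binary32 (a sketch, not the builder code): negation is just an XOR with the sign-bit mask that buildConstant materializes above.

  #include <cstdint>
  #include <cstring>

  float fnegViaXor(float V) {
    uint32_t Bits;
    std::memcpy(&Bits, &V, sizeof(Bits));
    Bits ^= 0x80000000u; // APInt::getSignMask(32)
    std::memcpy(&V, &Bits, sizeof(Bits));
    return V;
  }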
case TargetOpcode::G_FSUB:
case TargetOpcode::G_STRICT_FSUB: {
- Register Res = MI.getOperand(0).getReg();
+ auto [Res, LHS, RHS] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Res);
// Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
- // First, check if G_FNEG is marked as Lower. If so, we may
- // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
- if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
- return UnableToLegalize;
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
@@ -3357,11 +3391,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return Legalized;
}
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
- Register OldValRes = MI.getOperand(0).getReg();
- Register SuccessRes = MI.getOperand(1).getReg();
- Register Addr = MI.getOperand(2).getReg();
- Register CmpVal = MI.getOperand(3).getReg();
- Register NewVal = MI.getOperand(4).getReg();
+ auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
**MI.memoperands_begin());
MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
@@ -3381,10 +3411,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case TargetOpcode::G_CTPOP:
return lowerBitCount(MI);
case G_UADDO: {
- Register Res = MI.getOperand(0).getReg();
- Register CarryOut = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
+ auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
MIRBuilder.buildAdd(Res, LHS, RHS);
MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
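The carry-out test above relies on modular wrap-around; a runnable scalar model of the G_UADDO lowering:

  #include <cassert>
  #include <cstdint>

  bool uaddo(uint32_t LHS, uint32_t RHS, uint32_t &Res) {
    Res = LHS + RHS;  // wraps modulo 2^32 on overflow
    return Res < RHS; // unsigned compare detects the wrap
  }

  int main() {
    uint32_t R;
    assert(uaddo(0xffffffffu, 1u, R) && R == 0u);
    assert(!uaddo(1u, 2u, R) && R == 3u);
  }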
@@ -3393,11 +3420,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return Legalized;
}
case G_UADDE: {
- Register Res = MI.getOperand(0).getReg();
- Register CarryOut = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
- Register CarryIn = MI.getOperand(4).getReg();
+ auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
LLT Ty = MRI.getType(Res);
auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
@@ -3409,10 +3432,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return Legalized;
}
case G_USUBO: {
- Register Res = MI.getOperand(0).getReg();
- Register BorrowOut = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
+ auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
MIRBuilder.buildSub(Res, LHS, RHS);
MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
@@ -3421,11 +3441,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return Legalized;
}
case G_USUBE: {
- Register Res = MI.getOperand(0).getReg();
- Register BorrowOut = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
- Register BorrowIn = MI.getOperand(4).getReg();
+ auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
const LLT CondTy = MRI.getType(BorrowOut);
const LLT Ty = MRI.getType(Res);
@@ -3470,8 +3486,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
assert(MI.getOperand(2).isImm() && "Expected immediate");
int64_t SizeInBits = MI.getOperand(2).getImm();
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
+ auto [DstReg, SrcReg] = MI.getFirst2Regs();
LLT DstTy = MRI.getType(DstReg);
Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
@@ -3869,9 +3884,7 @@ LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  // Requires compatible types. Otherwise, the user of DstReg did not perform
  // an unmerge that should have been artifact-combined. Most likely the
  // instruction that uses DstReg has to do a more/fewer-elements legalization
  // compatible with NarrowTy.
@@ -3958,8 +3971,7 @@ LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
unsigned TypeIdx,
LLT NarrowVecTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcVec = MI.getOperand(1).getReg();
+ auto [DstReg, SrcVec] = MI.getFirst2Regs();
Register InsertVal;
bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
@@ -4159,6 +4171,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FLOG:
case G_FLOG2:
case G_FLOG10:
+ case G_FLDEXP:
case G_FNEARBYINT:
case G_FCEIL:
case G_FFLOOR:
@@ -4234,6 +4247,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_STRICT_FSUB:
case G_STRICT_FMUL:
case G_STRICT_FMA:
+ case G_STRICT_FLDEXP:
+ case G_FFREXP:
return fewerElementsVectorMultiEltType(GMI, NumElts);
case G_ICMP:
case G_FCMP:
@@ -4278,13 +4293,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
if (TypeIdx != 0)
return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- Register Src1Reg = MI.getOperand(1).getReg();
- Register Src2Reg = MI.getOperand(2).getReg();
+ auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
+ MI.getFirst3RegLLTs();
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
- LLT DstTy = MRI.getType(DstReg);
- LLT Src1Ty = MRI.getType(Src1Reg);
- LLT Src2Ty = MRI.getType(Src2Reg);
// The shuffle should be canonicalized by now.
if (DstTy != Src1Ty)
return UnableToLegalize;
@@ -4474,10 +4485,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
// The semantics of the normal non-sequential reductions allow us to freely
// re-associate the operation.
- Register SrcReg = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
if (NarrowTy.isVector() &&
(SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
@@ -4865,6 +4873,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
case TargetOpcode::G_EXTRACT:
if (TypeIdx != 1)
return UnableToLegalize;
@@ -4873,6 +4882,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_INSERT:
+ case TargetOpcode::G_INSERT_VECTOR_ELT:
case TargetOpcode::G_FREEZE:
case TargetOpcode::G_FNEG:
case TargetOpcode::G_FABS:
@@ -4887,10 +4897,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SELECT: {
- Register DstReg = MI.getOperand(0).getReg();
- Register CondReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT CondTy = MRI.getType(CondReg);
+ auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
if (TypeIdx == 1) {
if (!CondTy.isScalar() ||
DstTy.getElementCount() != MoreTy.getElementCount())
@@ -4943,28 +4950,50 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_FPTRUNC:
+ case TargetOpcode::G_FPEXT: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ LLT SrcTy = LLT::fixed_vector(
+ MoreTy.getNumElements(),
+ MRI.getType(MI.getOperand(1).getReg()).getElementType());
+ moreElementsVectorSrc(MI, SrcTy, 1);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
default:
return UnableToLegalize;
}
}
-/// Expand source vectors to the size of destination vector.
-static LegalizerHelper::LegalizeResult
-equalizeVectorShuffleLengths(MachineInstr &MI, MachineIRBuilder &MIRBuilder) {
- MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
-
- LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
- LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+LegalizerHelper::LegalizeResult
+LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
unsigned MaskNumElts = Mask.size();
unsigned SrcNumElts = SrcTy.getNumElements();
- Register DstReg = MI.getOperand(0).getReg();
LLT DestEltTy = DstTy.getElementType();
- // TODO: Normalize the shuffle vector since mask and vector length don't
- // match.
- if (MaskNumElts <= SrcNumElts) {
- return LegalizerHelper::LegalizeResult::UnableToLegalize;
+ if (MaskNumElts == SrcNumElts)
+ return Legalized;
+
+ if (MaskNumElts < SrcNumElts) {
+ // Extend mask to match new destination vector size with
+ // undef values.
+ SmallVector<int, 16> NewMask(Mask);
+ for (unsigned I = MaskNumElts; I < SrcNumElts; ++I)
+ NewMask.push_back(-1);
+
+ moreElementsVectorDst(MI, SrcTy, 0);
+ MIRBuilder.setInstrAndDebugLoc(MI);
+ MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
+ MI.getOperand(1).getReg(),
+ MI.getOperand(2).getReg(), NewMask);
+ MI.eraseFromParent();
+
+ return Legalized;
}
unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
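A minimal sketch of the undef-padding branch above (hypothetical helper using plain containers instead of machine operands): a 2-element mask shuffling <4 x sN> sources is padded to length 4 with -1 lanes.

  #include <vector>

  std::vector<int> widenMask(const std::vector<int> &Mask, unsigned SrcNumElts) {
    std::vector<int> NewMask(Mask);
    for (unsigned I = Mask.size(); I < SrcNumElts; ++I)
      NewMask.push_back(-1); // -1 encodes an undef lane
    return NewMask;
  }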
@@ -5014,19 +5043,14 @@ equalizeVectorShuffleLengths(MachineInstr &MI, MachineIRBuilder &MIRBuilder) {
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
unsigned int TypeIdx, LLT MoreTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register Src1Reg = MI.getOperand(1).getReg();
- Register Src2Reg = MI.getOperand(2).getReg();
+ auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
- LLT DstTy = MRI.getType(DstReg);
- LLT Src1Ty = MRI.getType(Src1Reg);
- LLT Src2Ty = MRI.getType(Src2Reg);
unsigned NumElts = DstTy.getNumElements();
unsigned WidenNumElts = MoreTy.getNumElements();
if (DstTy.isVector() && Src1Ty.isVector() &&
- DstTy.getNumElements() > Src1Ty.getNumElements()) {
- return equalizeVectorShuffleLengths(MI, MIRBuilder);
+ DstTy.getNumElements() != Src1Ty.getNumElements()) {
+ return equalizeVectorShuffleLengths(MI);
}
if (TypeIdx != 0)
@@ -5218,9 +5242,7 @@ LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register Src1 = MI.getOperand(1).getReg();
- Register Src2 = MI.getOperand(2).getReg();
+ auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
LLT Ty = MRI.getType(DstReg);
if (Ty.isVector())
@@ -5471,8 +5493,7 @@ LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 0)
return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
+ auto [DstReg, SrcReg] = MI.getFirst2Regs();
LLT DstTy = MRI.getType(DstReg);
if (DstTy.isVector())
@@ -5539,10 +5560,7 @@ LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 1)
return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned NarrowSize = NarrowTy.getSizeInBits();
if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
@@ -5575,10 +5593,7 @@ LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 1)
return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned NarrowSize = NarrowTy.getSizeInBits();
if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
@@ -5611,9 +5626,7 @@ LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 1)
return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned NarrowSize = NarrowTy.getSizeInBits();
if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
@@ -5631,6 +5644,31 @@ LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ MachineIRBuilder &B = MIRBuilder;
+ Register ExpReg = MI.getOperand(2).getReg();
+ LLT ExpTy = MRI.getType(ExpReg);
+
+ unsigned ClampSize = NarrowTy.getScalarSizeInBits();
+
+ // Clamp the exponent to the range of the target type.
+ auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
+ auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
+ auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
+ auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
+
+ auto Trunc = B.buildTrunc(NarrowTy, Clamp);
+ Observer.changingInstr(MI);
+ MI.getOperand(2).setReg(Trunc.getReg(0));
+ Observer.changedInstr(MI);
+ return Legalized;
+}
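A scalar model of the clamp-then-truncate step, assuming a 16-bit narrow exponent type so that minIntN/maxIntN become INT16_MIN/INT16_MAX (a sketch, not the builder code):

  #include <algorithm>
  #include <cstdint>

  int16_t narrowExponent(int32_t Exp) {
    // Saturate out-of-range exponents instead of letting truncation wrap them.
    int32_t Clamped = std::clamp(Exp, int32_t{INT16_MIN}, int32_t{INT16_MAX});
    return static_cast<int16_t>(Clamped); // safe: value now fits in 16 bits
  }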
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
const auto &TII = MIRBuilder.getTII();
@@ -5649,10 +5687,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
return Legalized;
}
case TargetOpcode::G_CTLZ: {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned Len = SrcTy.getSizeInBits();
if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
@@ -5699,10 +5734,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
return Legalized;
}
case TargetOpcode::G_CTTZ: {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned Len = SrcTy.getSizeInBits();
if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
@@ -5808,10 +5840,7 @@ static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register X = MI.getOperand(1).getReg();
- Register Y = MI.getOperand(2).getReg();
- Register Z = MI.getOperand(3).getReg();
+ auto [Dst, X, Y, Z] = MI.getFirst4Regs();
LLT Ty = MRI.getType(Dst);
LLT ShTy = MRI.getType(Z);
@@ -5850,10 +5879,7 @@ LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register X = MI.getOperand(1).getReg();
- Register Y = MI.getOperand(2).getReg();
- Register Z = MI.getOperand(3).getReg();
+ auto [Dst, X, Y, Z] = MI.getFirst4Regs();
LLT Ty = MRI.getType(Dst);
LLT ShTy = MRI.getType(Z);
@@ -5932,10 +5958,7 @@ LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Amt = MI.getOperand(2).getReg();
- LLT AmtTy = MRI.getType(Amt);
+ auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
@@ -5946,12 +5969,7 @@ LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Amt = MI.getOperand(2).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
- LLT AmtTy = MRI.getType(Amt);
+ auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
@@ -6021,8 +6039,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
// representation.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
+ auto [Dst, Src] = MI.getFirst2Regs();
const LLT S64 = LLT::scalar(64);
const LLT S32 = LLT::scalar(32);
const LLT S1 = LLT::scalar(1);
@@ -6077,10 +6094,7 @@ LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
if (SrcTy == LLT::scalar(1)) {
auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
@@ -6105,10 +6119,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
const LLT S64 = LLT::scalar(64);
const LLT S32 = LLT::scalar(32);
@@ -6151,10 +6162,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
const LLT S64 = LLT::scalar(64);
const LLT S32 = LLT::scalar(32);
@@ -6194,10 +6202,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
const LLT S64 = LLT::scalar(64);
const LLT S32 = LLT::scalar(32);
@@ -6263,17 +6268,27 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
+ const LLT S1 = LLT::scalar(1);
+ const LLT S32 = LLT::scalar(32);
+
+ auto [Dst, Src] = MI.getFirst2Regs();
+ assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
+ MRI.getType(Src).getScalarType() == LLT::scalar(64));
if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
return UnableToLegalize;
+ if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) {
+ unsigned Flags = MI.getFlags();
+ auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
+ MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
const unsigned ExpMask = 0x7ff;
const unsigned ExpBiasf64 = 1023;
const unsigned ExpBiasf16 = 15;
- const LLT S32 = LLT::scalar(32);
- const LLT S1 = LLT::scalar(1);
auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
Register U = Unmerge.getReg(0);
@@ -6368,11 +6383,7 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
-
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
+ auto [DstTy, SrcTy] = MI.getFirst2LLTs();
const LLT S64 = LLT::scalar(64);
const LLT S16 = LLT::scalar(16);
@@ -6385,9 +6396,7 @@ LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
// TODO: If RHS is a constant SelectionDAGBuilder expands this into a
// multiplication tree.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src0 = MI.getOperand(1).getReg();
- Register Src1 = MI.getOperand(2).getReg();
+ auto [Dst, Src0, Src1] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Dst);
auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
@@ -6412,9 +6421,7 @@ static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src0 = MI.getOperand(1).getReg();
- Register Src1 = MI.getOperand(2).getReg();
+ auto [Dst, Src0, Src1] = MI.getFirst3Regs();
const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
LLT CmpType = MRI.getType(Dst).changeElementSize(1);
@@ -6428,13 +6435,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src0 = MI.getOperand(1).getReg();
- Register Src1 = MI.getOperand(2).getReg();
-
- const LLT Src0Ty = MRI.getType(Src0);
- const LLT Src1Ty = MRI.getType(Src1);
-
+ auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
const int Src0Size = Src0Ty.getScalarSizeInBits();
const int Src1Size = Src1Ty.getScalarSizeInBits();
@@ -6475,9 +6476,7 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
- Register Dst = MI.getOperand(0).getReg();
- Register Src0 = MI.getOperand(1).getReg();
- Register Src1 = MI.getOperand(2).getReg();
+ auto [Dst, Src0, Src1] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Dst);
if (!MI.getFlag(MachineInstr::FmNoNans)) {
@@ -6516,8 +6515,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
- Register DstReg = MI.getOperand(0).getReg();
- Register X = MI.getOperand(1).getReg();
+ auto [DstReg, X] = MI.getFirst2Regs();
const unsigned Flags = MI.getFlags();
const LLT Ty = MRI.getType(DstReg);
const LLT CondTy = Ty.changeElementSize(1);
@@ -6547,10 +6545,8 @@ LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
return Legalized;
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerFFloor(MachineInstr &MI) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
+ auto [DstReg, SrcReg] = MI.getFirst2Regs();
unsigned Flags = MI.getFlags();
LLT Ty = MRI.getType(DstReg);
const LLT CondTy = Ty.changeElementSize(1);
@@ -6577,11 +6573,8 @@ LegalizerHelper::lowerFFloor(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
const unsigned NumOps = MI.getNumOperands();
- Register DstReg = MI.getOperand(0).getReg();
- Register Src0Reg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(Src0Reg);
- unsigned PartSize = SrcTy.getSizeInBits();
+ auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
+ unsigned PartSize = Src0Ty.getSizeInBits();
LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
@@ -6729,11 +6722,8 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
- Register DstReg = MI.getOperand(0).getReg();
- Register Src0Reg = MI.getOperand(1).getReg();
- Register Src1Reg = MI.getOperand(2).getReg();
- LLT Src0Ty = MRI.getType(Src0Reg);
- LLT DstTy = MRI.getType(DstReg);
+ auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
+ MI.getFirst3RegLLTs();
LLT IdxTy = LLT::scalar(32);
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
@@ -6822,13 +6812,9 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtract(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned Offset = MI.getOperand(2).getImm();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
-
// Extract sub-vector or one element
if (SrcTy.isVector()) {
unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
@@ -6837,7 +6823,7 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) {
if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
(Offset + DstSize <= SrcTy.getSizeInBits())) {
// Unmerge and allow access to each Src element for the artifact combiner.
- auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), Src);
+ auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
// Take element(s) we need to extract and copy it (merge them).
SmallVector<Register, 8> SubVectorElts;
@@ -6846,9 +6832,9 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) {
SubVectorElts.push_back(Unmerge.getReg(Idx));
}
if (SubVectorElts.size() == 1)
- MIRBuilder.buildCopy(Dst, SubVectorElts[0]);
+ MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
else
- MIRBuilder.buildMergeLikeInstr(Dst, SubVectorElts);
+ MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
MI.eraseFromParent();
return Legalized;
@@ -6861,15 +6847,15 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) {
LLT SrcIntTy = SrcTy;
if (!SrcTy.isScalar()) {
SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
- Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
+ SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
}
if (Offset == 0)
- MIRBuilder.buildTrunc(Dst, Src);
+ MIRBuilder.buildTrunc(DstReg, SrcReg);
else {
auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
- auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
- MIRBuilder.buildTrunc(Dst, Shr);
+ auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
+ MIRBuilder.buildTrunc(DstReg, Shr);
}
MI.eraseFromParent();
@@ -6880,9 +6866,7 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register InsertSrc = MI.getOperand(2).getReg();
+ auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
uint64_t Offset = MI.getOperand(3).getImm();
LLT DstTy = MRI.getType(Src);
@@ -6972,14 +6956,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
- Register Dst0 = MI.getOperand(0).getReg();
- Register Dst1 = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
+ auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
+ MI.getFirst4RegLLTs();
const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
- LLT Ty = MRI.getType(Dst0);
- LLT BoolTy = MRI.getType(Dst1);
+ LLT Ty = Dst0Ty;
+ LLT BoolTy = Dst1Ty;
if (IsAdd)
MIRBuilder.buildAdd(Dst0, LHS, RHS);
@@ -7008,9 +6990,7 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
- Register Res = MI.getOperand(0).getReg();
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
+ auto [Res, LHS, RHS] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Res);
bool IsSigned;
bool IsAdd;
@@ -7085,9 +7065,7 @@ LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
- Register Res = MI.getOperand(0).getReg();
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
+ auto [Res, LHS, RHS] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Res);
LLT BoolTy = Ty.changeElementSize(1);
bool IsSigned;
@@ -7157,9 +7135,7 @@ LegalizerHelper::lowerShlSat(MachineInstr &MI) {
MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
"Expected shlsat opcode!");
bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
- Register Res = MI.getOperand(0).getReg();
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
+ auto [Res, LHS, RHS] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Res);
LLT BoolTy = Ty.changeElementSize(1);
@@ -7185,10 +7161,8 @@ LegalizerHelper::lowerShlSat(MachineInstr &MI) {
return Legalized;
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerBswap(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
+ auto [Dst, Src] = MI.getFirst2Regs();
const LLT Ty = MRI.getType(Src);
unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
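For reference, the shift-based expansion this produces for a 32-bit swap, in scalar form (BaseShiftAmt is 24 here); a sketch:

  #include <cstdint>

  uint32_t bswap32(uint32_t X) {
    return (X << 24) | ((X & 0xff00u) << 8) |
           ((X >> 8) & 0xff00u) | (X >> 24);
  }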
@@ -7233,8 +7207,7 @@ static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
+ auto [Dst, Src] = MI.getFirst2Regs();
const LLT Ty = MRI.getType(Src);
unsigned Size = Ty.getSizeInBits();
@@ -7312,23 +7285,23 @@ LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
- uint64_t Mask = MI.getOperand(2).getImm();
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+ FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
- if (Mask == 0) {
+ if (Mask == fcNone) {
MIRBuilder.buildConstant(DstReg, 0);
MI.eraseFromParent();
return Legalized;
}
- if ((Mask & fcAllFlags) == fcAllFlags) {
+ if (Mask == fcAllFlags) {
MIRBuilder.buildConstant(DstReg, 1);
MI.eraseFromParent();
return Legalized;
}
+ // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
+ // version
+
unsigned BitSize = SrcTy.getScalarSizeInBits();
const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
@@ -7345,7 +7318,7 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
APInt QNaNBitMask =
APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
- APInt InvertionMask = APInt::getAllOnesValue(DstTy.getScalarSizeInBits());
+ APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
@@ -7358,8 +7331,10 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
auto Res = MIRBuilder.buildConstant(DstTy, 0);
+ // Clang doesn't support capture of structured bindings:
+ LLT DstTyCopy = DstTy;
const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
- Res = MIRBuilder.buildOr(DstTy, Res, ToAppend);
+ Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
};
// Tests that involve more than one class should be processed first.
@@ -7382,8 +7357,20 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
Mask &= ~fcNegFinite;
}
+ if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
+ // fcZero | fcSubnormal => test all exponent bits are 0
+ // TODO: Handle sign bit specific cases
+ // TODO: Handle inverted case
+ if (PartialCheck == (fcZero | fcSubnormal)) {
+ auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
+ ExpBits, ZeroC));
+ Mask &= ~PartialCheck;
+ }
+ }
+
// Check for individual classes.
- if (unsigned PartialCheck = Mask & fcZero) {
+ if (FPClassTest PartialCheck = Mask & fcZero) {
if (PartialCheck == fcPosZero)
appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
AsInt, ZeroC));
@@ -7395,7 +7382,21 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
AsInt, SignBitC));
}
- if (unsigned PartialCheck = Mask & fcInf) {
+ if (FPClassTest PartialCheck = Mask & fcSubnormal) {
+ // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
+ // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
+ auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
+ auto OneC = MIRBuilder.buildConstant(IntTy, 1);
+ auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
+ auto SubnormalRes =
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
+ MIRBuilder.buildConstant(IntTy, AllOneMantissa));
+ if (PartialCheck == fcNegSubnormal)
+ SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
+ appendToRes(SubnormalRes);
+ }
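A runnable check of the unsigned-compare trick described in the comment above, for IEEE binary32, where the all-ones mantissa is 0x7fffff (a standalone sketch):

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  bool isSubnormalF32(float V) {
    uint32_t Bits;
    std::memcpy(&Bits, &V, sizeof(Bits));
    uint32_t Abs = Bits & 0x7fffffffu;
    return Abs - 1u < 0x007fffffu; // unsigned wrap sends 0 to UINT32_MAX
  }

  int main() {
    assert(isSubnormalF32(1e-45f)); // tiny value, rounds to a denormal
    assert(!isSubnormalF32(0.0f));  // zero is excluded by the wrap
    assert(!isSubnormalF32(1.0f));
  }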
+
+ if (FPClassTest PartialCheck = Mask & fcInf) {
if (PartialCheck == fcPosInf)
appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
AsInt, InfC));
@@ -7410,7 +7411,7 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
}
}
- if (unsigned PartialCheck = Mask & fcNan) {
+ if (FPClassTest PartialCheck = Mask & fcNan) {
auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
if (PartialCheck == fcNan) {
// isnan(V) ==> abs(V) u> int(inf)
@@ -7431,21 +7432,7 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
}
}
- if (unsigned PartialCheck = Mask & fcSubnormal) {
- // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
- // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
- auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
- auto OneC = MIRBuilder.buildConstant(IntTy, 1);
- auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
- auto SubnormalRes =
- MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
- MIRBuilder.buildConstant(IntTy, AllOneMantissa));
- if (PartialCheck == fcNegSubnormal)
- SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
- appendToRes(SubnormalRes);
- }
-
- if (unsigned PartialCheck = Mask & fcNormal) {
+ if (FPClassTest PartialCheck = Mask & fcNormal) {
// isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
// (max_exp-1))
APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
@@ -7472,12 +7459,8 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
// Implement vector G_SELECT in terms of XOR, AND, OR.
- Register DstReg = MI.getOperand(0).getReg();
- Register MaskReg = MI.getOperand(1).getReg();
- Register Op1Reg = MI.getOperand(2).getReg();
- Register Op2Reg = MI.getOperand(3).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT MaskTy = MRI.getType(MaskReg);
+ auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
+ MI.getFirst4RegLLTs();
if (!DstTy.isVector())
return UnableToLegalize;
@@ -7591,7 +7574,7 @@ LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
Observer.changedInstr(MI);
return Legalized;
}
- return UnableToLegalize;;
+ return UnableToLegalize;
}
static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
@@ -7638,7 +7621,7 @@ static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
// SDAGisms map cleanly to GISel concepts.
if (NewTy.isVector())
NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
- NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
+ NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
unsigned NewTySize = NewTy.getSizeInBytes();
assert(NewTySize > 0 && "Could not find appropriate type");
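The `- 1` matters: it forces a strict step down when the current size is already a power of two. A runnable sketch of that property:

  #include <bit>
  #include <cassert>
  #include <cstdint>

  int main() {
    uint64_t Size = 64;                             // already a power of two
    assert(std::bit_floor(Size - 1) == 32);         // strictly smaller next try
    assert(std::bit_floor(uint64_t{48} - 1) == 32); // non-pow2 sizes floor too
  }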
@@ -7826,9 +7809,7 @@ LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Len = MI.getOperand(2).getReg();
+ auto [Dst, Src, Len] = MI.getFirst3Regs();
const auto *MMOIt = MI.memoperands_begin();
const MachineMemOperand *MemOp = *MMOIt;
@@ -8091,9 +8072,7 @@ LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
Align DstAlign = MemOp->getBaseAlign();
Align SrcAlign;
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Len = MI.getOperand(2).getReg();
+ auto [Dst, Src, Len] = MI.getFirst3Regs();
if (Opc != TargetOpcode::G_MEMSET) {
assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 4b6c3a156709..1f2e481c63e0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -21,7 +22,6 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
#include <algorithm>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
index 7c6eac8c8ce0..49f40495d6fc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -10,6 +10,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
@@ -18,7 +20,7 @@
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -305,7 +307,7 @@ bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) {
const auto &DL = MF->getFunction().getParent()->getDataLayout();
bool AnyMerged = false;
do {
- unsigned NumPow2 = PowerOf2Floor(StoresToMerge.size());
+ unsigned NumPow2 = llvm::bit_floor(StoresToMerge.size());
unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedValue();
// Compute the biggest store we can generate to handle the number of stores.
unsigned MergeSizeBits;
@@ -400,7 +402,9 @@ bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) {
auto NewStore =
Builder.buildStore(WideReg, FirstStore->getPointerReg(), *WideMMO);
(void) NewStore;
- LLVM_DEBUG(dbgs() << "Created merged store: " << *NewStore);
+ LLVM_DEBUG(dbgs() << "Merged " << Stores.size()
+ << " stores into merged store: " << *NewStore);
+ LLVM_DEBUG(for (auto *MI : Stores) dbgs() << " " << *MI;);
NumStoresMerged += Stores.size();
MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
@@ -445,20 +449,19 @@ bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &C) {
for (auto AliasInfo : reverse(C.PotentialAliases)) {
MachineInstr *PotentialAliasOp = AliasInfo.first;
unsigned PreCheckedIdx = AliasInfo.second;
- if (static_cast<unsigned>(Idx) > PreCheckedIdx) {
- // Need to check this alias.
- if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI,
- AA)) {
- LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp
- << " detected\n");
- return true;
- }
- } else {
+ if (static_cast<unsigned>(Idx) < PreCheckedIdx) {
// Once our store index is lower than the index associated with the
// potential alias, we know that we've already checked for this alias
// and all of the earlier potential aliases too.
return false;
}
+ // Need to check this alias.
+ if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI,
+ AA)) {
+ LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp
+ << " detected\n");
+ return true;
+ }
}
return false;
};
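
The restructured loop keeps the same memoization invariant while flattening the nesting: each PotentialAliases entry records the store index up to which that operation was already vetted, so a reverse scan can stop as soon as the current index drops below a recorded PreCheckedIdx. A minimal sketch of that early-exit pattern with plain containers (stand-in types and a toy alias oracle, not the real MachineInstr plumbing):

    #include <cassert>
    #include <utility>
    #include <vector>

    // Toy oracle standing in for GISelAddressing::instMayAlias.
    static bool mayAlias(int AliasOp, int StoreIdx) {
      return AliasOp == StoreIdx;
    }

    // Entries are (operation, index already checked up to), recorded in
    // increasing index order, so scanning in reverse allows an early exit.
    static bool hasAliasHazard(int StoreIdx,
                               const std::vector<std::pair<int, int>> &Aliases) {
      for (auto It = Aliases.rbegin(); It != Aliases.rend(); ++It) {
        if (StoreIdx < It->second)
          return false; // this and all earlier entries were already vetted
        if (mayAlias(It->first, StoreIdx))
          return true;
      }
      return false;
    }

    int main() {
      std::vector<std::pair<int, int>> Aliases = {{7, 0}, {9, 2}};
      assert(hasAliasHazard(7, Aliases));  // hits the {7, 0} entry
      assert(!hasAliasHazard(1, Aliases)); // 1 < 2: suffix already vetted
      return 0;
    }
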
@@ -616,11 +619,304 @@ bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) {
return Changed;
}
+/// Check if the store \p Store is a truncstore that can be merged. That is,
+/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
+/// Register then it does not need to match and SrcVal is set to the source
+/// value found.
+/// On match, returns the start byte offset of the \p SrcVal that is being
+/// stored.
+static std::optional<int64_t>
+getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
+ MachineRegisterInfo &MRI) {
+ Register TruncVal;
+ if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
+ return std::nullopt;
+
+ // The shift amount must be a constant multiple of the narrow type.
+ // It is translated to the offset address in the wide source value "y".
+ //
+ // x = G_LSHR y, ShiftAmtC
+ // s8 z = G_TRUNC x
+ // store z, ...
+ Register FoundSrcVal;
+ int64_t ShiftAmt;
+ if (!mi_match(TruncVal, MRI,
+ m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
+ m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
+ if (!SrcVal.isValid() || TruncVal == SrcVal) {
+ if (!SrcVal.isValid())
+ SrcVal = TruncVal;
+ return 0; // If it's the lowest index store.
+ }
+ return std::nullopt;
+ }
+
+ unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
+ if (ShiftAmt % NarrowBits != 0)
+ return std::nullopt;
+ const unsigned Offset = ShiftAmt / NarrowBits;
+
+ if (SrcVal.isValid() && FoundSrcVal != SrcVal)
+ return std::nullopt;
+
+ if (!SrcVal.isValid())
+ SrcVal = FoundSrcVal;
+ else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
+ return std::nullopt;
+ return Offset;
+}
+
+/// Match a pattern where a wide type scalar value is stored by several narrow
+/// stores. Fold it into a single store or a BSWAP and a store if the target
+/// supports it.
+///
+/// Assuming little endian target:
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 0) & 0xFF;
+/// p[1] = (val >> 8) & 0xFF;
+/// p[2] = (val >> 16) & 0xFF;
+/// p[3] = (val >> 24) & 0xFF;
+/// =>
+/// *((i32)p) = val;
+///
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 24) & 0xFF;
+/// p[1] = (val >> 16) & 0xFF;
+/// p[2] = (val >> 8) & 0xFF;
+/// p[3] = (val >> 0) & 0xFF;
+/// =>
+/// *((i32)p) = BSWAP(val);
+bool LoadStoreOpt::mergeTruncStore(GStore &StoreMI,
+ SmallPtrSetImpl<GStore *> &DeletedStores) {
+ LLT MemTy = StoreMI.getMMO().getMemoryType();
+
+ // We only handle merging simple stores of 1-4 bytes.
+ if (!MemTy.isScalar())
+ return false;
+ switch (MemTy.getSizeInBits()) {
+ case 8:
+ case 16:
+ case 32:
+ break;
+ default:
+ return false;
+ }
+ if (!StoreMI.isSimple())
+ return false;
+
+ // We do a simple search for mergeable stores prior to this one.
+ // Any potential alias hazard along the way terminates the search.
+ SmallVector<GStore *> FoundStores;
+
+ // We're looking for:
+ // 1) a (store(trunc(...)))
+ // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
+ // the partial value stored.
+ // 3) where the offsets form either a little or big-endian sequence.
+
+ auto &LastStore = StoreMI;
+
+ // The single base pointer that all stores must use.
+ Register BaseReg;
+ int64_t LastOffset;
+ if (!mi_match(LastStore.getPointerReg(), *MRI,
+ m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
+ BaseReg = LastStore.getPointerReg();
+ LastOffset = 0;
+ }
+
+ GStore *LowestIdxStore = &LastStore;
+ int64_t LowestIdxOffset = LastOffset;
+
+ Register WideSrcVal;
+ auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, *MRI);
+ if (!LowestShiftAmt)
+ return false; // Didn't match a trunc.
+ assert(WideSrcVal.isValid());
+
+ LLT WideStoreTy = MRI->getType(WideSrcVal);
+ // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
+ if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
+ return false;
+ const unsigned NumStoresRequired =
+ WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
+
+ SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
+ OffsetMap[*LowestShiftAmt] = LastOffset;
+ FoundStores.emplace_back(&LastStore);
+
+ const int MaxInstsToCheck = 10;
+ int NumInstsChecked = 0;
+ for (auto II = ++LastStore.getReverseIterator();
+ II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
+ ++II) {
+ NumInstsChecked++;
+ GStore *NewStore;
+ if ((NewStore = dyn_cast<GStore>(&*II))) {
+ if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
+ break;
+ } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
+ break;
+ } else {
+ continue; // This is a safe instruction we can look past.
+ }
+
+ Register NewBaseReg;
+ int64_t MemOffset;
+ // Check we're storing to the same base + some offset.
+ if (!mi_match(NewStore->getPointerReg(), *MRI,
+ m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
+ NewBaseReg = NewStore->getPointerReg();
+ MemOffset = 0;
+ }
+ if (BaseReg != NewBaseReg)
+ break;
+
+ auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, *MRI);
+ if (!ShiftByteOffset)
+ break;
+ if (MemOffset < LowestIdxOffset) {
+ LowestIdxOffset = MemOffset;
+ LowestIdxStore = NewStore;
+ }
+
+ // Map the offset in the store and the offset in the combined value, and
+ // early return if it has been set before.
+ if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
+ OffsetMap[*ShiftByteOffset] != INT64_MAX)
+ break;
+ OffsetMap[*ShiftByteOffset] = MemOffset;
+
+ FoundStores.emplace_back(NewStore);
+ // Reset counter since we've found a matching inst.
+ NumInstsChecked = 0;
+ if (FoundStores.size() == NumStoresRequired)
+ break;
+ }
+
+ if (FoundStores.size() != NumStoresRequired) {
+ if (FoundStores.size() == 1)
+ return false;
+ // We didn't find enough stores to merge into the size of the original
+ // source value, but we may be able to generate a smaller store if we
+ // truncate the source value.
+ WideStoreTy = LLT::scalar(FoundStores.size() * MemTy.getScalarSizeInBits());
+ }
+
+ unsigned NumStoresFound = FoundStores.size();
+
+ const auto &DL = LastStore.getMF()->getDataLayout();
+ auto &C = LastStore.getMF()->getFunction().getContext();
+ // Check that a store of the wide type is both allowed and fast on the target
+ unsigned Fast = 0;
+ bool Allowed = TLI->allowsMemoryAccess(
+ C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
+ if (!Allowed || !Fast)
+ return false;
+
+ // Check if the pieces of the value are going to the expected places in memory
+ // to merge the stores.
+ unsigned NarrowBits = MemTy.getScalarSizeInBits();
+ auto checkOffsets = [&](bool MatchLittleEndian) {
+ if (MatchLittleEndian) {
+ for (unsigned i = 0; i != NumStoresFound; ++i)
+ if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
+ return false;
+ } else { // MatchBigEndian by reversing loop counter.
+ for (unsigned i = 0, j = NumStoresFound - 1; i != NumStoresFound;
+ ++i, --j)
+ if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
+ return false;
+ }
+ return true;
+ };
+
+ // Check if the offsets line up for the native data layout of this target.
+ bool NeedBswap = false;
+ bool NeedRotate = false;
+ if (!checkOffsets(DL.isLittleEndian())) {
+ // Special-case: check if byte offsets line up for the opposite endian.
+ if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
+ NeedBswap = true;
+ else if (NumStoresFound == 2 && checkOffsets(DL.isBigEndian()))
+ NeedRotate = true;
+ else
+ return false;
+ }
+
+ if (NeedBswap &&
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}, *MF))
+ return false;
+ if (NeedRotate &&
+ !isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_ROTR, {WideStoreTy, WideStoreTy}}, *MF))
+ return false;
+
+ Builder.setInstrAndDebugLoc(StoreMI);
+
+ if (WideStoreTy != MRI->getType(WideSrcVal))
+ WideSrcVal = Builder.buildTrunc(WideStoreTy, WideSrcVal).getReg(0);
+
+ if (NeedBswap) {
+ WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
+ } else if (NeedRotate) {
+ assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
+ "Unexpected type for rotate");
+ auto RotAmt =
+ Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
+ WideSrcVal =
+ Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
+ }
+
+ Builder.buildStore(WideSrcVal, LowestIdxStore->getPointerReg(),
+ LowestIdxStore->getMMO().getPointerInfo(),
+ LowestIdxStore->getMMO().getAlign());
+
+ // Erase the old stores.
+ for (auto *ST : FoundStores) {
+ ST->eraseFromParent();
+ DeletedStores.insert(ST);
+ }
+ return true;
+}
+
+bool LoadStoreOpt::mergeTruncStoresBlock(MachineBasicBlock &BB) {
+ bool Changed = false;
+ SmallVector<GStore *, 16> Stores;
+ SmallPtrSet<GStore *, 8> DeletedStores;
+ // Walk up the block so we can see the most eligible stores.
+ for (MachineInstr &MI : llvm::reverse(BB))
+ if (auto *StoreMI = dyn_cast<GStore>(&MI))
+ Stores.emplace_back(StoreMI);
+
+ for (auto *StoreMI : Stores) {
+ if (DeletedStores.count(StoreMI))
+ continue;
+ if (mergeTruncStore(*StoreMI, DeletedStores))
+ Changed = true;
+ }
+ return Changed;
+}
+
bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) {
bool Changed = false;
- for (auto &BB : MF)
+ for (auto &BB : MF) {
Changed |= mergeBlockStores(BB);
+ Changed |= mergeTruncStoresBlock(BB);
+ }
+
+ // Erase all dead instructions left over by the merging.
+ if (Changed) {
+ for (auto &BB : MF) {
+ for (auto &I : make_early_inc_range(make_range(BB.rbegin(), BB.rend()))) {
+ if (isTriviallyDead(I, *MRI))
+ I.eraseFromParent();
+ }
+ }
}
+
return Changed;
}
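
The new mergeTruncStore/mergeTruncStoresBlock pair added above folds the byte-wise store pattern described in its doc comment. A host-side demonstration of why the fold is sound (plain C++, not LLVM code; the assertion holds only on a little-endian host, mirroring the DataLayout::isLittleEndian() check the combine performs before folding without a G_BSWAP):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      std::uint32_t val = 0x11223344;
      std::uint8_t p[4];
      // The narrow-store pattern the combine matches.
      p[0] = (val >> 0) & 0xFF;
      p[1] = (val >> 8) & 0xFF;
      p[2] = (val >> 16) & 0xFF;
      p[3] = (val >> 24) & 0xFF;

      // View the same four bytes as one wide store.
      std::uint32_t merged;
      std::memcpy(&merged, p, 4);
      assert(merged == val); // little-endian host only
      return 0;
    }

Reversing the four assignments yields merged equal to the byte-swapped val, which is why the combine emits a G_BSWAP (or a G_ROTR for two-piece sequences) when the offsets line up for the opposite endianness.
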
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
index bf4dcc2c2459..55984423e5bc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -54,7 +54,7 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,
MachineInstr &MIUse = *MOUse.getParent();
InsertMBB = MIUse.getParent();
if (MIUse.isPHI())
- InsertMBB = MIUse.getOperand(MIUse.getOperandNo(&MOUse) + 1).getMBB();
+ InsertMBB = MIUse.getOperand(MOUse.getOperandNo() + 1).getMBB();
return InsertMBB == Def.getParent();
}
@@ -99,7 +99,7 @@ bool Localizer::localizeInterBlock(MachineFunction &MF,
MachineBasicBlock *InsertMBB;
LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
dbgs() << "Checking use: " << MIUse
- << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
+ << " #Opd: " << MOUse.getOperandNo() << '\n');
if (isLocalUse(MOUse, MI, InsertMBB)) {
// Even if we're in the same block, if the block is very large we could
// still have many long live ranges. Try to do intra-block localization
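
The Localizer hunks swap the deprecated MachineInstr::getOperandNo(const MachineOperand *) for MachineOperand::getOperandNo(). The + 1 in isLocalUse leans on PHI operand layout: after the def, operands alternate (incoming value, predecessor block). A toy model of that pairing (hypothetical operand strings, not real MIR):

    #include <cassert>
    #include <string>
    #include <vector>

    int main() {
      // %phi = PHI %a, %bb1, %b, %bb2   (index 0 is the def)
      std::vector<std::string> Ops = {"%phi", "%a", "%bb1", "%b", "%bb2"};
      unsigned UseIdx = 3;               // the use of %b
      assert(Ops[UseIdx + 1] == "%bb2"); // its incoming block sits at +1
      return 0;
    }
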
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 9100e064f30f..962b54ec5d6b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -80,11 +80,11 @@ MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
assert(
cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) &&
"Expected inlined-at fields to agree");
- return buildInstr(TargetOpcode::DBG_VALUE)
- .addFrameIndex(FI)
- .addImm(0)
- .addMetadata(Variable)
- .addMetadata(Expr);
+ return insertInstr(buildInstrNoInsert(TargetOpcode::DBG_VALUE)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMetadata(Variable)
+ .addMetadata(Expr));
}
MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
@@ -164,6 +164,15 @@ MachineInstrBuilder MachineIRBuilder::buildGlobalValue(const DstOp &Res,
return MIB;
}
+MachineInstrBuilder MachineIRBuilder::buildConstantPool(const DstOp &Res,
+ unsigned Idx) {
+ assert(Res.getLLTTy(*getMRI()).isPointer() && "invalid operand type");
+ auto MIB = buildInstr(TargetOpcode::G_CONSTANT_POOL);
+ Res.addDefToMIB(*getMRI(), MIB);
+ MIB.addConstantPoolIndex(Idx);
+ return MIB;
+}
+
MachineInstrBuilder MachineIRBuilder::buildJumpTable(const LLT PtrTy,
unsigned JTI) {
return buildInstr(TargetOpcode::G_JUMP_TABLE, {PtrTy}, {})
@@ -229,17 +238,25 @@ MachineIRBuilder::buildPadVectorWithUndefElements(const DstOp &Res,
LLT ResTy = Res.getLLTTy(*getMRI());
LLT Op0Ty = Op0.getLLTTy(*getMRI());
- assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type");
- assert((ResTy.getElementType() == Op0Ty.getElementType()) &&
- "Different vector element types");
- assert((ResTy.getNumElements() > Op0Ty.getNumElements()) &&
- "Op0 has more elements");
+ assert(ResTy.isVector() && "Res non vector type");
- auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
SmallVector<Register, 8> Regs;
- for (auto Op : Unmerge.getInstr()->defs())
- Regs.push_back(Op.getReg());
- Register Undef = buildUndef(Op0Ty.getElementType()).getReg(0);
+ if (Op0Ty.isVector()) {
+ assert((ResTy.getElementType() == Op0Ty.getElementType()) &&
+ "Different vector element types");
+ assert((ResTy.getNumElements() > Op0Ty.getNumElements()) &&
+ "Op0 has more elements");
+ auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
+
+ for (auto Op : Unmerge.getInstr()->defs())
+ Regs.push_back(Op.getReg());
+ } else {
+ assert((ResTy.getSizeInBits() > Op0Ty.getSizeInBits()) &&
+ "Op0 has more size");
+ Regs.push_back(Op0.getReg());
+ }
+ Register Undef =
+ buildUndef(Op0Ty.isVector() ? Op0Ty.getElementType() : Op0Ty).getReg(0);
unsigned NumberOfPadElts = ResTy.getNumElements() - Regs.size();
for (unsigned i = 0; i < NumberOfPadElts; ++i)
Regs.push_back(Undef);
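
The buildPadVectorWithUndefElements change extends the helper to scalar sources: a vector Op0 is still unmerged into its elements, while a scalar Op0 now contributes itself as the single element; either way the result is padded to the destination element count with G_IMPLICIT_DEF values. A behavioral sketch in plain C++ (an analogy only, with std::nullopt standing in for undef):

    #include <cassert>
    #include <optional>
    #include <vector>

    using Elt = std::optional<int>; // nullopt plays the role of undef

    static std::vector<Elt> padWithUndef(std::vector<int> Src, unsigned ResLen) {
      std::vector<Elt> Regs(Src.begin(), Src.end());
      assert(ResLen > Regs.size() && "result must be wider than the source");
      while (Regs.size() < ResLen)
        Regs.push_back(std::nullopt); // pad with undef elements
      return Regs;
    }

    int main() {
      auto V = padWithUndef({1, 2}, 4); // <2 x s32> -> <4 x s32>
      assert(V.size() == 4 && !V[2] && !V[3]);
      auto S = padWithUndef({5}, 4);    // scalar s32 -> <4 x s32> (new case)
      assert(S.size() == 4 && *S[0] == 5);
      return 0;
    }
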
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 080f3ca540f2..885a1056b2ea 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -69,8 +69,8 @@ INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,
"Assign register bank of generic virtual registers", false,
false)
-RegBankSelect::RegBankSelect(Mode RunningMode)
- : MachineFunctionPass(ID), OptMode(RunningMode) {
+RegBankSelect::RegBankSelect(char &PassID, Mode RunningMode)
+ : MachineFunctionPass(PassID), OptMode(RunningMode) {
if (RegBankSelectMode.getNumOccurrences() != 0) {
OptMode = RegBankSelectMode;
if (RegBankSelectMode != RunningMode)
@@ -162,8 +162,10 @@ bool RegBankSelect::repairReg(
MI = MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY)
.addDef(Dst)
.addUse(Src);
- LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst)
- << '\n');
+ LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << ':'
+ << printRegClassOrBank(Src, *MRI, TRI)
+ << " to: " << printReg(Dst) << ':'
+ << printRegClassOrBank(Dst, *MRI, TRI) << '\n');
} else {
// TODO: Support with G_IMPLICIT_DEF + G_INSERT sequence or G_EXTRACT
// sequence.
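
Threading char &PassID through the RegBankSelect constructor lets a target-specific subclass register under its own pass identity instead of reusing RegBankSelect::ID. A self-contained sketch of the pass-ID idiom this enables (hypothetical class names; LLVM passes identify themselves by the address of a static char):

    #include <cassert>

    struct PassBase {
      explicit PassBase(char &PassID) : ID(&PassID) {}
      const void *ID; // identity is the address of the derived pass's ID
    };

    struct BaseSelect : PassBase {
      static char ID;
      BaseSelect() : PassBase(ID) {}
    protected:
      explicit BaseSelect(char &PassID) : PassBase(PassID) {}
    };
    char BaseSelect::ID;

    struct TargetSelect : BaseSelect {
      static char ID;
      TargetSelect() : BaseSelect(ID) {} // distinct identity from the base
    };
    char TargetSelect::ID;

    int main() {
      BaseSelect B;
      TargetSelect T;
      assert(B.ID != T.ID); // the two passes remain distinguishable
      return 0;
    }
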
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 07448548c295..080600d3cc98 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -230,10 +230,7 @@ bool llvm::isTriviallyDead(const MachineInstr &MI,
return false;
// Instructions without side-effects are dead iff they only define dead vregs.
- for (const auto &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef())
- continue;
-
+ for (const auto &MO : MI.all_defs()) {
Register Reg = MO.getReg();
if (Reg.isPhysical() || !MRI.use_nodbg_empty(Reg))
return false;
@@ -711,14 +708,14 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
Align llvm::inferAlignFromPtrInfo(MachineFunction &MF,
const MachinePointerInfo &MPO) {
- auto PSV = MPO.V.dyn_cast<const PseudoSourceValue *>();
+ auto PSV = dyn_cast_if_present<const PseudoSourceValue *>(MPO.V);
if (auto FSPV = dyn_cast_or_null<FixedStackPseudoSourceValue>(PSV)) {
MachineFrameInfo &MFI = MF.getFrameInfo();
return commonAlignment(MFI.getObjectAlign(FSPV->getFrameIndex()),
MPO.Offset);
}
- if (const Value *V = MPO.V.dyn_cast<const Value *>()) {
+ if (const Value *V = dyn_cast_if_present<const Value *>(MPO.V)) {
const Module *M = MF.getFunction().getParent();
return V->getPointerAlignment(M->getDataLayout());
}
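
These Utils.cpp hunks migrate PointerUnion::dyn_cast calls to the free function dyn_cast_if_present, the LLVM 17 spelling for a dyn_cast that tolerates a null or empty value. A small sketch against the LLVM ADT headers (assumes an LLVM 17 install; not part of the patch):

    #include "llvm/ADT/PointerUnion.h"
    #include "llvm/Support/Casting.h"
    #include <cassert>

    int main() {
      int I = 42;
      llvm::PointerUnion<int *, float *> PU; // empty union
      // Unlike a plain dyn_cast, dyn_cast_if_present is safe on empty values.
      assert(!llvm::dyn_cast_if_present<int *>(PU));
      PU = &I;
      if (int *P = llvm::dyn_cast_if_present<int *>(PU))
        assert(*P == 42);
      return 0;
    }
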
@@ -797,7 +794,7 @@ llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) {
auto MaybeCst = getIConstantVRegVal(R, MRI);
if (!MaybeCst)
return std::nullopt;
- return MaybeCst->countLeadingZeros();
+ return MaybeCst->countl_zero();
};
if (Ty.isVector()) {
// Try to constant fold each element.