path: root/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
author     Dimitry Andric <dim@FreeBSD.org>  2021-07-29 20:15:26 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2021-07-29 20:15:26 +0000
commit     344a3780b2e33f6ca763666c380202b18aab72a3
tree       f0b203ee6eb71d7fdd792373e3c81eb18d6934dd  /llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
parent     b60736ec1405bb0a8dd40989f67ef4c93da068ab
Diffstat (limited to 'llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp')
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp  1049
1 file changed, 628 insertions, 421 deletions
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 5259f4f5a4d0..a98248438e40 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -11,12 +11,14 @@
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
+#include "AArch64GlobalISelUtils.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
+#include "AArch64GlobalISelUtils.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Optional.h"
@@ -24,16 +26,17 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
-#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
@@ -46,6 +49,12 @@
using namespace llvm;
using namespace MIPatternMatch;
+using namespace AArch64GISelUtils;
+
+namespace llvm {
+class BlockFrequencyInfo;
+class ProfileSummaryInfo;
+}
namespace {
@@ -62,9 +71,11 @@ public:
bool select(MachineInstr &I) override;
static const char *getName() { return DEBUG_TYPE; }
- void setupMF(MachineFunction &MF, GISelKnownBits &KB,
- CodeGenCoverage &CoverageInfo) override {
- InstructionSelector::setupMF(MF, KB, CoverageInfo);
+ void setupMF(MachineFunction &MF, GISelKnownBits *KB,
+ CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) override {
+ InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
+ MIB.setMF(MF);
// hasFnAttribute() is expensive to call on every BRCOND selection, so
// cache it here for each run of the selector.
@@ -85,12 +96,12 @@ private:
bool preISelLower(MachineInstr &I);
// An early selection function that runs before the selectImpl() call.
- bool earlySelect(MachineInstr &I) const;
+ bool earlySelect(MachineInstr &I);
// Do some preprocessing of G_PHIs before we begin selection.
void processPHIs(MachineFunction &MF);
- bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
/// Eliminate same-sized cross-bank copies into stores before selectImpl().
bool contractCrossBankCopyIntoStore(MachineInstr &I,
@@ -117,10 +128,10 @@ private:
///@}
bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
- MachineRegisterInfo &MRI) const;
+ MachineRegisterInfo &MRI);
- bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const;
- bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
// Helper to generate an equivalent of scalar_to_vector into a new register,
// returned via 'Dst'.
@@ -139,28 +150,37 @@ private:
Register EltReg, unsigned LaneIdx,
const RegisterBank &RB,
MachineIRBuilder &MIRBuilder) const;
- bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
+
+ /// Emit a sequence of instructions representing a constant \p CV for a
+ /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
+ ///
+ /// \returns the last instruction in the sequence on success, and nullptr
+ /// otherwise.
+ MachineInstr *emitConstantVector(Register Dst, Constant *CV,
+ MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI);
+
+ bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
- MachineRegisterInfo &MRI) const;
- bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
- bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
- bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ MachineRegisterInfo &MRI);
+ bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
- bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
- bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
- bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
- bool selectSplitVectorUnmerge(MachineInstr &I,
- MachineRegisterInfo &MRI) const;
+ bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectIntrinsicWithSideEffects(MachineInstr &I,
- MachineRegisterInfo &MRI) const;
+ MachineRegisterInfo &MRI);
bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
- bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
- bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
- bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
- bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
- bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
unsigned emitConstantPoolEntry(const Constant *CPVal,
MachineFunction &MF) const;
@@ -244,17 +264,12 @@ private:
Register VecReg, unsigned LaneIdx,
MachineIRBuilder &MIRBuilder) const;
- /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
- /// materialized using a FMOV instruction, then update MI and return it.
- /// Otherwise, do nothing and return a nullptr.
- MachineInstr *emitFMovForFConstant(MachineInstr &MI,
- MachineRegisterInfo &MRI) const;
-
/// Emit a CSet for an integer compare.
///
- /// \p DefReg is expected to be a 32-bit scalar register.
+ /// \p DefReg and \p SrcReg are expected to be 32-bit scalar registers.
MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
- MachineIRBuilder &MIRBuilder) const;
+ MachineIRBuilder &MIRBuilder,
+ Register SrcReg = AArch64::WZR) const;
/// Emit a CSet for a FP compare.
///
/// \p Dst is expected to be a 32-bit scalar register.
@@ -392,13 +407,18 @@ private:
int OpIdx = -1) const;
void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
int OpIdx = -1) const;
+ void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx = -1) const;
+ void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx = -1) const;
+ void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx = -1) const;
// Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
- void materializeLargeCMVal(MachineInstr &I, const Value *V,
- unsigned OpFlags) const;
+ void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
// Optimization methods.
- bool tryOptSelect(MachineInstr &MI) const;
+ bool tryOptSelect(MachineInstr &MI);
MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const;
@@ -424,6 +444,8 @@ private:
// clobbered by calls.
Register MFReturnAddr;
+ MachineIRBuilder MIB;
+
#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL
@@ -468,6 +490,8 @@ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
if (Ty.getSizeInBits() == 64)
return GetAllRegSet ? &AArch64::GPR64allRegClass
: &AArch64::GPR64RegClass;
+ if (Ty.getSizeInBits() == 128)
+ return &AArch64::XSeqPairsClassRegClass;
return nullptr;
}
@@ -500,6 +524,8 @@ getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
if (SizeInBits == 64)
return GetAllRegSet ? &AArch64::GPR64allRegClass
: &AArch64::GPR64RegClass;
+ if (SizeInBits == 128)
+ return &AArch64::XSeqPairsClassRegClass;
}
if (RegBankID == AArch64::FPRRegBankID) {
@@ -562,6 +588,58 @@ static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
}
}
+/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
+/// Helper function for functions like createDTuple and createQTuple.
+///
+/// \p RegClassIDs - The list of register class IDs available for some tuple of
+/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
+/// expected to contain between 2 and 4 tuple classes.
+///
+/// \p SubRegs - The list of subregister classes associated with each register
+/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
+/// subregister class. The index of each subregister class is expected to
+/// correspond with the index of each register class.
+///
+/// \returns Either the destination register of REG_SEQUENCE instruction that
+/// was created, or the 0th element of \p Regs if \p Regs contains a single
+/// element.
+static Register createTuple(ArrayRef<Register> Regs,
+ const unsigned RegClassIDs[],
+ const unsigned SubRegs[], MachineIRBuilder &MIB) {
+ unsigned NumRegs = Regs.size();
+ if (NumRegs == 1)
+ return Regs[0];
+ assert(NumRegs >= 2 && NumRegs <= 4 &&
+ "Only support between two and 4 registers in a tuple!");
+ const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
+ auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
+ auto RegSequence =
+ MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
+ for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
+ RegSequence.addUse(Regs[I]);
+ RegSequence.addImm(SubRegs[I]);
+ }
+ return RegSequence.getReg(0);
+}
+
+/// Create a tuple of D-registers using the registers in \p Regs.
+static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
+ static const unsigned RegClassIDs[] = {
+ AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
+ static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
+ AArch64::dsub2, AArch64::dsub3};
+ return createTuple(Regs, RegClassIDs, SubRegs, MIB);
+}
+
+/// Create a tuple of Q-registers using the registers in \p Regs.
+static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
+ static const unsigned RegClassIDs[] = {
+ AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
+ static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
+ AArch64::qsub2, AArch64::qsub3};
+ return createTuple(Regs, RegClassIDs, SubRegs, MIB);
+}
+
static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
auto &MI = *Root.getParent();
auto &MBB = *MI.getParent();
@@ -865,8 +943,8 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
#ifndef NDEBUG
ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
assert(ValidCopy && "Invalid copy.");
- (void)KnownValid;
#endif
+ (void)KnownValid;
return ValidCopy;
};
@@ -932,6 +1010,15 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
<< " operand\n");
return false;
}
+
+ // If this a GPR ZEXT that we want to just reduce down into a copy.
+ // The sizes will be mismatched with the source < 32b but that's ok.
+ if (I.getOpcode() == TargetOpcode::G_ZEXT) {
+ I.setDesc(TII.get(AArch64::COPY));
+ assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
+ return selectCopy(I, TII, MRI, TRI, RBI);
+ }
+
I.setDesc(TII.get(AArch64::COPY));
return CheckCopy();
}
@@ -1085,7 +1172,9 @@ AArch64InstructionSelector::emitSelect(Register Dst, Register True,
//
// Into:
// %select = CSINC %reg, %x, cc
- if (mi_match(Reg, MRI, m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)))) {
+ if (mi_match(Reg, MRI,
+ m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
+ m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
Reg = MatchReg;
if (Invert) {
@@ -1208,60 +1297,6 @@ static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
}
}
-static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
- AArch64CC::CondCode &CondCode,
- AArch64CC::CondCode &CondCode2) {
- CondCode2 = AArch64CC::AL;
- switch (P) {
- default:
- llvm_unreachable("Unknown FP condition!");
- case CmpInst::FCMP_OEQ:
- CondCode = AArch64CC::EQ;
- break;
- case CmpInst::FCMP_OGT:
- CondCode = AArch64CC::GT;
- break;
- case CmpInst::FCMP_OGE:
- CondCode = AArch64CC::GE;
- break;
- case CmpInst::FCMP_OLT:
- CondCode = AArch64CC::MI;
- break;
- case CmpInst::FCMP_OLE:
- CondCode = AArch64CC::LS;
- break;
- case CmpInst::FCMP_ONE:
- CondCode = AArch64CC::MI;
- CondCode2 = AArch64CC::GT;
- break;
- case CmpInst::FCMP_ORD:
- CondCode = AArch64CC::VC;
- break;
- case CmpInst::FCMP_UNO:
- CondCode = AArch64CC::VS;
- break;
- case CmpInst::FCMP_UEQ:
- CondCode = AArch64CC::EQ;
- CondCode2 = AArch64CC::VS;
- break;
- case CmpInst::FCMP_UGT:
- CondCode = AArch64CC::HI;
- break;
- case CmpInst::FCMP_UGE:
- CondCode = AArch64CC::PL;
- break;
- case CmpInst::FCMP_ULT:
- CondCode = AArch64CC::LT;
- break;
- case CmpInst::FCMP_ULE:
- CondCode = AArch64CC::LE;
- break;
- case CmpInst::FCMP_UNE:
- CondCode = AArch64CC::NE;
- break;
- }
-}
-
/// Return a register which can be used as a bit to test in a TB(N)Z.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
MachineRegisterInfo &MRI) {
@@ -1605,7 +1640,7 @@ bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
}
bool AArch64InstructionSelector::selectCompareBranch(
- MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
+ MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
Register CondReg = I.getOperand(0).getReg();
MachineInstr *CCMI = MRI.getVRegDef(CondReg);
if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
@@ -1615,7 +1650,6 @@ bool AArch64InstructionSelector::selectCompareBranch(
// Try to select the G_BRCOND using whatever is feeding the condition if
// possible.
- MachineIRBuilder MIB(I);
unsigned CCMIOpc = CCMI->getOpcode();
if (CCMIOpc == TargetOpcode::G_FCMP)
return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
@@ -1650,23 +1684,7 @@ static Optional<int64_t> getVectorShiftImm(Register Reg,
assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
MachineInstr *OpMI = MRI.getVRegDef(Reg);
assert(OpMI && "Expected to find a vreg def for vector shift operand");
- if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
- return None;
-
- // Check all operands are identical immediates.
- int64_t ImmVal = 0;
- for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
- auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
- if (!VRegAndVal)
- return None;
-
- if (Idx == 1)
- ImmVal = VRegAndVal->Value.getSExtValue();
- if (ImmVal != VRegAndVal->Value.getSExtValue())
- return None;
- }
-
- return ImmVal;
+ return getAArch64VectorSplatScalar(*OpMI, MRI);
}
/// Matches and returns the shift immediate value for a SHL instruction given
@@ -1703,8 +1721,8 @@ static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegiste
return Imm;
}
-bool AArch64InstructionSelector::selectVectorSHL(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
+bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
+ MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_SHL);
Register DstReg = I.getOperand(0).getReg();
const LLT Ty = MRI.getType(DstReg);
@@ -1719,26 +1737,25 @@ bool AArch64InstructionSelector::selectVectorSHL(
Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
unsigned Opc = 0;
- if (Ty == LLT::vector(2, 64)) {
+ if (Ty == LLT::fixed_vector(2, 64)) {
Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
- } else if (Ty == LLT::vector(4, 32)) {
+ } else if (Ty == LLT::fixed_vector(4, 32)) {
Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
- } else if (Ty == LLT::vector(2, 32)) {
+ } else if (Ty == LLT::fixed_vector(2, 32)) {
Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
- } else if (Ty == LLT::vector(4, 16)) {
+ } else if (Ty == LLT::fixed_vector(4, 16)) {
Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
- } else if (Ty == LLT::vector(8, 16)) {
+ } else if (Ty == LLT::fixed_vector(8, 16)) {
Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
- } else if (Ty == LLT::vector(16, 8)) {
+ } else if (Ty == LLT::fixed_vector(16, 8)) {
Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
- } else if (Ty == LLT::vector(8, 8)) {
+ } else if (Ty == LLT::fixed_vector(8, 8)) {
Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
} else {
LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
return false;
}
- MachineIRBuilder MIB(I);
auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
if (ImmVal)
Shl.addImm(*ImmVal);
@@ -1750,7 +1767,7 @@ bool AArch64InstructionSelector::selectVectorSHL(
}
bool AArch64InstructionSelector::selectVectorAshrLshr(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
+ MachineInstr &I, MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_ASHR ||
I.getOpcode() == TargetOpcode::G_LSHR);
Register DstReg = I.getOperand(0).getReg();
@@ -1774,25 +1791,25 @@ bool AArch64InstructionSelector::selectVectorAshrLshr(
unsigned NegOpc = 0;
const TargetRegisterClass *RC =
getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
- if (Ty == LLT::vector(2, 64)) {
+ if (Ty == LLT::fixed_vector(2, 64)) {
Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
NegOpc = AArch64::NEGv2i64;
- } else if (Ty == LLT::vector(4, 32)) {
+ } else if (Ty == LLT::fixed_vector(4, 32)) {
Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
NegOpc = AArch64::NEGv4i32;
- } else if (Ty == LLT::vector(2, 32)) {
+ } else if (Ty == LLT::fixed_vector(2, 32)) {
Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
NegOpc = AArch64::NEGv2i32;
- } else if (Ty == LLT::vector(4, 16)) {
+ } else if (Ty == LLT::fixed_vector(4, 16)) {
Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
NegOpc = AArch64::NEGv4i16;
- } else if (Ty == LLT::vector(8, 16)) {
+ } else if (Ty == LLT::fixed_vector(8, 16)) {
Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
NegOpc = AArch64::NEGv8i16;
- } else if (Ty == LLT::vector(16, 8)) {
+ } else if (Ty == LLT::fixed_vector(16, 8)) {
Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
- NegOpc = AArch64::NEGv8i16;
- } else if (Ty == LLT::vector(8, 8)) {
+ NegOpc = AArch64::NEGv16i8;
+ } else if (Ty == LLT::fixed_vector(8, 8)) {
Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
NegOpc = AArch64::NEGv8i8;
} else {
@@ -1800,7 +1817,6 @@ bool AArch64InstructionSelector::selectVectorAshrLshr(
return false;
}
- MachineIRBuilder MIB(I);
auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
@@ -1842,11 +1858,10 @@ bool AArch64InstructionSelector::selectVaStartDarwin(
}
void AArch64InstructionSelector::materializeLargeCMVal(
- MachineInstr &I, const Value *V, unsigned OpFlags) const {
+ MachineInstr &I, const Value *V, unsigned OpFlags) {
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
- MachineIRBuilder MIB(I);
auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
MovZ->addOperand(MF, I.getOperand(1));
@@ -1907,7 +1922,6 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
assert(AmtMI && "could not find a vreg definition for shift amount");
if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
// Insert a subregister copy to implement a 64->32 trunc
- MachineIRBuilder MIB(I);
auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
.addReg(ShiftReg, 0, AArch64::sub_32);
MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
@@ -1915,8 +1929,21 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
}
return true;
}
- case TargetOpcode::G_STORE:
- return contractCrossBankCopyIntoStore(I, MRI);
+ case TargetOpcode::G_STORE: {
+ bool Changed = contractCrossBankCopyIntoStore(I, MRI);
+ MachineOperand &SrcOp = I.getOperand(0);
+ if (MRI.getType(SrcOp.getReg()).isPointer()) {
+ // Allow matching with imported patterns for stores of pointers. Unlike
+ // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
+ // and constrain.
+ auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
+ Register NewSrc = Copy.getReg(0);
+ SrcOp.setReg(NewSrc);
+ RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
+ Changed = true;
+ }
+ return Changed;
+ }
case TargetOpcode::G_PTR_ADD:
return convertPtrAddToAdd(I, MRI);
case TargetOpcode::G_LOAD: {
@@ -1936,11 +1963,10 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
LLT DstTy = MRI.getType(I.getOperand(0).getReg());
if (!DstTy.getElementType().isPointer())
return false;
- MachineIRBuilder MIB(I);
auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
MRI.setType(I.getOperand(0).getReg(),
DstTy.changeElementType(LLT::scalar(64)));
- MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
+ MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
I.getOperand(1).setReg(NewSrc.getReg(0));
return true;
}
@@ -1987,8 +2013,8 @@ bool AArch64InstructionSelector::convertPtrAddToAdd(
if (PtrTy.getAddressSpace() != 0)
return false;
- MachineIRBuilder MIB(I);
- const LLT CastPtrTy = PtrTy.isVector() ? LLT::vector(2, 64) : LLT::scalar(64);
+ const LLT CastPtrTy =
+ PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
// Set regbanks on the registers.
if (PtrTy.isVector())
@@ -2016,8 +2042,8 @@ bool AArch64InstructionSelector::convertPtrAddToAdd(
return true;
}
-bool AArch64InstructionSelector::earlySelectSHL(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
+bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
+ MachineRegisterInfo &MRI) {
// We try to match the immediate variant of LSL, which is actually an alias
// for a special case of UBFM. Otherwise, we fall back to the imported
// selector which will match the register variant.
@@ -2033,7 +2059,6 @@ bool AArch64InstructionSelector::earlySelectSHL(
bool Is64Bit = DstTy.getSizeInBits() == 64;
auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
- MachineIRBuilder MIB(I);
if (!Imm1Fn || !Imm2Fn)
return false;
@@ -2093,7 +2118,7 @@ bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
return true;
}
-bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
+bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -2102,6 +2127,24 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
switch (I.getOpcode()) {
+ case AArch64::G_DUP: {
+ // Before selecting a DUP instruction, check if it is better selected as a
+ // MOV or load from a constant pool.
+ Register Src = I.getOperand(1).getReg();
+ auto ValAndVReg = getConstantVRegValWithLookThrough(Src, MRI);
+ if (!ValAndVReg)
+ return false;
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ Register Dst = I.getOperand(0).getReg();
+ auto *CV = ConstantDataVector::getSplat(
+ MRI.getType(Dst).getNumElements(),
+ ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
+ ValAndVReg->Value));
+ if (!emitConstantVector(Dst, CV, MIB, MRI))
+ return false;
+ I.eraseFromParent();
+ return true;
+ }
case TargetOpcode::G_BR: {
// If the branch jumps to the fallthrough block, don't bother emitting it.
// Only do this for -O0 for a good code size improvement, because when
@@ -2139,6 +2182,74 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
I.setDesc(TII.get(TargetOpcode::COPY));
return true;
}
+
+ case TargetOpcode::G_ADD: {
+ // Check if this is being fed by a G_ICMP on either side.
+ //
+ // (cmp pred, x, y) + z
+ //
+ // In the above case, when the cmp is true, we increment z by 1. So, we can
+ // fold the add into the cset for the cmp by using cinc.
+ //
+ // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
+ Register X = I.getOperand(1).getReg();
+
+ // Only handle scalars. Scalar G_ICMP is only legal for s32, so bail out
+ // early if we see it.
+ LLT Ty = MRI.getType(X);
+ if (Ty.isVector() || Ty.getSizeInBits() != 32)
+ return false;
+
+ Register CmpReg = I.getOperand(2).getReg();
+ MachineInstr *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
+ if (!Cmp) {
+ std::swap(X, CmpReg);
+ Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
+ if (!Cmp)
+ return false;
+ }
+ auto Pred =
+ static_cast<CmpInst::Predicate>(Cmp->getOperand(1).getPredicate());
+ emitIntegerCompare(Cmp->getOperand(2), Cmp->getOperand(3),
+ Cmp->getOperand(1), MIB);
+ emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB, X);
+ I.eraseFromParent();
+ return true;
+ }
+ case TargetOpcode::G_OR: {
+ // Look for operations that take the lower `Width=Size-ShiftImm` bits of
+ // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
+ // shifting and masking that we can replace with a BFI (encoded as a BFM).
+ Register Dst = I.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+
+ if (!Ty.isScalar())
+ return false;
+
+ unsigned Size = Ty.getSizeInBits();
+ if (Size != 32 && Size != 64)
+ return false;
+
+ Register ShiftSrc;
+ int64_t ShiftImm;
+ Register MaskSrc;
+ int64_t MaskImm;
+ if (!mi_match(
+ Dst, MRI,
+ m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
+ m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
+ return false;
+
+ if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
+ return false;
+
+ int64_t Immr = Size - ShiftImm;
+ int64_t Imms = Size - ShiftImm - 1;
+ unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
+ emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
+ I.eraseFromParent();
+ return true;
+ }
default:
return false;
}
@@ -2160,6 +2271,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return false;
}
+ MIB.setInstrAndDebugLoc(I);
+
unsigned Opcode = I.getOpcode();
// G_PHI requires same handling as PHI
if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
@@ -2229,9 +2342,30 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
LLT Ty =
I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
- MachineIRBuilder MIB(I);
-
switch (Opcode) {
+ case TargetOpcode::G_SBFX:
+ case TargetOpcode::G_UBFX: {
+ static const unsigned OpcTable[2][2] = {
+ {AArch64::UBFMWri, AArch64::UBFMXri},
+ {AArch64::SBFMWri, AArch64::SBFMXri}};
+ bool IsSigned = Opcode == TargetOpcode::G_SBFX;
+ unsigned Size = Ty.getSizeInBits();
+ unsigned Opc = OpcTable[IsSigned][Size == 64];
+ auto Cst1 =
+ getConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
+ assert(Cst1 && "Should have gotten a constant for src 1?");
+ auto Cst2 =
+ getConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
+ assert(Cst2 && "Should have gotten a constant for src 2?");
+ auto LSB = Cst1->Value.getZExtValue();
+ auto Width = Cst2->Value.getZExtValue();
+ auto BitfieldInst =
+ MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
+ .addImm(LSB)
+ .addImm(LSB + Width - 1);
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
+ }
case TargetOpcode::G_BRCOND:
return selectCompareBranch(I, MF, MRI);
@@ -2256,7 +2390,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
assert(TM.getCodeModel() == CodeModel::Small &&
"Expected small code model");
- MachineIRBuilder MIB(I);
auto Op1 = BaseMI->getOperand(1);
auto Op2 = I.getOperand(2);
auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
@@ -2373,14 +2506,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
: (DefSize == 64 ? AArch64::FPR64RegClass
: AArch64::FPR128RegClass);
- // Can we use a FMOV instruction to represent the immediate?
- if (emitFMovForFConstant(I, MRI))
- return true;
-
// For 64b values, emit a constant pool load instead.
- if (DefSize == 64 || DefSize == 128) {
+ // For s32, use a cp load if we have optsize/minsize.
+ if (DefSize == 64 || DefSize == 128 ||
+ (DefSize == 32 && shouldOptForSize(&MF))) {
auto *FPImm = I.getOperand(1).getFPImm();
- MachineIRBuilder MIB(I);
auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
if (!LoadMI) {
LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
@@ -2435,21 +2565,25 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
if (DstTy.getSizeInBits() != 64)
return false;
+ unsigned Offset = I.getOperand(2).getImm();
+ if (Offset % 64 != 0)
+ return false;
+
+ // Check we have the right regbank always.
const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
- // Check we have the right regbank always.
- assert(SrcRB.getID() == AArch64::FPRRegBankID &&
- DstRB.getID() == AArch64::FPRRegBankID &&
- "Wrong extract regbank!");
- (void)SrcRB;
+ assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
+
+ if (SrcRB.getID() == AArch64::GPRRegBankID) {
+ MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
+ .addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64);
+ I.eraseFromParent();
+ return true;
+ }
// Emit the same code as a vector extract.
// Offset must be a multiple of 64.
- unsigned Offset = I.getOperand(2).getImm();
- if (Offset % 64 != 0)
- return false;
unsigned LaneIdx = Offset / 64;
- MachineIRBuilder MIB(I);
MachineInstr *Extract = emitExtractVectorElt(
DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
if (!Extract)
@@ -2560,8 +2694,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE: {
bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
- MachineIRBuilder MIB(I);
-
LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
if (PtrTy != LLT::pointer(0, 64)) {
@@ -2572,18 +2704,29 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
auto &MemOp = **I.memoperands_begin();
uint64_t MemSizeInBytes = MemOp.getSize();
- if (MemOp.isAtomic()) {
- // For now we just support s8 acquire loads to be able to compile stack
- // protector code.
- if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
- MemSizeInBytes == 1) {
- I.setDesc(TII.get(AArch64::LDARB));
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ unsigned MemSizeInBits = MemSizeInBytes * 8;
+ AtomicOrdering Order = MemOp.getSuccessOrdering();
+
+ // Need special instructions for atomics that affect ordering.
+ if (Order != AtomicOrdering::NotAtomic &&
+ Order != AtomicOrdering::Unordered &&
+ Order != AtomicOrdering::Monotonic) {
+ assert(I.getOpcode() != TargetOpcode::G_ZEXTLOAD);
+ if (MemSizeInBytes > 64)
+ return false;
+
+ if (I.getOpcode() == TargetOpcode::G_LOAD) {
+ static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
+ AArch64::LDARW, AArch64::LDARX};
+ I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
+ } else {
+ static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
+ AArch64::STLRW, AArch64::STLRX};
+ I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
}
- LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
- return false;
+ constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ return true;
}
- unsigned MemSizeInBits = MemSizeInBytes * 8;
#ifndef NDEBUG
const Register PtrReg = I.getOperand(1).getReg();
@@ -2737,9 +2880,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
case TargetOpcode::G_PTR_ADD: {
- MachineIRBuilder MIRBuilder(I);
- emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
- MIRBuilder);
+ emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
I.eraseFromParent();
return true;
}
@@ -2748,18 +2889,16 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_SSUBO:
case TargetOpcode::G_USUBO: {
// Emit the operation and get the correct condition code.
- MachineIRBuilder MIRBuilder(I);
auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
- I.getOperand(2), I.getOperand(3), MIRBuilder);
+ I.getOperand(2), I.getOperand(3), MIB);
// Now, put the overflow result in the register given by the first operand
// to the overflow op. CSINC increments the result when the predicate is
// false, so to get the increment when it's true, we need to use the
// inverse. In this case, we want to increment when carry is set.
Register ZReg = AArch64::WZR;
- auto CsetMI = MIRBuilder
- .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
- {ZReg, ZReg})
+ auto CsetMI = MIB.buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
+ {ZReg, ZReg})
.addImm(getInvertedCondCode(OpAndCC.second));
constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
I.eraseFromParent();
@@ -2832,14 +2971,14 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
I.setDesc(TII.get(TargetOpcode::COPY));
return true;
} else if (DstRB.getID() == AArch64::FPRRegBankID) {
- if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
+ if (DstTy == LLT::fixed_vector(4, 16) &&
+ SrcTy == LLT::fixed_vector(4, 32)) {
I.setDesc(TII.get(AArch64::XTNv4i16));
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
return true;
}
if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
- MachineIRBuilder MIB(I);
MachineInstr *Extract = emitExtractVectorElt(
DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
if (!Extract)
@@ -2927,7 +3066,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
AArch64::GPRRegBankID &&
"Unexpected ext regbank");
- MachineIRBuilder MIB(I);
MachineInstr *ExtI;
// First check if we're extending the result of a load which has a dest type
@@ -2947,34 +3085,46 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return selectCopy(I, TII, MRI, TRI, RBI);
}
+ // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
+ // + SUBREG_TO_REG.
+ //
// If we are zero extending from 32 bits to 64 bits, it's possible that
// the instruction implicitly does the zero extend for us. In that case,
- // we can just emit a SUBREG_TO_REG.
+ // we only need the SUBREG_TO_REG.
if (IsGPR && SrcSize == 32 && DstSize == 64) {
// Unlike with the G_LOAD case, we don't want to look through copies
- // here.
+ // here. (See isDef32.)
MachineInstr *Def = MRI.getVRegDef(SrcReg);
- if (Def && isDef32(*Def)) {
- MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
- .addImm(0)
- .addUse(SrcReg)
- .addImm(AArch64::sub_32);
+ Register SubregToRegSrc = SrcReg;
- if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
- MRI)) {
- LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
- return false;
- }
+ // Does the instruction implicitly zero extend?
+ if (!Def || !isDef32(*Def)) {
+ // No. Zero out using an OR.
+ Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ const Register ZReg = AArch64::WZR;
+ MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
+ SubregToRegSrc = OrDst;
+ }
- if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
- MRI)) {
- LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
- return false;
- }
+ MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
+ .addImm(0)
+ .addUse(SubregToRegSrc)
+ .addImm(AArch64::sub_32);
- I.eraseFromParent();
- return true;
+ if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
+ MRI)) {
+ LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
+ return false;
}
+
+ if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
+ MRI)) {
+ LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
+ return false;
+ }
+
+ I.eraseFromParent();
+ return true;
}
}
@@ -3061,7 +3211,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// Make sure to use an unused vreg instead of wzr, so that the peephole
// optimizations will be able to optimize these.
- MachineIRBuilder MIB(I);
Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
@@ -3081,22 +3230,20 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return false;
}
- MachineIRBuilder MIRBuilder(I);
auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
- MIRBuilder);
- emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder);
+ MIB);
+ emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_FCMP: {
- MachineIRBuilder MIRBuilder(I);
CmpInst::Predicate Pred =
static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
- if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(),
- MIRBuilder, Pred) ||
- !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder))
+ if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
+ Pred) ||
+ !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
return false;
I.eraseFromParent();
return true;
@@ -3142,14 +3289,18 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// difficult because at RBS we may end up pessimizing the fpr case if we
// decided to add an anyextend to fix this. Manual selection is the most
// robust solution for now.
- Register SrcReg = I.getOperand(1).getReg();
- if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
+ if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
+ AArch64::GPRRegBankID)
return false; // We expect the fpr regbank case to be imported.
- LLT SrcTy = MRI.getType(SrcReg);
- if (SrcTy.getSizeInBits() == 16)
- I.setDesc(TII.get(AArch64::DUPv8i16gpr));
- else if (SrcTy.getSizeInBits() == 8)
+ LLT VecTy = MRI.getType(I.getOperand(0).getReg());
+ if (VecTy == LLT::fixed_vector(8, 8))
+ I.setDesc(TII.get(AArch64::DUPv8i8gpr));
+ else if (VecTy == LLT::fixed_vector(16, 8))
I.setDesc(TII.get(AArch64::DUPv16i8gpr));
+ else if (VecTy == LLT::fixed_vector(4, 16))
+ I.setDesc(TII.get(AArch64::DUPv4i16gpr));
+ else if (VecTy == LLT::fixed_vector(8, 16))
+ I.setDesc(TII.get(AArch64::DUPv8i16gpr));
else
return false;
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
@@ -3182,19 +3333,33 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return false;
}
-bool AArch64InstructionSelector::selectReduction(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
+bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
+ MachineRegisterInfo &MRI) {
Register VecReg = I.getOperand(1).getReg();
LLT VecTy = MRI.getType(VecReg);
if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
+ // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
+ // a subregister copy afterwards.
+ if (VecTy == LLT::fixed_vector(2, 32)) {
+ Register DstReg = I.getOperand(0).getReg();
+ auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
+ {VecReg, VecReg});
+ auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
+ .addReg(AddP.getReg(0), 0, AArch64::ssub)
+ .getReg(0);
+ RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
+ }
+
unsigned Opc = 0;
- if (VecTy == LLT::vector(16, 8))
+ if (VecTy == LLT::fixed_vector(16, 8))
Opc = AArch64::ADDVv16i8v;
- else if (VecTy == LLT::vector(8, 16))
+ else if (VecTy == LLT::fixed_vector(8, 16))
Opc = AArch64::ADDVv8i16v;
- else if (VecTy == LLT::vector(4, 32))
+ else if (VecTy == LLT::fixed_vector(4, 32))
Opc = AArch64::ADDVv4i32v;
- else if (VecTy == LLT::vector(2, 64))
+ else if (VecTy == LLT::fixed_vector(2, 64))
Opc = AArch64::ADDPv2i64p;
else {
LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
@@ -3206,9 +3371,9 @@ bool AArch64InstructionSelector::selectReduction(
if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
unsigned Opc = 0;
- if (VecTy == LLT::vector(2, 32))
+ if (VecTy == LLT::fixed_vector(2, 32))
Opc = AArch64::FADDPv2i32p;
- else if (VecTy == LLT::vector(2, 64))
+ else if (VecTy == LLT::fixed_vector(2, 64))
Opc = AArch64::FADDPv2i64p;
else {
LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
@@ -3221,12 +3386,11 @@ bool AArch64InstructionSelector::selectReduction(
}
bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
- MachineRegisterInfo &MRI) const {
+ MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
Register JTAddr = I.getOperand(0).getReg();
unsigned JTI = I.getOperand(1).getIndex();
Register Index = I.getOperand(2).getReg();
- MachineIRBuilder MIB(I);
Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
@@ -3241,15 +3405,14 @@ bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
}
-bool AArch64InstructionSelector::selectJumpTable(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
+bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
+ MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
Register DstReg = I.getOperand(0).getReg();
unsigned JTI = I.getOperand(1).getIndex();
// We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
- MachineIRBuilder MIB(I);
auto MovMI =
MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
.addJumpTableIndex(JTI, AArch64II::MO_PAGE)
@@ -3259,14 +3422,16 @@ bool AArch64InstructionSelector::selectJumpTable(
}
bool AArch64InstructionSelector::selectTLSGlobalValue(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
+ MachineInstr &I, MachineRegisterInfo &MRI) {
if (!STI.isTargetMachO())
return false;
MachineFunction &MF = *I.getParent()->getParent();
MF.getFrameInfo().setAdjustsStack(true);
- const GlobalValue &GV = *I.getOperand(1).getGlobal();
- MachineIRBuilder MIB(I);
+ const auto &GlobalOp = I.getOperand(1);
+ assert(GlobalOp.getOffset() == 0 &&
+ "Shouldn't have an offset on TLS globals!");
+ const GlobalValue &GV = *GlobalOp.getGlobal();
auto LoadGOT =
MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
@@ -3403,7 +3568,7 @@ bool AArch64InstructionSelector::selectIntrinsicRound(
}
bool AArch64InstructionSelector::selectVectorICmp(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
+ MachineInstr &I, MachineRegisterInfo &MRI) {
Register DstReg = I.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
Register SrcReg = I.getOperand(2).getReg();
@@ -3558,7 +3723,6 @@ bool AArch64InstructionSelector::selectVectorICmp(
if (SwapOperands)
std::swap(SrcReg, Src2Reg);
- MachineIRBuilder MIB(I);
auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
@@ -3602,7 +3766,7 @@ MachineInstr *AArch64InstructionSelector::emitScalarToVector(
}
bool AArch64InstructionSelector::selectMergeValues(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
+ MachineInstr &I, MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
@@ -3616,7 +3780,6 @@ bool AArch64InstructionSelector::selectMergeValues(
if (DstTy == LLT::scalar(128)) {
if (SrcTy.getSizeInBits() != 64)
return false;
- MachineIRBuilder MIB(I);
Register DstReg = I.getOperand(0).getReg();
Register Src1Reg = I.getOperand(1).getReg();
Register Src2Reg = I.getOperand(2).getReg();
@@ -3757,7 +3920,7 @@ MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
}
bool AArch64InstructionSelector::selectExtractElt(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
+ MachineInstr &I, MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
"unexpected opcode!");
Register DstReg = I.getOperand(0).getReg();
@@ -3784,11 +3947,10 @@ bool AArch64InstructionSelector::selectExtractElt(
return false;
unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
- MachineIRBuilder MIRBuilder(I);
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
- LaneIdx, MIRBuilder);
+ LaneIdx, MIB);
if (!Extract)
return false;
@@ -3797,7 +3959,7 @@ bool AArch64InstructionSelector::selectExtractElt(
}
bool AArch64InstructionSelector::selectSplitVectorUnmerge(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
+ MachineInstr &I, MachineRegisterInfo &MRI) {
unsigned NumElts = I.getNumOperands() - 1;
Register SrcReg = I.getOperand(NumElts).getReg();
const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
@@ -3809,8 +3971,6 @@ bool AArch64InstructionSelector::selectSplitVectorUnmerge(
return false;
}
- MachineIRBuilder MIB(I);
-
// We implement a split vector operation by treating the sub-vectors as
// scalars and extracting them.
const RegisterBank &DstRB =
@@ -3826,8 +3986,8 @@ bool AArch64InstructionSelector::selectSplitVectorUnmerge(
return true;
}
-bool AArch64InstructionSelector::selectUnmergeValues(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
+bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
+ MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
"unexpected opcode");
@@ -3856,8 +4016,6 @@ bool AArch64InstructionSelector::selectUnmergeValues(
if (!NarrowTy.isScalar())
return selectSplitVectorUnmerge(I, MRI);
- MachineIRBuilder MIB(I);
-
// Choose a lane copy opcode and subregister based off of the size of the
// vector's elements.
unsigned CopyOpc = 0;
@@ -3882,6 +4040,13 @@ bool AArch64InstructionSelector::selectUnmergeValues(
} else {
// No. We have to perform subregister inserts. For each insert, create an
// implicit def and a subregister insert, and save the register we create.
+ const TargetRegisterClass *RC =
+ getMinClassForRegBank(*RBI.getRegBank(SrcReg, MRI, TRI),
+ WideTy.getScalarSizeInBits() * NumElts);
+ unsigned SubReg = 0;
+ bool Found = getSubRegForClass(RC, TRI, SubReg);
+ (void)Found;
+ assert(Found && "expected to find last operand's subeg idx");
for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
MachineInstr &ImpDefMI =
@@ -3895,7 +4060,7 @@ bool AArch64InstructionSelector::selectUnmergeValues(
TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
.addUse(ImpDefReg)
.addUse(SrcReg)
- .addImm(AArch64::dsub);
+ .addImm(SubReg);
constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
@@ -3942,14 +4107,13 @@ bool AArch64InstructionSelector::selectUnmergeValues(
}
bool AArch64InstructionSelector::selectConcatVectors(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
+ MachineInstr &I, MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
"Unexpected opcode");
Register Dst = I.getOperand(0).getReg();
Register Op1 = I.getOperand(1).getReg();
Register Op2 = I.getOperand(2).getReg();
- MachineIRBuilder MIRBuilder(I);
- MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
+ MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
if (!ConcatMI)
return false;
I.eraseFromParent();
@@ -3968,14 +4132,17 @@ AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
- unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
+ auto &MF = MIRBuilder.getMF();
+ unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
auto Adrp =
MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
.addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
MachineInstr *LoadMI = nullptr;
- switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
+ unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
+ switch (Size) {
case 16:
LoadMI =
&*MIRBuilder
@@ -3984,16 +4151,27 @@ MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
break;
case 8:
- LoadMI = &*MIRBuilder
- .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
- .addConstantPoolIndex(
- CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ LoadMI =
+ &*MIRBuilder
+ .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
+ .addConstantPoolIndex(CPIdx, 0,
+ AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ break;
+ case 4:
+ LoadMI =
+ &*MIRBuilder
+ .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
+ .addConstantPoolIndex(CPIdx, 0,
+ AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
break;
default:
LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
<< *CPVal->getType());
return nullptr;
}
+ LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
+ MachineMemOperand::MOLoad,
+ Size, Align(Size)));
constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
return LoadMI;
@@ -4316,49 +4494,15 @@ MachineInstr *AArch64InstructionSelector::emitVectorConcat(
return &*InsElt;
}
-MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
- assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
- "Expected a G_FCONSTANT!");
- MachineOperand &ImmOp = I.getOperand(1);
- unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
-
- // Only handle 32 and 64 bit defs for now.
- if (DefSize != 32 && DefSize != 64)
- return nullptr;
-
- // Don't handle null values using FMOV.
- if (ImmOp.getFPImm()->isNullValue())
- return nullptr;
-
- // Get the immediate representation for the FMOV.
- const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
- int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
- : AArch64_AM::getFP64Imm(ImmValAPF);
-
- // If this is -1, it means the immediate can't be represented as the requested
- // floating point value. Bail.
- if (Imm == -1)
- return nullptr;
-
- // Update MI to represent the new FMOV instruction, constrain it, and return.
- ImmOp.ChangeToImmediate(Imm);
- unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
- I.setDesc(TII.get(MovOpc));
- constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- return &I;
-}
-
MachineInstr *
AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
- MachineIRBuilder &MIRBuilder) const {
+ MachineIRBuilder &MIRBuilder,
+ Register SrcReg) const {
// CSINC increments the result when the predicate is false. Invert it.
const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
- auto I =
- MIRBuilder
- .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
- .addImm(InvCC);
+ auto I = MIRBuilder.buildInstr(AArch64::CSINCWr, {DefReg}, {SrcReg, SrcReg})
+ .addImm(InvCC);
constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
return &*I;
}
@@ -4382,8 +4526,7 @@ AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
}
}
-bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
- MachineIRBuilder MIB(I);
+bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) {
MachineRegisterInfo &MRI = *MIB.getMRI();
// We want to recognize this pattern:
//
@@ -4489,37 +4632,10 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
//
// cmn z, y
- // Helper lambda to detect the subtract followed by the compare.
- // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
- auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
- if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
- return false;
-
- // Need to make sure NZCV is the same at the end of the transformation.
- if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
- return false;
-
- // We want to match against SUBs.
- if (DefMI->getOpcode() != TargetOpcode::G_SUB)
- return false;
-
- // Make sure that we're getting
- // x = G_SUB 0, y
- auto ValAndVReg =
- getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
- if (!ValAndVReg || ValAndVReg->Value != 0)
- return false;
-
- // This can safely be represented as a CMN.
- return true;
- };
-
// Check if the RHS or LHS of the G_ICMP is defined by a SUB
MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
- CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
- const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
-
+ auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
// Given this:
//
// x = G_SUB 0, y
@@ -4528,7 +4644,7 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
// Produce this:
//
// cmn y, z
- if (IsCMN(LHSDef, CC))
+ if (isCMN(LHSDef, P, MRI))
return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
// Same idea here, but with the RHS of the compare instead:
@@ -4541,7 +4657,7 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
// Produce this:
//
// cmn z, y
- if (IsCMN(RHSDef, CC))
+ if (isCMN(RHSDef, P, MRI))
return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
// Given this:
@@ -4567,7 +4683,7 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
}
bool AArch64InstructionSelector::selectShuffleVector(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
+ MachineInstr &I, MachineRegisterInfo &MRI) {
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
Register Src1Reg = I.getOperand(1).getReg();
const LLT Src1Ty = MRI.getType(Src1Reg);
@@ -4600,11 +4716,9 @@ bool AArch64InstructionSelector::selectShuffleVector(
}
}
- MachineIRBuilder MIRBuilder(I);
-
// Use a constant pool to load the index vector for TBL.
Constant *CPVal = ConstantVector::get(CstIdxs);
- MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
+ MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
if (!IndexLoad) {
LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
return false;
@@ -4613,25 +4727,23 @@ bool AArch64InstructionSelector::selectShuffleVector(
if (DstTy.getSizeInBits() != 128) {
assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
// This case can be done with TBL1.
- MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
+ MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIB);
if (!Concat) {
LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
return false;
}
// The constant pool load will be 64 bits, so need to convert to FPR128 reg.
- IndexLoad =
- emitScalarToVector(64, &AArch64::FPR128RegClass,
- IndexLoad->getOperand(0).getReg(), MIRBuilder);
+ IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
+ IndexLoad->getOperand(0).getReg(), MIB);
- auto TBL1 = MIRBuilder.buildInstr(
+ auto TBL1 = MIB.buildInstr(
AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
{Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
auto Copy =
- MIRBuilder
- .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
+ MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
.addReg(TBL1.getReg(0), 0, AArch64::dsub);
RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
I.eraseFromParent();
@@ -4640,16 +4752,10 @@ bool AArch64InstructionSelector::selectShuffleVector(
// For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
// Q registers for regalloc.
- auto RegSeq = MIRBuilder
- .buildInstr(TargetOpcode::REG_SEQUENCE,
- {&AArch64::QQRegClass}, {Src1Reg})
- .addImm(AArch64::qsub0)
- .addUse(Src2Reg)
- .addImm(AArch64::qsub1);
-
- auto TBL2 = MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
- {RegSeq, IndexLoad->getOperand(0)});
- constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
+ SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
+ auto RegSeq = createQTuple(Regs, MIB);
+ auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
+ {RegSeq, IndexLoad->getOperand(0)});
constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
I.eraseFromParent();
return true;
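
The shuffle lowering above materializes the TBL index vector in the constant pool and then uses TBL1 (64-bit result over a concatenated source) or TBL2 (128-bit result over a Q-register tuple built by createQTuple). A rough standalone sketch of how an element-wise shuffle mask becomes TBL byte indices (illustrative only; undef handling assumed):

// Standalone sketch: expand an element-wise shuffle mask into the byte
// indices a TBL lookup expects. Undef lanes (negative mask entries) are
// mapped to 255; out-of-range TBL indices write zero bytes.
#include <cstdint>
#include <vector>

std::vector<uint8_t> buildTblByteIndices(const std::vector<int> &Mask,
                                         unsigned EltSizeInBytes) {
  std::vector<uint8_t> Bytes;
  for (int Elt : Mask)
    for (unsigned B = 0; B < EltSizeInBytes; ++B)
      Bytes.push_back(Elt < 0 ? uint8_t(255)
                              : uint8_t(Elt * EltSizeInBytes + B));
  return Bytes;
}
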
@@ -4686,8 +4792,8 @@ MachineInstr *AArch64InstructionSelector::emitLaneInsert(
return InsElt;
}
-bool AArch64InstructionSelector::selectInsertElt(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
+bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
+ MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
// Get information on the destination.
@@ -4713,13 +4819,12 @@ bool AArch64InstructionSelector::selectInsertElt(
// Perform the lane insert.
Register SrcReg = I.getOperand(1).getReg();
const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
- MachineIRBuilder MIRBuilder(I);
if (VecSize < 128) {
// If the vector we're inserting into is smaller than 128 bits, widen it
// to 128 to do the insert.
- MachineInstr *ScalarToVec = emitScalarToVector(
- VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
+ MachineInstr *ScalarToVec =
+ emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
if (!ScalarToVec)
return false;
SrcReg = ScalarToVec->getOperand(0).getReg();
@@ -4729,7 +4834,7 @@ bool AArch64InstructionSelector::selectInsertElt(
// Note that if our vector is already 128 bits, we end up emitting an extra
// register.
MachineInstr *InsMI =
- emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
+ emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIB);
if (VecSize < 128) {
// If we had to widen to perform the insert, then we have to demote back to
@@ -4749,7 +4854,7 @@ bool AArch64InstructionSelector::selectInsertElt(
<< "\n");
return false;
}
- MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
+ MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
.addReg(DemoteVec, 0, SubReg);
RBI.constrainGenericRegister(DstReg, *RC, MRI);
} else {
@@ -4762,8 +4867,46 @@ bool AArch64InstructionSelector::selectInsertElt(
return true;
}
+MachineInstr *
+AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
+ MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI) {
+ LLT DstTy = MRI.getType(Dst);
+ unsigned DstSize = DstTy.getSizeInBits();
+ if (CV->isNullValue()) {
+ if (DstSize == 128) {
+ auto Mov =
+ MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
+ constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+ return &*Mov;
+ }
+
+ if (DstSize == 64) {
+ auto Mov =
+ MIRBuilder
+ .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
+ .addImm(0);
+ auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
+ .addReg(Mov.getReg(0), 0, AArch64::dsub);
+ RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
+ return &*Copy;
+ }
+ }
+
+ auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
+ if (!CPLoad) {
+ LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
+ return nullptr;
+ }
+
+ auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
+ RBI.constrainGenericRegister(
+ Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
+ return &*Copy;
+}
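
The new emitConstantVector helper folds the logic that tryOptConstantBuildVec used to carry inline: an all-zeros vector is selected as MOVIv2d_ns #0 (with a dsub copy for 64-bit destinations), and everything else falls back to a constant-pool load. A condensed standalone sketch of that decision order (stand-in names, not the LLVM API):

// Standalone sketch of the selection order used for constant vectors:
// prefer an immediate zero move, otherwise load from the constant pool.
enum class VecConstKind { ZeroMovi128, ZeroMovi64ThenCopy, ConstantPoolLoad };

VecConstKind classifyConstantVector(bool IsAllZero, unsigned DstSizeInBits) {
  if (IsAllZero && DstSizeInBits == 128)
    return VecConstKind::ZeroMovi128;        // MOVIv2d_ns #0 into the dest
  if (IsAllZero && DstSizeInBits == 64)
    return VecConstKind::ZeroMovi64ThenCopy; // MOVI into a Q reg, copy dsub
  return VecConstKind::ConstantPoolLoad;     // constant-pool load plus copy
}
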
+
bool AArch64InstructionSelector::tryOptConstantBuildVec(
- MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
+ MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
unsigned DstSize = DstTy.getSizeInBits();
assert(DstSize <= 128 && "Unexpected build_vec type!");
@@ -4787,40 +4930,14 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec(
return false;
}
Constant *CV = ConstantVector::get(Csts);
- MachineIRBuilder MIB(I);
- if (CV->isNullValue()) {
- // Until the importer can support immAllZerosV in pattern leaf nodes,
- // select a zero move manually here.
- Register DstReg = I.getOperand(0).getReg();
- if (DstSize == 128) {
- auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0);
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
- } else if (DstSize == 64) {
- auto Mov =
- MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
- .addImm(0);
- MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
- .addReg(Mov.getReg(0), 0, AArch64::dsub);
- I.eraseFromParent();
- return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI);
- }
- }
- auto *CPLoad = emitLoadFromConstantPool(CV, MIB);
- if (!CPLoad) {
- LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector");
+ if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
return false;
- }
- MIB.buildCopy(I.getOperand(0), CPLoad->getOperand(0));
- RBI.constrainGenericRegister(I.getOperand(0).getReg(),
- *MRI.getRegClass(CPLoad->getOperand(0).getReg()),
- MRI);
I.eraseFromParent();
return true;
}
-bool AArch64InstructionSelector::selectBuildVector(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
+bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
+ MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
// Until we port more of the optimized selections, for now just use a vector
// insert sequence.
@@ -4833,12 +4950,11 @@ bool AArch64InstructionSelector::selectBuildVector(
if (EltSize < 16 || EltSize > 64)
return false; // Don't support all element types yet.
const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
- MachineIRBuilder MIRBuilder(I);
const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
MachineInstr *ScalarToVec =
emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
- I.getOperand(1).getReg(), MIRBuilder);
+ I.getOperand(1).getReg(), MIB);
if (!ScalarToVec)
return false;
@@ -4852,7 +4968,7 @@ bool AArch64InstructionSelector::selectBuildVector(
// Note that if we don't do a subregister copy, we can end up making an
// extra register.
PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
- MIRBuilder);
+ MIB);
DstVec = PrevMI->getOperand(0).getReg();
}
@@ -4881,8 +4997,7 @@ bool AArch64InstructionSelector::selectBuildVector(
Register Reg = MRI.createVirtualRegister(RC);
Register DstReg = I.getOperand(0).getReg();
- MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
- .addReg(DstVec, 0, SubReg);
+ MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
MachineOperand &RegOp = I.getOperand(1);
RegOp.setReg(Reg);
RBI.constrainGenericRegister(DstReg, *RC, MRI);
@@ -4910,27 +5025,73 @@ static unsigned findIntrinsicID(MachineInstr &I) {
}
bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
+ MachineInstr &I, MachineRegisterInfo &MRI) {
// Find the intrinsic ID.
unsigned IntrinID = findIntrinsicID(I);
if (!IntrinID)
return false;
- MachineIRBuilder MIRBuilder(I);
// Select the instruction.
switch (IntrinID) {
default:
return false;
+ case Intrinsic::aarch64_ldxp:
+ case Intrinsic::aarch64_ldaxp: {
+ auto NewI = MIB.buildInstr(
+ IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
+ {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
+ {I.getOperand(3)});
+ NewI.cloneMemRefs(I);
+ constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
+ break;
+ }
case Intrinsic::trap:
- MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
+ MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1);
break;
case Intrinsic::debugtrap:
- MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
+ MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
break;
case Intrinsic::ubsantrap:
- MIRBuilder.buildInstr(AArch64::BRK, {}, {})
+ MIB.buildInstr(AArch64::BRK, {}, {})
.addImm(I.getOperand(1).getImm() | ('U' << 8));
break;
+ case Intrinsic::aarch64_neon_st2: {
+ Register Src1 = I.getOperand(1).getReg();
+ Register Src2 = I.getOperand(2).getReg();
+ Register Ptr = I.getOperand(3).getReg();
+ LLT Ty = MRI.getType(Src1);
+ const LLT S8 = LLT::scalar(8);
+ const LLT S16 = LLT::scalar(16);
+ const LLT S32 = LLT::scalar(32);
+ const LLT S64 = LLT::scalar(64);
+ const LLT P0 = LLT::pointer(0, 64);
+ unsigned Opc;
+ if (Ty == LLT::fixed_vector(8, S8))
+ Opc = AArch64::ST2Twov8b;
+ else if (Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::ST2Twov16b;
+ else if (Ty == LLT::fixed_vector(4, S16))
+ Opc = AArch64::ST2Twov4h;
+ else if (Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::ST2Twov8h;
+ else if (Ty == LLT::fixed_vector(2, S32))
+ Opc = AArch64::ST2Twov2s;
+ else if (Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::ST2Twov4s;
+ else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+ Opc = AArch64::ST2Twov2d;
+ else if (Ty == S64 || Ty == P0)
+ Opc = AArch64::ST1Twov1d;
+ else
+ llvm_unreachable("Unexpected type for st2!");
+ SmallVector<Register, 2> Regs = {Src1, Src2};
+ Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
+ : createDTuple(Regs, MIB);
+ auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
+ Store.cloneMemRefs(I);
+ constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
+ break;
+ }
}
I.eraseFromParent();
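
The aarch64_neon_st2 case picks its ST2 variant purely from the LLT of the source vectors and packs the two sources into a D- or Q-register tuple based on their width. A standalone sketch of the same type-to-opcode mapping (opcode names copied from the hunk above; the lookup shape is illustrative):

// Standalone sketch of the ST2 opcode choice above: the (element count,
// element size) pair selects the instruction, and 64-bit scalar data
// degenerates to ST1Twov1d. Returns nullptr for unhandled types.
const char *st2OpcodeFor(unsigned NumElts, unsigned EltBits) {
  if (NumElts == 8  && EltBits == 8)  return "ST2Twov8b";
  if (NumElts == 16 && EltBits == 8)  return "ST2Twov16b";
  if (NumElts == 4  && EltBits == 16) return "ST2Twov4h";
  if (NumElts == 8  && EltBits == 16) return "ST2Twov8h";
  if (NumElts == 2  && EltBits == 32) return "ST2Twov2s";
  if (NumElts == 4  && EltBits == 32) return "ST2Twov4s";
  if (NumElts == 2  && EltBits == 64) return "ST2Twov2d"; // also 2 x p0
  if (NumElts == 1  && EltBits == 64) return "ST1Twov1d"; // s64 / p0
  return nullptr;
}
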
@@ -4942,7 +5103,6 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
unsigned IntrinID = findIntrinsicID(I);
if (!IntrinID)
return false;
- MachineIRBuilder MIRBuilder(I);
switch (IntrinID) {
default:
@@ -4960,7 +5120,7 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
// the source and destination if they are on GPRs.
if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
- MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
+ MIB.buildCopy({SrcReg}, {I.getOperand(2)});
// Make sure the copy ends up getting constrained properly.
RBI.constrainGenericRegister(I.getOperand(2).getReg(),
@@ -4971,14 +5131,14 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
// Actually insert the instruction.
- auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
+ auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
// Did we create a new register for the destination?
if (DstReg != I.getOperand(0).getReg()) {
// Yep. Copy the result of the instruction back into the original
// destination.
- MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
+ MIB.buildCopy({I.getOperand(0)}, {DstReg});
RBI.constrainGenericRegister(I.getOperand(0).getReg(),
AArch64::GPR32RegClass, MRI);
}
@@ -5005,11 +5165,11 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
}
if (STI.hasPAuth()) {
- MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
+ MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
} else {
- MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
- MIRBuilder.buildInstr(AArch64::XPACLRI);
- MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
+ MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
+ MIB.buildInstr(AArch64::XPACLRI);
+ MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
}
I.eraseFromParent();
@@ -5021,31 +5181,42 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
while (Depth--) {
Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
auto Ldr =
- MIRBuilder.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr})
- .addImm(0);
+ MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
FrameAddr = NextFrame;
}
if (IntrinID == Intrinsic::frameaddress)
- MIRBuilder.buildCopy({DstReg}, {FrameAddr});
+ MIB.buildCopy({DstReg}, {FrameAddr});
else {
MFI.setReturnAddressIsTaken(true);
if (STI.hasPAuth()) {
Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
- MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
- MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
+ MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
+ MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
} else {
- MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1);
- MIRBuilder.buildInstr(AArch64::XPACLRI);
- MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
+ MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
+ .addImm(1);
+ MIB.buildInstr(AArch64::XPACLRI);
+ MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
}
}
I.eraseFromParent();
return true;
}
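
For nonzero depths the frameaddress/returnaddress path walks the AAPCS64 frame-record chain: each record holds the previous frame pointer at offset 0 and the saved return address at offset 8, and for returnaddress the saved LR is then stripped of any pointer-auth signature with XPACI (or the XPACLRI hint on cores without PAuth). A standalone illustration of the walk, with the pointer-auth stripping omitted:

// Standalone sketch of the frame-record walk selected above. Each AAPCS64
// frame record is two 64-bit slots: the caller's frame pointer, then the
// saved return address.
#include <cstdint>

struct FrameRecord {
  const FrameRecord *Prev; // loaded by LDRXui ..., #0 in the selector
  uint64_t SavedLR;        // loaded by LDRXui ..., #1 (offset 8)
};

uint64_t returnAddressAtDepth(const FrameRecord *FP, unsigned Depth) {
  while (Depth--)
    FP = FP->Prev;
  return FP->SavedLR; // the selector then applies XPACI / XPACLRI
}
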
+ case Intrinsic::swift_async_context_addr:
+ auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
+ {Register(AArch64::FP)})
+ .addImm(8)
+ .addImm(0);
+ constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);
+
+ MF->getFrameInfo().setFrameAddressIsTaken(true);
+ MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
+ I.eraseFromParent();
+ return true;
}
return false;
}
@@ -5168,7 +5339,7 @@ bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
// Always fold if there is one use, or if we're optimizing for size.
Register DefReg = MI.getOperand(0).getReg();
if (MRI.hasOneNonDBGUse(DefReg) ||
- MI.getParent()->getParent()->getFunction().hasMinSize())
+ MI.getParent()->getParent()->getFunction().hasOptSize())
return true;
// It's better to avoid folding and recomputing shifts when we don't have a
@@ -5577,8 +5748,10 @@ AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
return None;
// TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
- // TODO: Need to check GV's offset % size if doing offset folding into globals.
- assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global");
+ auto Offset = Adrp.getOperand(1).getOffset();
+ if (Offset % Size != 0)
+ return None;
+
auto GV = Adrp.getOperand(1).getGlobal();
if (GV->isThreadLocal())
return None;
@@ -5592,7 +5765,7 @@ AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
Register AdrpReg = Adrp.getOperand(0).getReg();
return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
[=](MachineInstrBuilder &MIB) {
- MIB.addGlobalAddress(GV, /* Offset */ 0,
+ MIB.addGlobalAddress(GV, Offset,
OpFlags | AArch64II::MO_PAGEOFF |
AArch64II::MO_NC);
}}};
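
tryFoldAddLowIntoImm now folds a global's constant offset into the :lo12: page-offset operand instead of asserting it is zero, but only when the offset is a multiple of the access size, since the scaled unsigned-immediate load/store form cannot represent a misaligned page offset. A standalone sketch of that guard (hypothetical helper name):

// Standalone sketch of the new guard: a global's constant offset may only
// be folded into the :lo12: operand of a scaled load/store when it is a
// multiple of the access size; otherwise the fold is declined.
#include <cstdint>
#include <optional>

std::optional<int64_t> foldableLo12Offset(int64_t GlobalOffset,
                                          uint64_t AccessSizeInBytes) {
  if (AccessSizeInBytes == 0 ||
      GlobalOffset % int64_t(AccessSizeInBytes) != 0)
    return std::nullopt;
  return GlobalOffset; // safe to add as MO_PAGEOFF | MO_NC
}
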
@@ -5736,9 +5909,9 @@ AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
assert(Size != 64 && "Extend from 64 bits?");
switch (Size) {
case 8:
- return AArch64_AM::SXTB;
+ return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
case 16:
- return AArch64_AM::SXTH;
+ return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
case 32:
return AArch64_AM::SXTW;
default:
@@ -5751,9 +5924,9 @@ AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
assert(Size != 64 && "Extend from 64 bits?");
switch (Size) {
case 8:
- return AArch64_AM::UXTB;
+ return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
case 16:
- return AArch64_AM::UXTH;
+ return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
case 32:
return AArch64_AM::UXTW;
default:
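
The IsLoadStore change reflects that register-offset loads and stores only accept word-sized extends (UXTW/SXTW, plus LSL/SXTX), so 8- and 16-bit extends now report InvalidShiftExtend instead of SXTB/SXTH/UXTB/UXTH. A standalone sketch of the narrowed mapping (enumerators illustrative):

// Standalone sketch: which extend survives when the extended register
// feeds a load/store addressing mode (only word-sized extends are
// encodable there) versus an arithmetic instruction.
enum class Ext { SXTB, SXTH, SXTW, UXTB, UXTH, UXTW, Invalid };

Ext extendForSize(unsigned SrcBits, bool IsSigned, bool IsLoadStore) {
  switch (SrcBits) {
  case 8:
    return IsLoadStore ? Ext::Invalid : (IsSigned ? Ext::SXTB : Ext::UXTB);
  case 16:
    return IsLoadStore ? Ext::Invalid : (IsSigned ? Ext::SXTH : Ext::UXTH);
  case 32:
    return IsSigned ? Ext::SXTW : Ext::UXTW;
  default:
    return Ext::Invalid;
  }
}
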
@@ -5895,6 +6068,33 @@ void AArch64InstructionSelector::renderLogicalImm64(
MIB.addImm(Enc);
}
+void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
+ "Expected G_FCONSTANT");
+ MIB.addImm(
+ AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
+}
+
+void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
+ "Expected G_FCONSTANT");
+ MIB.addImm(
+ AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
+}
+
+void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
+ "Expected G_FCONSTANT");
+ MIB.addImm(
+ AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
+}
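
The renderFPImm{16,32,64} hooks let imported patterns emit an FMOV-style 8-bit immediate directly from a G_FCONSTANT. As I understand the FMOV immediate format, the encodable values are +/-(n/16) x 2^e with n in [16, 31] and e in [-3, 4]; a standalone brute-force membership check over that set (illustrative, independent of the AArch64_AM helpers):

// Standalone sketch: brute-force test of whether a double falls in the
// FMOV 8-bit immediate set, assumed here to be +/-(n/16) * 2^e with
// n in [16, 31] and e in [-3, 4].
#include <cmath>

bool isFMOVImmEncodable(double V) {
  for (int Sign = 0; Sign < 2; ++Sign)
    for (int N = 16; N <= 31; ++N)
      for (int E = -3; E <= 4; ++E)
        if (V == (Sign ? -1.0 : 1.0) * (N / 16.0) * std::ldexp(1.0, E))
          return true;
  return false;
}
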
+
bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
const MachineInstr &MI, unsigned NumBytes) const {
if (!MI.mayLoadOrStore())
@@ -5946,7 +6146,14 @@ static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
// Insert a cross-bank copy.
auto *OpDef = MRI.getVRegDef(OpReg);
const LLT &Ty = MRI.getType(OpReg);
- MIB.setInsertPt(*OpDef->getParent(), std::next(OpDef->getIterator()));
+ MachineBasicBlock &OpDefBB = *OpDef->getParent();
+
+ // Any instruction we insert must appear after all PHIs in the block
+ // for the block to be valid MIR.
+ MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
+ if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
+ InsertPt = OpDefBB.getFirstNonPHI();
+ MIB.setInsertPt(*OpDef->getParent(), InsertPt);
auto Copy = MIB.buildCopy(Ty, OpReg);
MRI.setRegBank(Copy.getReg(0), *DstRB);
MO.setReg(Copy.getReg(0));