summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp')
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp638
1 files changed, 477 insertions, 161 deletions
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index a98248438e40..e090d87d59a2 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -22,6 +22,7 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -163,6 +164,9 @@ private:
bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
MachineRegisterInfo &MRI);
+ /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
+ /// SUBREG_TO_REG.
+ bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
@@ -171,6 +175,14 @@ private:
bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
+
+ /// Helper function to select vector load intrinsics like
+ /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
+ /// \p Opc is the opcode that the selected instruction should use.
+ /// \p NumVecs is the number of vector destinations for the instruction.
+ /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
+ bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
+ MachineInstr &I);
bool selectIntrinsicWithSideEffects(MachineInstr &I,
MachineRegisterInfo &MRI);
bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
@@ -181,6 +193,7 @@ private:
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
unsigned emitConstantPoolEntry(const Constant *CPVal,
MachineFunction &MF) const;
@@ -263,13 +276,9 @@ private:
const RegisterBank &DstRB, LLT ScalarTy,
Register VecReg, unsigned LaneIdx,
MachineIRBuilder &MIRBuilder) const;
-
- /// Emit a CSet for an integer compare.
- ///
- /// \p DefReg and \p SrcReg are expected to be 32-bit scalar registers.
- MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
- MachineIRBuilder &MIRBuilder,
- Register SrcReg = AArch64::WZR) const;
+ MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
+ AArch64CC::CondCode Pred,
+ MachineIRBuilder &MIRBuilder) const;
/// Emit a CSet for a FP compare.
///
/// \p Dst is expected to be a 32-bit scalar register.
@@ -367,18 +376,15 @@ private:
return selectAddrModeWRO(Root, Width / 8);
}
- ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
+ ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
+ bool AllowROR = false) const;
ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
return selectShiftedRegister(Root);
}
ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
- // TODO: selectShiftedRegister should allow for rotates on logical shifts.
- // For now, make them the same. The only difference between the two is that
- // logical shifts are allowed to fold in rotates. Otherwise, these are
- // functionally the same.
- return selectShiftedRegister(Root);
+ return selectShiftedRegister(Root, true);
}
/// Given an extend instruction, determine the correct shift-extend type for
@@ -496,14 +502,18 @@ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
}
if (RB.getID() == AArch64::FPRRegBankID) {
- if (Ty.getSizeInBits() <= 16)
+ switch (Ty.getSizeInBits()) {
+ case 8:
+ return &AArch64::FPR8RegClass;
+ case 16:
return &AArch64::FPR16RegClass;
- if (Ty.getSizeInBits() == 32)
+ case 32:
return &AArch64::FPR32RegClass;
- if (Ty.getSizeInBits() == 64)
+ case 64:
return &AArch64::FPR64RegClass;
- if (Ty.getSizeInBits() == 128)
+ case 128:
return &AArch64::FPR128RegClass;
+ }
return nullptr;
}
@@ -652,7 +662,7 @@ static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
Immed = Root.getCImm()->getZExtValue();
else if (Root.isReg()) {
auto ValAndVReg =
- getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
+ getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
if (!ValAndVReg)
return None;
Immed = ValAndVReg->Value.getSExtValue();
@@ -810,6 +820,8 @@ static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
return isStore ? AArch64::STRSui : AArch64::LDRSui;
case 64:
return isStore ? AArch64::STRDui : AArch64::LDRDui;
+ case 128:
+ return isStore ? AArch64::STRQui : AArch64::LDRQui;
}
break;
}
@@ -1195,8 +1207,8 @@ AArch64InstructionSelector::emitSelect(Register Dst, Register True,
&Optimized]() {
if (Optimized)
return false;
- auto TrueCst = getConstantVRegValWithLookThrough(True, MRI);
- auto FalseCst = getConstantVRegValWithLookThrough(False, MRI);
+ auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
+ auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
if (!TrueCst && !FalseCst)
return false;
@@ -1301,6 +1313,7 @@ static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
MachineRegisterInfo &MRI) {
assert(Reg.isValid() && "Expected valid register!");
+ bool HasZext = false;
while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
unsigned Opc = MI->getOpcode();
@@ -1314,6 +1327,9 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
// on the truncated x is the same as the bit number on x.
if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
Opc == TargetOpcode::G_TRUNC) {
+ if (Opc == TargetOpcode::G_ZEXT)
+ HasZext = true;
+
Register NextReg = MI->getOperand(1).getReg();
// Did we find something worth folding?
if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
@@ -1334,16 +1350,20 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
case TargetOpcode::G_XOR: {
TestReg = MI->getOperand(1).getReg();
Register ConstantReg = MI->getOperand(2).getReg();
- auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
if (!VRegAndVal) {
// AND commutes, check the other side for a constant.
// FIXME: Can we canonicalize the constant so that it's always on the
// same side at some point earlier?
std::swap(ConstantReg, TestReg);
- VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
+ VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
+ }
+ if (VRegAndVal) {
+ if (HasZext)
+ C = VRegAndVal->Value.getZExtValue();
+ else
+ C = VRegAndVal->Value.getSExtValue();
}
- if (VRegAndVal)
- C = VRegAndVal->Value.getSExtValue();
break;
}
case TargetOpcode::G_ASHR:
@@ -1351,7 +1371,7 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
case TargetOpcode::G_SHL: {
TestReg = MI->getOperand(1).getReg();
auto VRegAndVal =
- getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
if (VRegAndVal)
C = VRegAndVal->Value.getSExtValue();
break;
@@ -1479,7 +1499,7 @@ bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
// Check if the AND has a constant on its RHS which we can use as a mask.
// If it's a power of 2, then it's the same as checking a specific bit.
// (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
- auto MaybeBit = getConstantVRegValWithLookThrough(
+ auto MaybeBit = getIConstantVRegValWithLookThrough(
AndInst.getOperand(2).getReg(), *MIB.getMRI());
if (!MaybeBit)
return false;
@@ -1555,7 +1575,7 @@ bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
Register RHS = ICmp.getOperand(3).getReg();
// We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
- auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
// When we can emit a TB(N)Z, prefer that.
@@ -1590,7 +1610,7 @@ bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
if (ICmpInst::isEquality(Pred)) {
if (!VRegAndVal) {
std::swap(RHS, LHS);
- VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
}
@@ -2049,7 +2069,7 @@ bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
// selector which will match the register variant.
assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
const auto &MO = I.getOperand(2);
- auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
+ auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
if (!VRegAndVal)
return false;
@@ -2131,7 +2151,7 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
// Before selecting a DUP instruction, check if it is better selected as a
// MOV or load from a constant pool.
Register Src = I.getOperand(1).getReg();
- auto ValAndVReg = getConstantVRegValWithLookThrough(Src, MRI);
+ auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
if (!ValAndVReg)
return false;
LLVMContext &Ctx = MF.getFunction().getContext();
@@ -2145,17 +2165,14 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
I.eraseFromParent();
return true;
}
- case TargetOpcode::G_BR: {
- // If the branch jumps to the fallthrough block, don't bother emitting it.
- // Only do this for -O0 for a good code size improvement, because when
- // optimizations are enabled we want to leave this choice to
- // MachineBlockPlacement.
- bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None;
- if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB()))
- return false;
- I.eraseFromParent();
- return true;
- }
+ case TargetOpcode::G_SEXT:
+ // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
+ // over a normal extend.
+ if (selectUSMovFromExtend(I, MRI))
+ return true;
+ return false;
+ case TargetOpcode::G_BR:
+ return false;
case TargetOpcode::G_SHL:
return earlySelectSHL(I, MRI);
case TargetOpcode::G_CONSTANT: {
@@ -2192,27 +2209,55 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
// fold the add into the cset for the cmp by using cinc.
//
// FIXME: This would probably be a lot nicer in PostLegalizerLowering.
- Register X = I.getOperand(1).getReg();
-
- // Only handle scalars. Scalar G_ICMP is only legal for s32, so bail out
- // early if we see it.
- LLT Ty = MRI.getType(X);
- if (Ty.isVector() || Ty.getSizeInBits() != 32)
+ Register AddDst = I.getOperand(0).getReg();
+ Register AddLHS = I.getOperand(1).getReg();
+ Register AddRHS = I.getOperand(2).getReg();
+ // Only handle scalars.
+ LLT Ty = MRI.getType(AddLHS);
+ if (Ty.isVector())
return false;
-
- Register CmpReg = I.getOperand(2).getReg();
- MachineInstr *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
+ // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
+ // bits.
+ unsigned Size = Ty.getSizeInBits();
+ if (Size != 32 && Size != 64)
+ return false;
+ auto MatchCmp = [&](Register Reg) -> MachineInstr * {
+ if (!MRI.hasOneNonDBGUse(Reg))
+ return nullptr;
+ // If the LHS of the add is 32 bits, then we want to fold a 32-bit
+ // compare.
+ if (Size == 32)
+ return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
+ // We model scalar compares using 32-bit destinations right now.
+ // If it's a 64-bit compare, it'll have 64-bit sources.
+ Register ZExt;
+ if (!mi_match(Reg, MRI,
+ m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
+ return nullptr;
+ auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
+ if (!Cmp ||
+ MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
+ return nullptr;
+ return Cmp;
+ };
+ // Try to match
+ // z + (cmp pred, x, y)
+ MachineInstr *Cmp = MatchCmp(AddRHS);
if (!Cmp) {
- std::swap(X, CmpReg);
- Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
+ // (cmp pred, x, y) + z
+ std::swap(AddLHS, AddRHS);
+ Cmp = MatchCmp(AddRHS);
if (!Cmp)
return false;
}
- auto Pred =
- static_cast<CmpInst::Predicate>(Cmp->getOperand(1).getPredicate());
- emitIntegerCompare(Cmp->getOperand(2), Cmp->getOperand(3),
- Cmp->getOperand(1), MIB);
- emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB, X);
+ auto &PredOp = Cmp->getOperand(1);
+ auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
+ const AArch64CC::CondCode InvCC =
+ changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
+ MIB.setInstrAndDebugLoc(I);
+ emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
+ /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
+ emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
I.eraseFromParent();
return true;
}
@@ -2352,10 +2397,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
unsigned Size = Ty.getSizeInBits();
unsigned Opc = OpcTable[IsSigned][Size == 64];
auto Cst1 =
- getConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
assert(Cst1 && "Should have gotten a constant for src 1?");
auto Cst2 =
- getConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
assert(Cst2 && "Should have gotten a constant for src 2?");
auto LSB = Cst1->Value.getZExtValue();
auto Width = Cst2->Value.getZExtValue();
@@ -2456,10 +2501,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// FIXME: Redundant check, but even less readable when factored out.
if (isFP) {
- if (Ty != s32 && Ty != s64 && Ty != s128) {
+ if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
- << " constant, expected: " << s32 << " or " << s64
- << " or " << s128 << '\n');
+ << " constant, expected: " << s16 << " or " << s32
+ << " or " << s64 << " or " << s128 << '\n');
return false;
}
@@ -2493,23 +2538,20 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
}
- // We allow G_CONSTANT of types < 32b.
- const unsigned MovOpc =
- DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
-
if (isFP) {
- // Either emit a FMOV, or emit a copy to emit a normal mov.
- const TargetRegisterClass &GPRRC =
- DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
- const TargetRegisterClass &FPRRC =
- DefSize == 32 ? AArch64::FPR32RegClass
- : (DefSize == 64 ? AArch64::FPR64RegClass
- : AArch64::FPR128RegClass);
-
- // For 64b values, emit a constant pool load instead.
- // For s32, use a cp load if we have optsize/minsize.
- if (DefSize == 64 || DefSize == 128 ||
- (DefSize == 32 && shouldOptForSize(&MF))) {
+ const TargetRegisterClass &FPRRC = *getMinClassForRegBank(RB, DefSize);
+ // For 16, 64, and 128b values, emit a constant pool load.
+ switch (DefSize) {
+ default:
+ llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
+ case 32:
+ // For s32, use a cp load if we have optsize/minsize.
+ if (!shouldOptForSize(&MF))
+ break;
+ LLVM_FALLTHROUGH;
+ case 16:
+ case 64:
+ case 128: {
auto *FPImm = I.getOperand(1).getFPImm();
auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
if (!LoadMI) {
@@ -2520,9 +2562,13 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
I.eraseFromParent();
return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
}
+ }
- // Nope. Emit a copy and use a normal mov instead.
- const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
+ // Either emit a FMOV, or emit a copy to emit a normal mov.
+ assert(DefSize == 32 &&
+ "Expected constant pool loads for all sizes other than 32!");
+ const Register DefGPRReg =
+ MRI.createVirtualRegister(&AArch64::GPR32RegClass);
MachineOperand &RegOp = I.getOperand(0);
RegOp.setReg(DefGPRReg);
MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
@@ -2545,6 +2591,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
I.getOperand(1).ChangeToImmediate(Val);
}
+ const unsigned MovOpc =
+ DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
I.setDesc(TII.get(MovOpc));
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
return true;
@@ -2693,8 +2741,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_ZEXTLOAD:
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE: {
+ GLoadStore &LdSt = cast<GLoadStore>(I);
bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
- LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
+ LLT PtrTy = MRI.getType(LdSt.getPointerReg());
if (PtrTy != LLT::pointer(0, 64)) {
LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
@@ -2702,26 +2751,33 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return false;
}
- auto &MemOp = **I.memoperands_begin();
- uint64_t MemSizeInBytes = MemOp.getSize();
- unsigned MemSizeInBits = MemSizeInBytes * 8;
- AtomicOrdering Order = MemOp.getSuccessOrdering();
+ uint64_t MemSizeInBytes = LdSt.getMemSize();
+ unsigned MemSizeInBits = LdSt.getMemSizeInBits();
+ AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
// Need special instructions for atomics that affect ordering.
if (Order != AtomicOrdering::NotAtomic &&
Order != AtomicOrdering::Unordered &&
Order != AtomicOrdering::Monotonic) {
- assert(I.getOpcode() != TargetOpcode::G_ZEXTLOAD);
+ assert(!isa<GZExtLoad>(LdSt));
if (MemSizeInBytes > 64)
return false;
- if (I.getOpcode() == TargetOpcode::G_LOAD) {
+ if (isa<GLoad>(LdSt)) {
static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
AArch64::LDARW, AArch64::LDARX};
I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
} else {
static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
AArch64::STLRW, AArch64::STLRX};
+ Register ValReg = LdSt.getReg(0);
+ if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
+ // Emit a subreg copy of 32 bits.
+ Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
+ .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
+ I.getOperand(0).setReg(NewVal);
+ }
I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
}
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
@@ -2729,22 +2785,64 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
#ifndef NDEBUG
- const Register PtrReg = I.getOperand(1).getReg();
+ const Register PtrReg = LdSt.getPointerReg();
const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
- // Sanity-check the pointer register.
+ // Check that the pointer register is valid.
assert(PtrRB.getID() == AArch64::GPRRegBankID &&
"Load/Store pointer operand isn't a GPR");
assert(MRI.getType(PtrReg).isPointer() &&
"Load/Store pointer operand isn't a pointer");
#endif
- const Register ValReg = I.getOperand(0).getReg();
+ const Register ValReg = LdSt.getReg(0);
+ const LLT ValTy = MRI.getType(ValReg);
const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
+ // The code below doesn't support truncating stores, so we need to split it
+ // again.
+ if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
+ unsigned SubReg;
+ LLT MemTy = LdSt.getMMO().getMemoryType();
+ auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
+ if (!getSubRegForClass(RC, TRI, SubReg))
+ return false;
+
+ // Generate a subreg copy.
+ auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
+ .addReg(ValReg, 0, SubReg)
+ .getReg(0);
+ RBI.constrainGenericRegister(Copy, *RC, MRI);
+ LdSt.getOperand(0).setReg(Copy);
+ } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
+ // If this is an any-extending load from the FPR bank, split it into a regular
+ // load + extend.
+ if (RB.getID() == AArch64::FPRRegBankID) {
+ unsigned SubReg;
+ LLT MemTy = LdSt.getMMO().getMemoryType();
+ auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
+ if (!getSubRegForClass(RC, TRI, SubReg))
+ return false;
+ Register OldDst = LdSt.getReg(0);
+ Register NewDst =
+ MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
+ LdSt.getOperand(0).setReg(NewDst);
+ MRI.setRegBank(NewDst, RB);
+ // Generate a SUBREG_TO_REG to extend it.
+ MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
+ MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
+ .addImm(0)
+ .addUse(NewDst)
+ .addImm(SubReg);
+ auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI);
+ RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
+ MIB.setInstr(LdSt);
+ }
+ }
+
// Helper lambda for partially selecting I. Either returns the original
// instruction with an updated opcode, or a new instruction.
auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
- bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
+ bool IsStore = isa<GStore>(I);
const unsigned NewOpc =
selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
if (NewOpc == I.getOpcode())
@@ -2761,7 +2859,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// Folded something. Create a new instruction and return it.
auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
- IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
+ Register CurValReg = I.getOperand(0).getReg();
+ IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
NewInst.cloneMemRefs(I);
for (auto &Fn : *AddrModeFns)
Fn(NewInst);
@@ -2775,9 +2874,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// If we're storing a 0, use WZR/XZR.
if (Opcode == TargetOpcode::G_STORE) {
- auto CVal = getConstantVRegValWithLookThrough(
- LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
- /*HandleFConstants = */ false);
+ auto CVal = getIConstantVRegValWithLookThrough(
+ LoadStore->getOperand(0).getReg(), MRI);
if (CVal && CVal->Value == 0) {
switch (LoadStore->getOpcode()) {
case AArch64::STRWui:
@@ -2897,17 +2995,15 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// false, so to get the increment when it's true, we need to use the
// inverse. In this case, we want to increment when carry is set.
Register ZReg = AArch64::WZR;
- auto CsetMI = MIB.buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
- {ZReg, ZReg})
- .addImm(getInvertedCondCode(OpAndCC.second));
- constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
+ emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
+ getInvertedCondCode(OpAndCC.second), MIB);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_PTRMASK: {
Register MaskReg = I.getOperand(2).getReg();
- Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI);
+ Optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
// TODO: Implement arbitrary cases
if (!MaskVal || !isShiftedMask_64(*MaskVal))
return false;
@@ -2991,7 +3087,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
if (Opcode == TargetOpcode::G_PTRTOINT) {
assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
I.setDesc(TII.get(TargetOpcode::COPY));
- return true;
+ return selectCopy(I, TII, MRI, TRI, RBI);
}
}
@@ -2999,6 +3095,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
case TargetOpcode::G_ANYEXT: {
+ if (selectUSMovFromExtend(I, MRI))
+ return true;
+
const Register DstReg = I.getOperand(0).getReg();
const Register SrcReg = I.getOperand(1).getReg();
@@ -3045,6 +3144,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_SEXT_INREG:
case TargetOpcode::G_SEXT: {
+ if (selectUSMovFromExtend(I, MRI))
+ return true;
+
unsigned Opcode = I.getOpcode();
const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
const Register DefReg = I.getOperand(0).getReg();
@@ -3231,9 +3333,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
- emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
- MIB);
- emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB);
+ const AArch64CC::CondCode InvCC =
+ changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
+ emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
+ emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
+ /*Src2=*/AArch64::WZR, InvCC, MIB);
I.eraseFromParent();
return true;
}
@@ -3839,6 +3943,10 @@ static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
// Choose a lane copy opcode and subregister based off of the size of the
// vector's elements.
switch (EltSize) {
+ case 8:
+ CopyOpc = AArch64::CPYi8;
+ ExtractSubReg = AArch64::bsub;
+ break;
case 16:
CopyOpc = AArch64::CPYi16;
ExtractSubReg = AArch64::hsub;
@@ -3942,7 +4050,7 @@ bool AArch64InstructionSelector::selectExtractElt(
}
// Find the index to extract from.
- auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
if (!VRegAndVal)
return false;
unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
@@ -4164,6 +4272,13 @@ MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
.addConstantPoolIndex(CPIdx, 0,
AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
break;
+ case 2:
+ LoadMI =
+ &*MIRBuilder
+ .buildInstr(AArch64::LDRHui, {&AArch64::FPR16RegClass}, {Adrp})
+ .addConstantPoolIndex(CPIdx, 0,
+ AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ break;
default:
LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
<< *CPVal->getType());
@@ -4326,7 +4441,7 @@ AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
{AArch64::ANDSXrr, AArch64::ANDSWrr}};
// ANDS needs a logical immediate for its immediate form. Check if we can
// fold one in.
- if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
+ if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
int64_t Imm = ValAndVReg->Value.getSExtValue();
if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
@@ -4368,25 +4483,19 @@ MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
"Expected a 32-bit scalar register?");
#endif
- const Register ZeroReg = AArch64::WZR;
- auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
- auto CSet =
- MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
- .addImm(getInvertedCondCode(CC));
- constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
- return &*CSet;
- };
-
+ const Register ZReg = AArch64::WZR;
AArch64CC::CondCode CC1, CC2;
changeFCMPPredToAArch64CC(Pred, CC1, CC2);
+ auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
if (CC2 == AArch64CC::AL)
- return EmitCSet(Dst, CC1);
-
+ return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
+ MIRBuilder);
const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
Register Def1Reg = MRI.createVirtualRegister(RC);
Register Def2Reg = MRI.createVirtualRegister(RC);
- EmitCSet(Def1Reg, CC1);
- EmitCSet(Def2Reg, CC2);
+ auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
+ emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
+ emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
return &*OrMI;
@@ -4495,16 +4604,25 @@ MachineInstr *AArch64InstructionSelector::emitVectorConcat(
}
MachineInstr *
-AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
- MachineIRBuilder &MIRBuilder,
- Register SrcReg) const {
- // CSINC increments the result when the predicate is false. Invert it.
- const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
- CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
- auto I = MIRBuilder.buildInstr(AArch64::CSINCWr, {DefReg}, {SrcReg, SrcReg})
- .addImm(InvCC);
- constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
- return &*I;
+AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
+ Register Src2, AArch64CC::CondCode Pred,
+ MachineIRBuilder &MIRBuilder) const {
+ auto &MRI = *MIRBuilder.getMRI();
+ const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
+ // If we used a register class, then this won't necessarily have an LLT.
+ // Compute the size based off whether or not we have a class or bank.
+ unsigned Size;
+ if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
+ Size = TRI.getRegSizeInBits(*RC);
+ else
+ Size = MRI.getType(Dst).getSizeInBits();
+ // Some opcodes use s1.
+ assert(Size <= 64 && "Expected 64 bits or less only!");
+ static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
+ unsigned Opc = OpcTable[Size == 64];
+ auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
+ constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
+ return &*CSINC;
}
std::pair<MachineInstr *, AArch64CC::CondCode>
@@ -4671,7 +4789,7 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
if (!CmpInst::isUnsigned(P) && LHSDef &&
LHSDef->getOpcode() == TargetOpcode::G_AND) {
// Make sure that the RHS is 0.
- auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
+ auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
if (!ValAndVReg || ValAndVReg->Value != 0)
return nullptr;
@@ -4792,6 +4910,71 @@ MachineInstr *AArch64InstructionSelector::emitLaneInsert(
return InsElt;
}
+bool AArch64InstructionSelector::selectUSMovFromExtend(
+ MachineInstr &MI, MachineRegisterInfo &MRI) {
+ if (MI.getOpcode() != TargetOpcode::G_SEXT &&
+ MI.getOpcode() != TargetOpcode::G_ZEXT &&
+ MI.getOpcode() != TargetOpcode::G_ANYEXT)
+ return false;
+ bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
+ const Register DefReg = MI.getOperand(0).getReg();
+ const LLT DstTy = MRI.getType(DefReg);
+ unsigned DstSize = DstTy.getSizeInBits();
+
+ if (DstSize != 32 && DstSize != 64)
+ return false;
+
+ MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
+ MI.getOperand(1).getReg(), MRI);
+ int64_t Lane;
+ if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
+ return false;
+ Register Src0 = Extract->getOperand(1).getReg();
+
+ const LLT &VecTy = MRI.getType(Src0);
+
+ if (VecTy.getSizeInBits() != 128) {
+ const MachineInstr *ScalarToVector = emitScalarToVector(
+ VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
+ assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
+ Src0 = ScalarToVector->getOperand(0).getReg();
+ }
+
+ unsigned Opcode;
+ if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
+ Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
+ else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
+ Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
+ else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
+ Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
+ else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
+ Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
+ else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
+ Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
+ else
+ llvm_unreachable("Unexpected type combo for S/UMov!");
+
+ // We may need to generate one of these, depending on the type and sign of the
+ // input:
+ // DstReg = SMOV Src0, Lane;
+ // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
+ MachineInstr *ExtI = nullptr;
+ if (DstSize == 64 && !IsSigned) {
+ Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
+ ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
+ .addImm(0)
+ .addUse(NewReg)
+ .addImm(AArch64::sub_32);
+ RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
+ } else
+ ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
+
+ constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
+ MI.eraseFromParent();
+ return true;
+}
+
bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
@@ -4811,7 +4994,7 @@ bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
// Find the definition of the index. Bail out if it's not defined by a
// G_CONSTANT.
Register IdxReg = I.getOperand(3).getReg();
- auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI);
if (!VRegAndVal)
return false;
unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
@@ -4936,6 +5119,47 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec(
return true;
}
+bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
+    MachineInstr &I, MachineRegisterInfo &MRI) {
+  // Given:
+  //  %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
+  //
+  // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt. Only the first
+  // element carries a defined value; every lane past it must be undef, so the
+  // whole vector can be formed by just placing %elt in the low sub-register
+  // (the remaining bits of the destination are left undefined, which matches
+  // the undef lanes).
+  Register Dst = I.getOperand(0).getReg();
+  Register EltReg = I.getOperand(1).getReg();
+  LLT EltTy = MRI.getType(EltReg);
+  // If the destination vector isn't on the same bank as its elements, then
+  // this can't be a SUBREG_TO_REG: a plain sub-register insertion cannot move
+  // a value across register banks.
+  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
+  const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
+  if (EltRB != DstRB)
+    return false;
+  // Every operand after the first element must be defined by G_IMPLICIT_DEF;
+  // a real value in any other lane would make this transform invalid.
+  if (any_of(make_range(I.operands_begin() + 2, I.operands_end()),
+             [&MRI](const MachineOperand &Op) {
+               return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(),
+                                    MRI);
+             }))
+    return false;
+  unsigned SubReg;
+  // Pick the smallest register classes on the common bank that can hold the
+  // element and the destination; bail out on any unsupported size.
+  const TargetRegisterClass *EltRC =
+      getMinClassForRegBank(EltRB, EltTy.getSizeInBits());
+  if (!EltRC)
+    return false;
+  const TargetRegisterClass *DstRC =
+      getMinClassForRegBank(DstRB, MRI.getType(Dst).getSizeInBits());
+  if (!DstRC)
+    return false;
+  // Find the sub-register index corresponding to the element's class; if the
+  // class has no canonical sub-register index, we can't build the insertion.
+  if (!getSubRegForClass(EltRC, TRI, SubReg))
+    return false;
+  auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
+                         .addImm(0)
+                         .addUse(EltReg)
+                         .addImm(SubReg);
+  // Erase the G_BUILD_VECTOR first, then constrain the new instruction's
+  // operands and pin the (now fully selected) destination to its class.
+  I.eraseFromParent();
+  constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
+  return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
+}
+
bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
@@ -4947,6 +5171,9 @@ bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
if (tryOptConstantBuildVec(I, DstTy, MRI))
return true;
+ if (tryOptBuildVecToSubregToReg(I, MRI))
+ return true;
+
if (EltSize < 16 || EltSize > 64)
return false; // Don't support all element types yet.
const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
@@ -5013,24 +5240,45 @@ bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
return true;
}
-/// Helper function to find an intrinsic ID on an a MachineInstr. Returns the
-/// ID if it exists, and 0 otherwise.
-static unsigned findIntrinsicID(MachineInstr &I) {
- auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
- return Op.isIntrinsicID();
- });
- if (IntrinOp == I.operands_end())
- return 0;
- return IntrinOp->getIntrinsicID();
+/// Select a NEON structure-load intrinsic (e.g. @llvm.aarch64.neon.ld2/ld4)
+/// into a single LDn machine instruction \p Opc whose destination is a
+/// register tuple, then copy each tuple element out into the intrinsic's
+/// \p NumVecs destination vregs.
+bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
+                                                           unsigned NumVecs,
+                                                           MachineInstr &I) {
+  assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+  assert(Opc && "Expected an opcode?");
+  assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
+  auto &MRI = *MIB.getMRI();
+  // All NumVecs results have the same type; use result 0 as representative.
+  LLT Ty = MRI.getType(I.getOperand(0).getReg());
+  unsigned Size = Ty.getSizeInBits();
+  assert((Size == 64 || Size == 128) &&
+         "Destination must be 64 bits or 128 bits?");
+  // 64-bit results live in D-register tuples, 128-bit results in Q-register
+  // tuples. Note the SubReg + Idx arithmetic below relies on the dsub0..dsub3
+  // (resp. qsub0..qsub3) indices being numbered consecutively.
+  unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
+  // The pointer is the intrinsic's last operand.
+  auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
+  assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
+  // Build the LDn itself; Ty is only a placeholder here — the real (tuple)
+  // register class is imposed by constraining against Opc's definition.
+  auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
+  Load.cloneMemRefs(I);
+  constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
+  Register SelectedLoadDst = Load->getOperand(0).getReg();
+  // Copy each element of the loaded tuple into the corresponding destination
+  // vreg of the intrinsic (defs are operands 0..NumVecs-1).
+  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
+    auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
+                   .addReg(SelectedLoadDst, 0, SubReg + Idx);
+    // Emit the subreg copies and immediately select them.
+    // FIXME: We should refactor our copy code into an emitCopy helper and
+    // clean up uses of this pattern elsewhere in the selector.
+    selectCopy(*Vec, TII, MRI, TRI, RBI);
+  }
+  return true;
+}
bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
MachineInstr &I, MachineRegisterInfo &MRI) {
// Find the intrinsic ID.
- unsigned IntrinID = findIntrinsicID(I);
- if (!IntrinID)
- return false;
+ unsigned IntrinID = I.getIntrinsicID();
+ const LLT S8 = LLT::scalar(8);
+ const LLT S16 = LLT::scalar(16);
+ const LLT S32 = LLT::scalar(32);
+ const LLT S64 = LLT::scalar(64);
+ const LLT P0 = LLT::pointer(0, 64);
// Select the instruction.
switch (IntrinID) {
default:
@@ -5055,16 +5303,59 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
MIB.buildInstr(AArch64::BRK, {}, {})
.addImm(I.getOperand(1).getImm() | ('U' << 8));
break;
+ case Intrinsic::aarch64_neon_ld2: {
+ LLT Ty = MRI.getType(I.getOperand(0).getReg());
+ unsigned Opc = 0;
+ if (Ty == LLT::fixed_vector(8, S8))
+ Opc = AArch64::LD2Twov8b;
+ else if (Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::LD2Twov16b;
+ else if (Ty == LLT::fixed_vector(4, S16))
+ Opc = AArch64::LD2Twov4h;
+ else if (Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::LD2Twov8h;
+ else if (Ty == LLT::fixed_vector(2, S32))
+ Opc = AArch64::LD2Twov2s;
+ else if (Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::LD2Twov4s;
+ else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+ Opc = AArch64::LD2Twov2d;
+ else if (Ty == S64 || Ty == P0)
+ Opc = AArch64::LD1Twov1d;
+ else
+ llvm_unreachable("Unexpected type for ld2!");
+ selectVectorLoadIntrinsic(Opc, 2, I);
+ break;
+ }
+ case Intrinsic::aarch64_neon_ld4: {
+ LLT Ty = MRI.getType(I.getOperand(0).getReg());
+ unsigned Opc = 0;
+ if (Ty == LLT::fixed_vector(8, S8))
+ Opc = AArch64::LD4Fourv8b;
+ else if (Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::LD4Fourv16b;
+ else if (Ty == LLT::fixed_vector(4, S16))
+ Opc = AArch64::LD4Fourv4h;
+ else if (Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::LD4Fourv8h;
+ else if (Ty == LLT::fixed_vector(2, S32))
+ Opc = AArch64::LD4Fourv2s;
+ else if (Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::LD4Fourv4s;
+ else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+ Opc = AArch64::LD4Fourv2d;
+ else if (Ty == S64 || Ty == P0)
+ Opc = AArch64::LD1Fourv1d;
+ else
+ llvm_unreachable("Unexpected type for ld4!");
+ selectVectorLoadIntrinsic(Opc, 4, I);
+ break;
+ }
case Intrinsic::aarch64_neon_st2: {
Register Src1 = I.getOperand(1).getReg();
Register Src2 = I.getOperand(2).getReg();
Register Ptr = I.getOperand(3).getReg();
LLT Ty = MRI.getType(Src1);
- const LLT S8 = LLT::scalar(8);
- const LLT S16 = LLT::scalar(16);
- const LLT S32 = LLT::scalar(32);
- const LLT S64 = LLT::scalar(64);
- const LLT P0 = LLT::pointer(0, 64);
unsigned Opc;
if (Ty == LLT::fixed_vector(8, S8))
Opc = AArch64::ST2Twov8b;
@@ -5100,9 +5391,7 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
MachineRegisterInfo &MRI) {
- unsigned IntrinID = findIntrinsicID(I);
- if (!IntrinID)
- return false;
+ unsigned IntrinID = I.getIntrinsicID();
switch (IntrinID) {
default:
@@ -5146,6 +5435,33 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
I.eraseFromParent();
return true;
}
+ case Intrinsic::ptrauth_sign: {
+ Register DstReg = I.getOperand(0).getReg();
+ Register ValReg = I.getOperand(2).getReg();
+ uint64_t Key = I.getOperand(3).getImm();
+ Register DiscReg = I.getOperand(4).getReg();
+ auto DiscVal = getIConstantVRegVal(DiscReg, MRI);
+ bool IsDiscZero = DiscVal.hasValue() && DiscVal->isNullValue();
+
+ if (Key > 3)
+ return false;
+
+ unsigned Opcodes[][4] = {
+ {AArch64::PACIA, AArch64::PACIB, AArch64::PACDA, AArch64::PACDB},
+ {AArch64::PACIZA, AArch64::PACIZB, AArch64::PACDZA, AArch64::PACDZB}};
+ unsigned Opcode = Opcodes[IsDiscZero][Key];
+
+ auto PAC = MIB.buildInstr(Opcode, {DstReg}, {ValReg});
+
+ if (!IsDiscZero) {
+ PAC.addUse(DiscReg);
+ RBI.constrainGenericRegister(DiscReg, AArch64::GPR64spRegClass, MRI);
+ }
+
+ RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
+ I.eraseFromParent();
+ return true;
+ }
case Intrinsic::frameaddress:
case Intrinsic::returnaddress: {
MachineFunction &MF = *I.getParent()->getParent();
@@ -5403,7 +5719,7 @@ AArch64InstructionSelector::selectExtendedSHL(
// constant is the RHS.
Register OffsetReg = OffsetInst->getOperand(1).getReg();
Register ConstantReg = OffsetInst->getOperand(2).getReg();
- auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
+ auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
if (!ValAndVReg) {
// We didn't get a constant on the RHS. If the opcode is a shift, then
// we're done.
@@ -5412,7 +5728,7 @@ AArch64InstructionSelector::selectExtendedSHL(
// If we have a G_MUL, we can use either register. Try looking at the RHS.
std::swap(OffsetReg, ConstantReg);
- ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
+ ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
if (!ValAndVReg)
return None;
}
@@ -5580,7 +5896,7 @@ AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
// mov x0, wide
// ldr x2, [base, x0]
auto ValAndVReg =
- getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
if (ValAndVReg) {
unsigned Scale = Log2_32(SizeInBytes);
int64_t ImmOff = ValAndVReg->Value.getSExtValue();
@@ -5839,7 +6155,6 @@ AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
/// Given a shift instruction, return the correct shift type for that
/// instruction.
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
- // TODO: Handle AArch64_AM::ROR
switch (MI.getOpcode()) {
default:
return AArch64_AM::InvalidShiftExtend;
@@ -5849,15 +6164,16 @@ static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
return AArch64_AM::LSR;
case TargetOpcode::G_ASHR:
return AArch64_AM::ASR;
+ case TargetOpcode::G_ROTR:
+ return AArch64_AM::ROR;
}
}
/// Select a "shifted register" operand. If the value is not shifted, set the
/// shift operand to a default value of "lsl 0".
-///
-/// TODO: Allow shifted register to be rotated in logical instructions.
InstructionSelector::ComplexRendererFns
-AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
+AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
+ bool AllowROR) const {
if (!Root.isReg())
return None;
MachineRegisterInfo &MRI =
@@ -5865,14 +6181,14 @@ AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
// Check if the operand is defined by an instruction which corresponds to
// a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
- //
- // TODO: Handle AArch64_AM::ROR for logical instructions.
MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
if (!ShiftInst)
return None;
AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
if (ShType == AArch64_AM::InvalidShiftExtend)
return None;
+ if (ShType == AArch64_AM::ROR && !AllowROR)
+ return None;
if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
return None;
@@ -6045,7 +6361,7 @@ void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
"Expected G_CONSTANT");
Optional<int64_t> CstVal =
- getConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
+ getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
assert(CstVal && "Expected constant value");
MIB.addImm(CstVal.getValue());
}