Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64InstrInfo.cpp')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 557
1 file changed, 424 insertions, 133 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 54f3f7c101324..5139ae5ccaf19 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -24,9 +24,9 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -111,6 +111,14 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
// This gets lowered to an instruction sequence which takes 16 bytes
NumBytes = 16;
break;
+ case AArch64::SpeculationBarrierISBDSBEndBB:
+ // This gets lowered to 2 4-byte instructions.
+ NumBytes = 8;
+ break;
+ case AArch64::SpeculationBarrierSBEndBB:
+    // This gets lowered to a single 4-byte instruction.
+ NumBytes = 4;
+ break;
case AArch64::JumpTableDest32:
case AArch64::JumpTableDest16:
case AArch64::JumpTableDest8:
@@ -119,11 +127,25 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
case AArch64::SPACE:
NumBytes = MI.getOperand(1).getImm();
break;
+ case TargetOpcode::BUNDLE:
+ NumBytes = getInstBundleLength(MI);
+ break;
}
return NumBytes;
}
+unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
+ unsigned Size = 0;
+ MachineBasicBlock::const_instr_iterator I = MI.getIterator();
+ MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ assert(!I->isBundle() && "No nested bundle!");
+ Size += getInstSizeInBytes(*I);
+ }
+ return Size;
+}
+
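The new getInstBundleLength helper sums the sizes of the instructions folded behind a BUNDLE header, stopping at the first instruction that is no longer inside the bundle. A minimal standalone sketch of the same accumulation pattern, using a hypothetical Inst struct instead of the real MachineInstr/iterator machinery:

#include <cassert>
#include <cstddef>
#include <vector>

// Hypothetical stand-in for a MachineInstr in a basic block; not LLVM types.
struct Inst {
  unsigned SizeInBytes;
  bool InsideBundle; // true for instructions folded behind the BUNDLE header
};

// Sum the sizes of the instructions that follow the bundle header, stopping
// at the first instruction that is no longer part of the bundle.
unsigned bundleLength(const std::vector<Inst> &Block, std::size_t Header) {
  unsigned Size = 0;
  for (std::size_t I = Header + 1;
       I < Block.size() && Block[I].InsideBundle; ++I)
    Size += Block[I].SizeInBytes;
  return Size;
}

int main() {
  // A BUNDLE header followed by two 4-byte instructions, then a normal one.
  std::vector<Inst> Block = {{0, false}, {4, true}, {4, true}, {4, false}};
  assert(bundleLength(Block, 0) == 8);
}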
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
SmallVectorImpl<MachineOperand> &Cond) {
// Block ends with fall-through condbranch.
@@ -216,6 +238,12 @@ bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
if (I == MBB.end())
return false;
+ // Skip over SpeculationBarrierEndBB terminators
+ if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
+ I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
+ --I;
+ }
+
if (!isUnpredicatedTerminator(*I))
return false;
@@ -496,8 +524,9 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Cond,
- unsigned TrueReg, unsigned FalseReg,
- int &CondCycles, int &TrueCycles,
+ Register DstReg, Register TrueReg,
+ Register FalseReg, int &CondCycles,
+ int &TrueCycles,
int &FalseCycles) const {
// Check register classes.
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -506,6 +535,12 @@ bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
if (!RC)
return false;
+ // Also need to check the dest regclass, in case we're trying to optimize
+ // something like:
+ // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
+ if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
+ return false;
+
// Expanding cbz/tbz requires an extra cycle of latency on the condition.
unsigned ExtraCondLat = Cond.size() != 1;
@@ -538,9 +573,9 @@ bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DstReg,
+ const DebugLoc &DL, Register DstReg,
ArrayRef<MachineOperand> Cond,
- unsigned TrueReg, unsigned FalseReg) const {
+ Register TrueReg, Register FalseReg) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
// Parse the condition code, see parseCondBranch() above.
@@ -910,7 +945,7 @@ bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
}
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
+ Register &SrcReg, Register &DstReg,
unsigned &SubIdx) const {
switch (MI.getOpcode()) {
default:
@@ -935,6 +970,7 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
unsigned WidthA = 0, WidthB = 0;
+ bool OffsetAIsScalable = false, OffsetBIsScalable = false;
assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
@@ -948,9 +984,14 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
// base are identical, and the offset of a lower memory access +
// the width doesn't overlap the offset of a higher memory access,
// then the memory accesses are different.
- if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
- getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
- if (BaseOpA->isIdenticalTo(*BaseOpB)) {
+ // If OffsetAIsScalable and OffsetBIsScalable are both true, they
+ // are assumed to have the same scale (vscale).
+ if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
+ WidthA, TRI) &&
+ getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
+ WidthB, TRI)) {
+ if (BaseOpA->isIdenticalTo(*BaseOpB) &&
+ OffsetAIsScalable == OffsetBIsScalable) {
int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
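The updated disjointness check only fires when both accesses use an identical base operand and, with this change, also agree on whether their offsets are vscale-scaled; the accesses are then disjoint if the lower one ends at or before the higher one starts. A small standalone sketch of that interval test (simplified: the real code additionally verifies the base operands are identical):

#include <cassert>
#include <cstdint>

// Two accesses off the same base are trivially disjoint when the lower one
// ends at or before the higher one begins. The offsets are only comparable
// because both are plain bytes or both are vscale-scaled bytes.
bool accessesDisjoint(int64_t OffsetA, unsigned WidthA, int64_t OffsetB,
                      unsigned WidthB) {
  int64_t LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int64_t HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  unsigned LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowOffset + LowWidth <= HighOffset;
}

int main() {
  assert(accessesDisjoint(0, 8, 8, 8));   // [0,8) and [8,16) do not overlap
  assert(!accessesDisjoint(0, 16, 8, 8)); // [0,16) overlaps [8,16)
}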
@@ -984,8 +1025,8 @@ bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
-bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &CmpMask,
+bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int &CmpMask,
int &CmpValue) const {
// The first operand can be a frame index where we'd normally expect a
// register.
@@ -1156,10 +1197,9 @@ static bool areCFlagsAccessedBetweenInstrs(
return MI.getIterator() == From;
}) != To->getParent()->rend());
- // We iterate backward starting \p To until we hit \p From.
- for (--To; To != From; --To) {
- const MachineInstr &Instr = *To;
-
+ // We iterate backward starting at \p To until we hit \p From.
+ for (const MachineInstr &Instr :
+ instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
if (((AccessToCheck & AK_Write) &&
Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
@@ -1180,7 +1220,7 @@ static bool areCFlagsAccessedBetweenInstrs(
/// instruction.
/// Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
- MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
+ MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int CmpMask,
int CmpValue, const MachineRegisterInfo *MRI) const {
assert(CmpInstr.getParent());
assert(MRI);
@@ -1416,10 +1456,9 @@ static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
return false;
UsedNZCV NZCVUsedAfterCmp;
- for (auto I = std::next(CmpInstr->getIterator()),
- E = CmpInstr->getParent()->instr_end();
- I != E; ++I) {
- const MachineInstr &Instr = *I;
+ for (const MachineInstr &Instr :
+ instructionsWithoutDebug(std::next(CmpInstr->getIterator()),
+ CmpInstr->getParent()->instr_end())) {
if (Instr.readsRegister(AArch64::NZCV, TRI)) {
AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
@@ -1684,6 +1723,8 @@ unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
case AArch64::STRSui:
case AArch64::STRDui:
case AArch64::STRQui:
+ case AArch64::LDR_PXI:
+ case AArch64::STR_PXI:
if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
FrameIndex = MI.getOperand(1).getIndex();
@@ -1796,9 +1837,37 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
case AArch64::STNPSi:
case AArch64::LDG:
case AArch64::STGPi:
+ case AArch64::LD1B_IMM:
+ case AArch64::LD1H_IMM:
+ case AArch64::LD1W_IMM:
+ case AArch64::LD1D_IMM:
+ case AArch64::ST1B_IMM:
+ case AArch64::ST1H_IMM:
+ case AArch64::ST1W_IMM:
+ case AArch64::ST1D_IMM:
+ case AArch64::LD1B_H_IMM:
+ case AArch64::LD1SB_H_IMM:
+ case AArch64::LD1H_S_IMM:
+ case AArch64::LD1SH_S_IMM:
+ case AArch64::LD1W_D_IMM:
+ case AArch64::LD1SW_D_IMM:
+ case AArch64::ST1B_H_IMM:
+ case AArch64::ST1H_S_IMM:
+ case AArch64::ST1W_D_IMM:
+ case AArch64::LD1B_S_IMM:
+ case AArch64::LD1SB_S_IMM:
+ case AArch64::LD1H_D_IMM:
+ case AArch64::LD1SH_D_IMM:
+ case AArch64::ST1B_S_IMM:
+ case AArch64::ST1H_D_IMM:
+ case AArch64::LD1B_D_IMM:
+ case AArch64::LD1SB_D_IMM:
+ case AArch64::ST1B_D_IMM:
return 3;
case AArch64::ADDG:
case AArch64::STGOffset:
+ case AArch64::LDR_PXI:
+ case AArch64::STR_PXI:
return 2;
}
}
@@ -1978,20 +2047,25 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
return true;
}
-bool AArch64InstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
- const MachineOperand *&BaseOp,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const {
+bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
if (!LdSt.mayLoadOrStore())
return false;
- unsigned Width;
- return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
+ const MachineOperand *BaseOp;
+ if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
+ Width, TRI))
+ return false;
+ BaseOps.push_back(BaseOp);
+ return true;
}
bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
- unsigned &Width, const TargetRegisterInfo *TRI) const {
+ bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
// Handle only loads/stores with base register followed by immediate offset.
if (LdSt.getNumExplicitOperands() == 3) {
@@ -2010,7 +2084,7 @@ bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
// Get the scaling factor for the instruction and set the width for the
// instruction.
- unsigned Scale = 0;
+ TypeSize Scale(0U, false);
int64_t Dummy1, Dummy2;
// If this returns false, then it's an instruction we don't want to handle.
@@ -2022,12 +2096,13 @@ bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
// set to 1.
if (LdSt.getNumExplicitOperands() == 3) {
BaseOp = &LdSt.getOperand(1);
- Offset = LdSt.getOperand(2).getImm() * Scale;
+ Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinSize();
} else {
assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
BaseOp = &LdSt.getOperand(2);
- Offset = LdSt.getOperand(3).getImm() * Scale;
+ Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinSize();
}
+ OffsetIsScalable = Scale.isScalable();
if (!BaseOp->isReg() && !BaseOp->isFI())
return false;
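With Scale carried as a TypeSize, the immediate is multiplied by the known-minimum scale and OffsetIsScalable records whether the result is further multiplied by the runtime vscale. A minimal sketch of that convention, using a simplified stand-in for TypeSize rather than the LLVM class:

#include <cassert>
#include <cstdint>

// Simplified stand-in for llvm::TypeSize: a known-minimum size plus a flag
// saying whether it scales with the runtime vector length (vscale).
struct SimpleTypeSize {
  uint64_t KnownMinSize;
  bool Scalable;
};

// The offset returned by getMemOperandWithOffsetWidth: the immediate times
// the known-minimum scale. When the scale is scalable, the true byte offset
// is this value multiplied by vscale at run time (OffsetIsScalable == true).
int64_t scaledOffset(int64_t Imm, SimpleTypeSize Scale) {
  return Imm * (int64_t)Scale.KnownMinSize;
}

int main() {
  SimpleTypeSize FixedQ = {16, false};   // e.g. LDRQui
  SimpleTypeSize ScalableZ = {16, true}; // e.g. LDR_ZXI (SVE vector spill)

  assert(scaledOffset(3, FixedQ) == 48);    // 48 bytes, exactly
  assert(scaledOffset(3, ScalableZ) == 48); // 48 * vscale bytes at run time
}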
@@ -2043,26 +2118,28 @@ AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
return OfsOp;
}
-bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
+bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
unsigned &Width, int64_t &MinOffset,
int64_t &MaxOffset) {
+ const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
switch (Opcode) {
// Not a memory operation or something we want to handle.
default:
- Scale = Width = 0;
+ Scale = TypeSize::Fixed(0);
+ Width = 0;
MinOffset = MaxOffset = 0;
return false;
case AArch64::STRWpost:
case AArch64::LDRWpost:
Width = 32;
- Scale = 4;
+ Scale = TypeSize::Fixed(4);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDURQi:
case AArch64::STURQi:
Width = 16;
- Scale = 1;
+ Scale = TypeSize::Fixed(1);
MinOffset = -256;
MaxOffset = 255;
break;
@@ -2072,7 +2149,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::STURXi:
case AArch64::STURDi:
Width = 8;
- Scale = 1;
+ Scale = TypeSize::Fixed(1);
MinOffset = -256;
MaxOffset = 255;
break;
@@ -2082,7 +2159,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::STURWi:
case AArch64::STURSi:
Width = 4;
- Scale = 1;
+ Scale = TypeSize::Fixed(1);
MinOffset = -256;
MaxOffset = 255;
break;
@@ -2093,7 +2170,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::STURHi:
case AArch64::STURHHi:
Width = 2;
- Scale = 1;
+ Scale = TypeSize::Fixed(1);
MinOffset = -256;
MaxOffset = 255;
break;
@@ -2104,7 +2181,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::STURBi:
case AArch64::STURBBi:
Width = 1;
- Scale = 1;
+ Scale = TypeSize::Fixed(1);
MinOffset = -256;
MaxOffset = 255;
break;
@@ -2112,14 +2189,15 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::LDNPQi:
case AArch64::STPQi:
case AArch64::STNPQi:
- Scale = 16;
+ Scale = TypeSize::Fixed(16);
Width = 32;
MinOffset = -64;
MaxOffset = 63;
break;
case AArch64::LDRQui:
case AArch64::STRQui:
- Scale = Width = 16;
+ Scale = TypeSize::Fixed(16);
+ Width = 16;
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -2131,7 +2209,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::STPDi:
case AArch64::STNPXi:
case AArch64::STNPDi:
- Scale = 8;
+ Scale = TypeSize::Fixed(8);
Width = 16;
MinOffset = -64;
MaxOffset = 63;
@@ -2141,7 +2219,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::LDRDui:
case AArch64::STRXui:
case AArch64::STRDui:
- Scale = Width = 8;
+ Scale = TypeSize::Fixed(8);
+ Width = 8;
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -2153,7 +2232,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::STPSi:
case AArch64::STNPWi:
case AArch64::STNPSi:
- Scale = 4;
+ Scale = TypeSize::Fixed(4);
Width = 8;
MinOffset = -64;
MaxOffset = 63;
@@ -2163,7 +2242,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::LDRSWui:
case AArch64::STRWui:
case AArch64::STRSui:
- Scale = Width = 4;
+ Scale = TypeSize::Fixed(4);
+ Width = 4;
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -2173,7 +2253,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::LDRSHXui:
case AArch64::STRHui:
case AArch64::STRHHui:
- Scale = Width = 2;
+ Scale = TypeSize::Fixed(2);
+ Width = 2;
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -2183,18 +2264,19 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::LDRSBXui:
case AArch64::STRBui:
case AArch64::STRBBui:
- Scale = Width = 1;
+ Scale = TypeSize::Fixed(1);
+ Width = 1;
MinOffset = 0;
MaxOffset = 4095;
break;
case AArch64::ADDG:
- Scale = 16;
+ Scale = TypeSize::Fixed(16);
Width = 0;
MinOffset = 0;
MaxOffset = 63;
break;
case AArch64::TAGPstack:
- Scale = 16;
+ Scale = TypeSize::Fixed(16);
Width = 0;
// TAGP with a negative offset turns into SUBP, which has a maximum offset
// of 63 (not 64!).
@@ -2204,31 +2286,110 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::LDG:
case AArch64::STGOffset:
case AArch64::STZGOffset:
- Scale = Width = 16;
+ Scale = TypeSize::Fixed(16);
+ Width = 16;
MinOffset = -256;
MaxOffset = 255;
break;
+ case AArch64::STR_ZZZZXI:
+ case AArch64::LDR_ZZZZXI:
+ Scale = TypeSize::Scalable(16);
+ Width = SVEMaxBytesPerVector * 4;
+ MinOffset = -256;
+ MaxOffset = 252;
+ break;
+ case AArch64::STR_ZZZXI:
+ case AArch64::LDR_ZZZXI:
+ Scale = TypeSize::Scalable(16);
+ Width = SVEMaxBytesPerVector * 3;
+ MinOffset = -256;
+ MaxOffset = 253;
+ break;
+ case AArch64::STR_ZZXI:
+ case AArch64::LDR_ZZXI:
+ Scale = TypeSize::Scalable(16);
+ Width = SVEMaxBytesPerVector * 2;
+ MinOffset = -256;
+ MaxOffset = 254;
+ break;
case AArch64::LDR_PXI:
case AArch64::STR_PXI:
- Scale = Width = 2;
+ Scale = TypeSize::Scalable(2);
+ Width = SVEMaxBytesPerVector / 8;
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDR_ZXI:
case AArch64::STR_ZXI:
- Scale = Width = 16;
+ Scale = TypeSize::Scalable(16);
+ Width = SVEMaxBytesPerVector;
MinOffset = -256;
MaxOffset = 255;
break;
+ case AArch64::LD1B_IMM:
+ case AArch64::LD1H_IMM:
+ case AArch64::LD1W_IMM:
+ case AArch64::LD1D_IMM:
+ case AArch64::ST1B_IMM:
+ case AArch64::ST1H_IMM:
+ case AArch64::ST1W_IMM:
+ case AArch64::ST1D_IMM:
+    // A full vector's worth of data
+ // Width = mbytes * elements
+ Scale = TypeSize::Scalable(16);
+ Width = SVEMaxBytesPerVector;
+ MinOffset = -8;
+ MaxOffset = 7;
+ break;
+ case AArch64::LD1B_H_IMM:
+ case AArch64::LD1SB_H_IMM:
+ case AArch64::LD1H_S_IMM:
+ case AArch64::LD1SH_S_IMM:
+ case AArch64::LD1W_D_IMM:
+ case AArch64::LD1SW_D_IMM:
+ case AArch64::ST1B_H_IMM:
+ case AArch64::ST1H_S_IMM:
+ case AArch64::ST1W_D_IMM:
+    // A half vector's worth of data
+ // Width = mbytes * elements
+ Scale = TypeSize::Scalable(8);
+ Width = SVEMaxBytesPerVector / 2;
+ MinOffset = -8;
+ MaxOffset = 7;
+ break;
+ case AArch64::LD1B_S_IMM:
+ case AArch64::LD1SB_S_IMM:
+ case AArch64::LD1H_D_IMM:
+ case AArch64::LD1SH_D_IMM:
+ case AArch64::ST1B_S_IMM:
+ case AArch64::ST1H_D_IMM:
+    // A quarter vector's worth of data
+ // Width = mbytes * elements
+ Scale = TypeSize::Scalable(4);
+ Width = SVEMaxBytesPerVector / 4;
+ MinOffset = -8;
+ MaxOffset = 7;
+ break;
+ case AArch64::LD1B_D_IMM:
+ case AArch64::LD1SB_D_IMM:
+ case AArch64::ST1B_D_IMM:
+    // An eighth vector's worth of data
+ // Width = mbytes * elements
+ Scale = TypeSize::Scalable(2);
+ Width = SVEMaxBytesPerVector / 8;
+ MinOffset = -8;
+ MaxOffset = 7;
+ break;
case AArch64::ST2GOffset:
case AArch64::STZ2GOffset:
- Scale = 16;
+ Scale = TypeSize::Fixed(16);
Width = 32;
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::STGPi:
- Scale = Width = 16;
+ Scale = TypeSize::Fixed(16);
+ Width = 16;
MinOffset = -64;
MaxOffset = 63;
break;
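For the SVE LD1/ST1 immediate forms added above, the immediate counts whole (or partial) vector groups, so the reachable byte range is the [-8, 7] immediate window times the per-group scale, all multiplied by vscale at run time. A small worked sketch of that arithmetic (illustration only; the opcodes and ranges are taken from the table above):

#include <cassert>
#include <cstdint>

// The reachable byte range of an SVE LD1/ST1 immediate form: the immediate
// window [MinImm, MaxImm] times the per-group scale. The result is still in
// units of vscale, i.e. it is multiplied by the vector-length multiple at
// run time.
struct ImmRange {
  int64_t MinBytesPerVscale;
  int64_t MaxBytesPerVscale;
};

ImmRange reachableRange(int64_t MinImm, int64_t MaxImm, int64_t KnownMinScale) {
  return {MinImm * KnownMinScale, MaxImm * KnownMinScale};
}

int main() {
  // LD1W_IMM: full-vector group, Scale = Scalable(16), imm in [-8, 7].
  ImmRange Full = reachableRange(-8, 7, 16);
  assert(Full.MinBytesPerVscale == -128 && Full.MaxBytesPerVscale == 112);

  // LD1B_D_IMM: eighth-vector group, Scale = Scalable(2), same imm window.
  ImmRange Eighth = reachableRange(-8, 7, 2);
  assert(Eighth.MinBytesPerVscale == -16 && Eighth.MaxBytesPerVscale == 14);
}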
@@ -2363,9 +2524,13 @@ static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
/// Detect opportunities for ldp/stp formation.
///
/// Only called for LdSt for which getMemOperandWithOffset returns true.
-bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
- const MachineOperand &BaseOp2,
- unsigned NumLoads) const {
+bool AArch64InstrInfo::shouldClusterMemOps(
+ ArrayRef<const MachineOperand *> BaseOps1,
+ ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
+ unsigned NumBytes) const {
+ assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
+ const MachineOperand &BaseOp1 = *BaseOps1.front();
+ const MachineOperand &BaseOp2 = *BaseOps2.front();
const MachineInstr &FirstLdSt = *BaseOp1.getParent();
const MachineInstr &SecondLdSt = *BaseOp2.getParent();
if (BaseOp1.getType() != BaseOp2.getType())
@@ -2379,7 +2544,7 @@ bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
return false;
// Only cluster up to a single pair.
- if (NumLoads > 1)
+ if (NumLoads > 2)
return false;
if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
@@ -2822,11 +2987,11 @@ static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
const MCInstrDesc &MCID,
- unsigned SrcReg, bool IsKill,
+ Register SrcReg, bool IsKill,
unsigned SubIdx0, unsigned SubIdx1, int FI,
MachineMemOperand *MMO) {
- unsigned SrcReg0 = SrcReg;
- unsigned SrcReg1 = SrcReg;
+ Register SrcReg0 = SrcReg;
+ Register SrcReg1 = SrcReg;
if (Register::isPhysicalRegister(SrcReg)) {
SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
SubIdx0 = 0;
@@ -2842,18 +3007,19 @@ static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
}
void AArch64InstrInfo::storeRegToStackSlot(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
bool isKill, int FI, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned Align = MFI.getObjectAlignment(FI);
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
unsigned Opc = 0;
bool Offset = true;
+ unsigned StackID = TargetStackID::Default;
switch (TRI->getSpillSize(*RC)) {
case 1:
if (AArch64::FPR8RegClass.hasSubClassEq(RC))
@@ -2862,6 +3028,11 @@ void AArch64InstrInfo::storeRegToStackSlot(
case 2:
if (AArch64::FPR16RegClass.hasSubClassEq(RC))
Opc = AArch64::STRHui;
+ else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+ Opc = AArch64::STR_PXI;
+ StackID = TargetStackID::SVEVector;
+ }
break;
case 4:
if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
@@ -2901,6 +3072,10 @@ void AArch64InstrInfo::storeRegToStackSlot(
get(AArch64::STPXi), SrcReg, isKill,
AArch64::sube64, AArch64::subo64, FI, MMO);
return;
+ } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+ Opc = AArch64::STR_ZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
case 24:
@@ -2919,6 +3094,10 @@ void AArch64InstrInfo::storeRegToStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Twov2d;
Offset = false;
+ } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+ Opc = AArch64::STR_ZZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
case 48:
@@ -2926,6 +3105,10 @@ void AArch64InstrInfo::storeRegToStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Threev2d;
Offset = false;
+ } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+ Opc = AArch64::STR_ZZZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
case 64:
@@ -2933,19 +3116,13 @@ void AArch64InstrInfo::storeRegToStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Fourv2d;
Offset = false;
+ } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+ Opc = AArch64::STR_ZZZZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
}
- unsigned StackID = TargetStackID::Default;
- if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
- Opc = AArch64::STR_PXI;
- StackID = TargetStackID::SVEVector;
- } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
- Opc = AArch64::STR_ZXI;
- StackID = TargetStackID::SVEVector;
- }
assert(Opc && "Unknown register class");
MFI.setStackID(FI, StackID);
@@ -2962,11 +3139,11 @@ static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
const MCInstrDesc &MCID,
- unsigned DestReg, unsigned SubIdx0,
+ Register DestReg, unsigned SubIdx0,
unsigned SubIdx1, int FI,
MachineMemOperand *MMO) {
- unsigned DestReg0 = DestReg;
- unsigned DestReg1 = DestReg;
+ Register DestReg0 = DestReg;
+ Register DestReg1 = DestReg;
bool IsUndef = true;
if (Register::isPhysicalRegister(DestReg)) {
DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
@@ -2984,18 +3161,19 @@ static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
}
void AArch64InstrInfo::loadRegFromStackSlot(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
int FI, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned Align = MFI.getObjectAlignment(FI);
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
unsigned Opc = 0;
bool Offset = true;
+ unsigned StackID = TargetStackID::Default;
switch (TRI->getSpillSize(*RC)) {
case 1:
if (AArch64::FPR8RegClass.hasSubClassEq(RC))
@@ -3004,6 +3182,11 @@ void AArch64InstrInfo::loadRegFromStackSlot(
case 2:
if (AArch64::FPR16RegClass.hasSubClassEq(RC))
Opc = AArch64::LDRHui;
+ else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+ Opc = AArch64::LDR_PXI;
+ StackID = TargetStackID::SVEVector;
+ }
break;
case 4:
if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
@@ -3043,6 +3226,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(
get(AArch64::LDPXi), DestReg, AArch64::sube64,
AArch64::subo64, FI, MMO);
return;
+ } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+ Opc = AArch64::LDR_ZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
case 24:
@@ -3061,6 +3248,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Twov2d;
Offset = false;
+ } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+ Opc = AArch64::LDR_ZZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
case 48:
@@ -3068,6 +3259,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Threev2d;
Offset = false;
+ } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+ Opc = AArch64::LDR_ZZZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
case 64:
@@ -3075,20 +3270,14 @@ void AArch64InstrInfo::loadRegFromStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Fourv2d;
Offset = false;
+ } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+ Opc = AArch64::LDR_ZZZZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
}
- unsigned StackID = TargetStackID::Default;
- if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
- Opc = AArch64::LDR_PXI;
- StackID = TargetStackID::SVEVector;
- } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
- Opc = AArch64::LDR_ZXI;
- StackID = TargetStackID::SVEVector;
- }
assert(Opc && "Unknown register class");
MFI.setStackID(FI, StackID);
@@ -3100,6 +3289,17 @@ void AArch64InstrInfo::loadRegFromStackSlot(
MI.addMemOperand(MMO);
}
+bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
+ const MachineInstr &UseMI,
+ const TargetRegisterInfo *TRI) {
+ return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
+ UseMI.getIterator()),
+ [TRI](const MachineInstr &I) {
+ return I.modifiesRegister(AArch64::NZCV, TRI) ||
+ I.readsRegister(AArch64::NZCV, TRI);
+ });
+}
+
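isNZCVTouchedInInstructionRange reports whether any instruction strictly between a definition and a use reads or writes NZCV. A simplified standalone version of the same any_of scan, modelling instructions with a hypothetical Inst struct instead of MachineInstr:

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

// Hypothetical instruction model with just the two facts the scan cares about.
struct Inst {
  bool ReadsNZCV;
  bool WritesNZCV;
};

// True if any instruction strictly between Def and Use touches NZCV,
// mirroring the any_of scan in isNZCVTouchedInInstructionRange.
bool nzcvTouchedBetween(const std::vector<Inst> &Block, std::size_t Def,
                        std::size_t Use) {
  return std::any_of(
      Block.begin() + Def + 1, Block.begin() + Use,
      [](const Inst &I) { return I.ReadsNZCV || I.WritesNZCV; });
}

int main() {
  std::vector<Inst> Block = {{false, true},   // def: sets the flags
                             {false, false},  // unrelated instruction
                             {true, false}};  // use: reads the flags
  assert(!nzcvTouchedBetween(Block, 0, 2)); // nothing in between touches NZCV
  Block[1].WritesNZCV = true;
  assert(nzcvTouchedBetween(Block, 0, 2)); // intervening clobber detected
}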
// Helper function to emit a frame offset adjustment from a given
// pointer (SrcReg), stored into DestReg. This function is explicit
// in that it requires the opcode.
@@ -3146,6 +3346,10 @@ static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
// assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
+ Register TmpReg = DestReg;
+ if (TmpReg == AArch64::XZR)
+ TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
+ &AArch64::GPR64RegClass);
do {
uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
unsigned LocalShiftSize = 0;
@@ -3155,7 +3359,11 @@ static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
}
assert((ThisVal >> ShiftSize) <= MaxEncoding &&
"Encoding cannot handle value that big");
- auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
+
+ Offset -= ThisVal << LocalShiftSize;
+ if (Offset == 0)
+ TmpReg = DestReg;
+ auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
.addReg(SrcReg)
.addImm(Sign * (int)ThisVal);
if (ShiftSize)
@@ -3176,8 +3384,8 @@ static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
.addImm(Imm)
.setMIFlag(Flag);
- assert((Offset - Imm) == 0 && "Expected remaining offset to be zero to "
- "emit a single SEH directive");
+ assert(Offset == 0 && "Expected remaining offset to be zero to "
+ "emit a single SEH directive");
} else if (DestReg == AArch64::SP) {
if (HasWinCFI)
*HasWinCFI = true;
@@ -3190,8 +3398,7 @@ static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
*HasWinCFI = true;
}
- SrcReg = DestReg;
- Offset -= ThisVal << LocalShiftSize;
+ SrcReg = TmpReg;
} while (Offset);
}
@@ -3414,18 +3621,6 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
return nullptr;
}
-static bool isSVEScaledImmInstruction(unsigned Opcode) {
- switch (Opcode) {
- case AArch64::LDR_ZXI:
- case AArch64::STR_ZXI:
- case AArch64::LDR_PXI:
- case AArch64::STR_PXI:
- return true;
- default:
- return false;
- }
-}
-
int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
StackOffset &SOffset,
bool *OutUseUnscaledOp,
@@ -3458,20 +3653,23 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
case AArch64::ST1Fourv1d:
case AArch64::IRG:
case AArch64::IRGstack:
+ case AArch64::STGloop:
+ case AArch64::STZGloop:
return AArch64FrameOffsetCannotUpdate;
}
// Get the min/max offset and the scale.
- unsigned Scale, Width;
+ TypeSize ScaleValue(0U, false);
+ unsigned Width;
int64_t MinOff, MaxOff;
- if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), Scale, Width, MinOff,
+ if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
MaxOff))
llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
// Construct the complete offset.
- bool IsMulVL = isSVEScaledImmInstruction(MI.getOpcode());
- int64_t Offset =
- IsMulVL ? (SOffset.getScalableBytes()) : (SOffset.getBytes());
+ bool IsMulVL = ScaleValue.isScalable();
+ unsigned Scale = ScaleValue.getKnownMinSize();
+ int64_t Offset = IsMulVL ? SOffset.getScalableBytes() : SOffset.getBytes();
const MachineOperand &ImmOpnd =
MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
@@ -3484,9 +3682,14 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
if (useUnscaledOp &&
- !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, Scale, Width, MinOff, MaxOff))
+ !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
+ MaxOff))
llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
+ Scale = ScaleValue.getKnownMinSize();
+ assert(IsMulVL == ScaleValue.isScalable() &&
+ "Unscaled opcode has different value for scalable");
+
int64_t Remainder = Offset % Scale;
assert(!(Remainder && useUnscaledOp) &&
"Cannot have remainder when using unscaled op");
@@ -5791,6 +5994,35 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
});
+  // Check whether CFI instructions are present; if they are, count how many
+  // appear in the candidate sequence.
+ unsigned CFICount = 0;
+ MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
+ for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
+ Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
+ const std::vector<MCCFIInstruction> &CFIInstructions =
+ RepeatedSequenceLocs[0].getMF()->getFrameInstructions();
+ if (MBBI->isCFIInstruction()) {
+ unsigned CFIIndex = MBBI->getOperand(0).getCFIIndex();
+ MCCFIInstruction CFI = CFIInstructions[CFIIndex];
+ CFICount++;
+ }
+ MBBI++;
+ }
+
+  // For each candidate, compare the number of CFI instructions found above to
+  // the number of CFI instructions in the parent function. If we outline one
+  // CFI instruction of a function we must outline them all for correctness;
+  // otherwise the address offsets will be incorrect between the outlined and
+  // remaining sections of the program.
+ for (outliner::Candidate &C : RepeatedSequenceLocs) {
+ std::vector<MCCFIInstruction> CFIInstructions =
+ C.getMF()->getFrameInstructions();
+
+ if (CFICount > 0 && CFICount != CFIInstructions.size())
+ return outliner::OutlinedFunction();
+ }
+
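A simplified model of the CFI-count check above: outlining a strict subset of a function's CFI directives would leave the remaining unwind info describing the wrong offsets, so a candidate is acceptable only if it captures either none of them or all of them. Sketch with a hypothetical instruction model:

#include <cassert>
#include <cstddef>
#include <vector>

// Simplified model: a candidate is an inclusive slice [Start, End] of a
// function's instruction list, and each instruction either is or is not a
// CFI directive.
struct SimpleInst {
  bool IsCFI;
};

// A candidate may be outlined only if it captures none of the function's CFI
// directives or all of them; outlining a strict subset would leave the
// remaining unwind info describing the wrong offsets.
bool cfiCountAcceptable(const std::vector<SimpleInst> &Function,
                        std::size_t Start, std::size_t End) {
  unsigned InCandidate = 0, InFunction = 0;
  for (std::size_t I = 0; I < Function.size(); ++I) {
    if (!Function[I].IsCFI)
      continue;
    ++InFunction;
    if (I >= Start && I <= End)
      ++InCandidate;
  }
  return InCandidate == 0 || InCandidate == InFunction;
}

int main() {
  std::vector<SimpleInst> F = {{true}, {false}, {true}, {false}};
  assert(cfiCountAcceptable(F, 0, 2));  // both CFI directives captured
  assert(!cfiCountAcceptable(F, 1, 2)); // only one of the two: reject
  assert(cfiCountAcceptable(F, 1, 1));  // no CFI directives at all: fine
}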
// Returns true if an instructions is safe to fix up, false otherwise.
auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
if (MI.isCall())
@@ -5811,23 +6043,29 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
if (MI.mayLoadOrStore()) {
const MachineOperand *Base; // Filled with the base operand of MI.
int64_t Offset; // Filled with the offset of MI.
+ bool OffsetIsScalable;
// Does it allow us to offset the base operand and is the base the
// register SP?
- if (!getMemOperandWithOffset(MI, Base, Offset, &TRI) || !Base->isReg() ||
- Base->getReg() != AArch64::SP)
+ if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
+ !Base->isReg() || Base->getReg() != AArch64::SP)
+ return false;
+
+      // Fix-up code below assumes bytes.
+ if (OffsetIsScalable)
return false;
// Find the minimum/maximum offset for this instruction and check
// if fixing it up would be in range.
int64_t MinOffset,
MaxOffset; // Unscaled offsets for the instruction.
- unsigned Scale; // The scale to multiply the offsets by.
+ TypeSize Scale(0U, false); // The scale to multiply the offsets by.
unsigned DummyWidth;
getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
Offset += 16; // Update the offset to what it would be if we outlined.
- if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
+ if (Offset < MinOffset * (int64_t)Scale.getFixedSize() ||
+ Offset > MaxOffset * (int64_t)Scale.getFixedSize())
return false;
// It's in range, so we can outline it.
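Because the outlined body saves the return address, every SP-relative access it contains ends up 16 bytes further from SP, and the candidate is rejected if the adjusted offset no longer fits the instruction's encodable range. A small worked sketch of that check (the LDRXui/STRXui range used in main is taken from getMemOpInfo above):

#include <cassert>
#include <cstdint>

// After outlining, the return address is stored on the stack, so every
// SP-relative access in the outlined body sees its data 16 bytes further
// away. The access is only legal to outline if the adjusted byte offset
// still fits the instruction's encodable range.
bool fitsAfterOutlining(int64_t OffsetBytes, int64_t MinImm, int64_t MaxImm,
                        int64_t Scale) {
  int64_t Adjusted = OffsetBytes + 16;
  return Adjusted >= MinImm * Scale && Adjusted <= MaxImm * Scale;
}

int main() {
  // LDRXui / STRXui: Scale 8, imm in [0, 4095] -> byte range [0, 32760].
  assert(fitsAfterOutlining(32, 0, 4095, 8));     // 48 bytes still encodes
  assert(!fitsAfterOutlining(32752, 0, 4095, 8)); // 32768 would not encode
}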
@@ -5854,7 +6092,9 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
}
else if (LastInstrOpcode == AArch64::BL ||
- (LastInstrOpcode == AArch64::BLR && !HasBTI)) {
+ ((LastInstrOpcode == AArch64::BLR ||
+ LastInstrOpcode == AArch64::BLRNoIP) &&
+ !HasBTI)) {
// FIXME: Do we need to check if the code after this uses the value of LR?
FrameID = MachineOutlinerThunk;
NumBytesToCreateFrame = 0;
@@ -5960,6 +6200,11 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
}
}
+ // If we have CFI instructions, we can only outline if the outlined section
+ // can be a tail call
+ if (FrameID != MachineOutlinerTailCall && CFICount > 0)
+ return outliner::OutlinedFunction();
+
return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
NumBytesToCreateFrame, FrameID);
}
@@ -5986,6 +6231,10 @@ bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
if (!AFI || AFI->hasRedZone().getValueOr(true))
return false;
+ // FIXME: Teach the outliner to generate/handle Windows unwind info.
+ if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
+ return false;
+
// It's safe to outline from MF.
return true;
}
@@ -6081,6 +6330,15 @@ AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
if (FuncInfo->getLOHRelated().count(&MI))
return outliner::InstrType::Illegal;
+ // We can only outline these if we will tail call the outlined function, or
+ // fix up the CFI offsets. Currently, CFI instructions are outlined only if
+ // in a tail call.
+ //
+ // FIXME: If the proper fixups for the offset are implemented, this should be
+ // possible.
+ if (MI.isCFIInstruction())
+ return outliner::InstrType::Legal;
+
// Don't allow debug values to impact outlining type.
if (MI.isDebugInstr() || MI.isIndirectDebugValue())
return outliner::InstrType::Invisible;
@@ -6150,10 +6408,11 @@ AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
// If we don't know anything about the callee, assume it depends on the
// stack layout of the caller. In that case, it's only legal to outline
- // as a tail-call. Whitelist the call instructions we know about so we
+ // as a tail-call. Explicitly list the call instructions we know about so we
// don't get unexpected results with call pseudo-instructions.
auto UnknownCallOutlineType = outliner::InstrType::Illegal;
- if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
+ if (MI.getOpcode() == AArch64::BLR ||
+ MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
if (!Callee)
@@ -6205,26 +6464,29 @@ void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
const MachineOperand *Base;
unsigned Width;
int64_t Offset;
+ bool OffsetIsScalable;
// Is this a load or store with an immediate offset with SP as the base?
if (!MI.mayLoadOrStore() ||
- !getMemOperandWithOffsetWidth(MI, Base, Offset, Width, &RI) ||
+ !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
+ &RI) ||
(Base->isReg() && Base->getReg() != AArch64::SP))
continue;
// It is, so we have to fix it up.
- unsigned Scale;
+ TypeSize Scale(0U, false);
int64_t Dummy1, Dummy2;
MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
assert(Scale != 0 && "Unexpected opcode!");
+ assert(!OffsetIsScalable && "Expected offset to be a byte offset");
// We've pushed the return address to the stack, so add 16 to the offset.
// This is safe, since we already checked if it would overflow when we
// checked if this instruction was legal to outline.
- int64_t NewImm = (Offset + 16) / Scale;
+ int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedSize();
StackOffsetOperand.setImm(NewImm);
}
}
@@ -6285,15 +6547,21 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
void AArch64InstrInfo::buildOutlinedFrame(
MachineBasicBlock &MBB, MachineFunction &MF,
const outliner::OutlinedFunction &OF) const {
- // For thunk outlining, rewrite the last instruction from a call to a
- // tail-call.
- if (OF.FrameConstructionID == MachineOutlinerThunk) {
+
+ AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
+
+ if (OF.FrameConstructionID == MachineOutlinerTailCall)
+ FI->setOutliningStyle("Tail Call");
+ else if (OF.FrameConstructionID == MachineOutlinerThunk) {
+ // For thunk outlining, rewrite the last instruction from a call to a
+ // tail-call.
MachineInstr *Call = &*--MBB.instr_end();
unsigned TailOpcode;
if (Call->getOpcode() == AArch64::BL) {
TailOpcode = AArch64::TCRETURNdi;
} else {
- assert(Call->getOpcode() == AArch64::BLR);
+ assert(Call->getOpcode() == AArch64::BLR ||
+ Call->getOpcode() == AArch64::BLRNoIP);
TailOpcode = AArch64::TCRETURNriALL;
}
MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
@@ -6301,6 +6569,8 @@ void AArch64InstrInfo::buildOutlinedFrame(
.addImm(0);
MBB.insert(MBB.end(), TC);
Call->eraseFromParent();
+
+ FI->setOutliningStyle("Thunk");
}
bool IsLeafFunction = true;
@@ -6320,7 +6590,8 @@ void AArch64InstrInfo::buildOutlinedFrame(
IsLeafFunction = false;
// LR has to be a live in so that we can save it.
- MBB.addLiveIn(AArch64::LR);
+ if (!MBB.isLiveIn(AArch64::LR))
+ MBB.addLiveIn(AArch64::LR);
MachineBasicBlock::iterator It = MBB.begin();
MachineBasicBlock::iterator Et = MBB.end();
@@ -6343,7 +6614,7 @@ void AArch64InstrInfo::buildOutlinedFrame(
// Add a CFI saying the stack was moved 16 B down.
int64_t StackPosEntry =
- MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16));
BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
.addCFIIndex(StackPosEntry)
.setMIFlags(MachineInstr::FrameSetup);
@@ -6351,7 +6622,7 @@ void AArch64InstrInfo::buildOutlinedFrame(
// Add a CFI saying that the LR that we want to find is now 16 B higher than
// before.
int64_t LRPosEntry =
- MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
+ MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, -16));
BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
.addCFIIndex(LRPosEntry)
.setMIFlags(MachineInstr::FrameSetup);
@@ -6399,13 +6670,20 @@ void AArch64InstrInfo::buildOutlinedFrame(
}
// It's not a tail call, so we have to insert the return ourselves.
+
+ // LR has to be a live in so that we can return to it.
+ if (!MBB.isLiveIn(AArch64::LR))
+ MBB.addLiveIn(AArch64::LR);
+
MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
- .addReg(AArch64::LR, RegState::Undef);
+ .addReg(AArch64::LR);
MBB.insert(MBB.end(), ret);
signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
ShouldSignReturnAddrWithAKey);
+ FI->setOutliningStyle("Function");
+
// Did we have to modify the stack by saving the link register?
if (OF.FrameConstructionID != MachineOutlinerDefault)
return;
@@ -6519,7 +6797,8 @@ Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI,
// TODO: Handle cases where Reg is a super- or sub-register of the
// destination register.
- if (Reg != MI.getOperand(0).getReg())
+ const MachineOperand &Op0 = MI.getOperand(0);
+ if (!Op0.isReg() || Reg != Op0.getReg())
return None;
switch (MI.getOpcode()) {
@@ -6614,5 +6893,17 @@ AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
return TargetInstrInfo::describeLoadedValue(MI, Reg);
}
+uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
+ return get(Opc).TSFlags & AArch64::ElementSizeMask;
+}
+
+unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
+ if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
+ return AArch64::BLRNoIP;
+ else
+ return AArch64::BLR;
+}
+
#define GET_INSTRINFO_HELPERS
+#define GET_INSTRMAP_INFO
#include "AArch64GenInstrInfo.inc"