Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64InstrInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 557
1 file changed, 424 insertions(+), 133 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 54f3f7c101324..5139ae5ccaf19 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -24,9 +24,9 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -111,6 +111,14 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
     // This gets lowered to an instruction sequence which takes 16 bytes
     NumBytes = 16;
     break;
+  case AArch64::SpeculationBarrierISBDSBEndBB:
+    // This gets lowered to 2 4-byte instructions.
+    NumBytes = 8;
+    break;
+  case AArch64::SpeculationBarrierSBEndBB:
+    // This gets lowered to a single 4-byte instruction.
+    NumBytes = 4;
+    break;
   case AArch64::JumpTableDest32:
   case AArch64::JumpTableDest16:
   case AArch64::JumpTableDest8:
@@ -119,11 +127,25 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
   case AArch64::SPACE:
     NumBytes = MI.getOperand(1).getImm();
     break;
+  case TargetOpcode::BUNDLE:
+    NumBytes = getInstBundleLength(MI);
+    break;
   }

   return NumBytes;
 }

+unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
+  unsigned Size = 0;
+  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
+  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
+  while (++I != E && I->isInsideBundle()) {
+    assert(!I->isBundle() && "No nested bundle!");
+    Size += getInstSizeInBytes(*I);
+  }
+  return Size;
+}
+
 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                             SmallVectorImpl<MachineOperand> &Cond) {
   // Block ends with fall-through condbranch.
@@ -216,6 +238,12 @@ bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
   if (I == MBB.end())
     return false;

+  // Skip over SpeculationBarrierEndBB terminators
+  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
+      I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
+    --I;
+  }
+
   if (!isUnpredicatedTerminator(*I))
     return false;
@@ -496,8 +524,9 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
 bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                        ArrayRef<MachineOperand> Cond,
-                                       unsigned TrueReg, unsigned FalseReg,
-                                       int &CondCycles, int &TrueCycles,
+                                       Register DstReg, Register TrueReg,
+                                       Register FalseReg, int &CondCycles,
+                                       int &TrueCycles,
                                        int &FalseCycles) const {
   // Check register classes.
   const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -506,6 +535,12 @@ bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
   if (!RC)
     return false;

+  // Also need to check the dest regclass, in case we're trying to optimize
+  // something like:
+  // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
+  if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
+    return false;
+
   // Expanding cbz/tbz requires an extra cycle of latency on the condition.
   unsigned ExtraCondLat = Cond.size() != 1;
@@ -538,9 +573,9 @@ bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
 void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator I,
-                                    const DebugLoc &DL, unsigned DstReg,
+                                    const DebugLoc &DL, Register DstReg,
                                     ArrayRef<MachineOperand> Cond,
-                                    unsigned TrueReg, unsigned FalseReg) const {
+                                    Register TrueReg, Register FalseReg) const {
   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

   // Parse the condition code, see parseCondBranch() above.
@@ -910,7 +945,7 @@ bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
 }

 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
-                                             unsigned &SrcReg, unsigned &DstReg,
+                                             Register &SrcReg, Register &DstReg,
                                              unsigned &SubIdx) const {
   switch (MI.getOpcode()) {
   default:
@@ -935,6 +970,7 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
   const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
   int64_t OffsetA = 0, OffsetB = 0;
   unsigned WidthA = 0, WidthB = 0;
+  bool OffsetAIsScalable = false, OffsetBIsScalable = false;

   assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
   assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
@@ -948,9 +984,14 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
   // base are identical, and the offset of a lower memory access +
   // the width doesn't overlap the offset of a higher memory access,
   // then the memory accesses are different.
-  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
-      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
-    if (BaseOpA->isIdenticalTo(*BaseOpB)) {
+  // If OffsetAIsScalable and OffsetBIsScalable are both true, they
+  // are assumed to have the same scale (vscale).
+  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
+                                   WidthA, TRI) &&
+      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
+                                   WidthB, TRI)) {
+    if (BaseOpA->isIdenticalTo(*BaseOpB) &&
+        OffsetAIsScalable == OffsetBIsScalable) {
       int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
       int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
       int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
@@ -984,8 +1025,8 @@ bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
 /// analyzeCompare - For a comparison instruction, return the source registers
 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
 /// Return true if the comparison instruction can be analyzed.
-bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
-                                      unsigned &SrcReg2, int &CmpMask,
+bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+                                      Register &SrcReg2, int &CmpMask,
                                       int &CmpValue) const {
   // The first operand can be a frame index where we'd normally expect a
   // register.
@@ -1156,10 +1197,9 @@ static bool areCFlagsAccessedBetweenInstrs(
                     return MI.getIterator() == From;
                   }) != To->getParent()->rend());

-  // We iterate backward starting \p To until we hit \p From.
-  for (--To; To != From; --To) {
-    const MachineInstr &Instr = *To;
-
+  // We iterate backward starting at \p To until we hit \p From.
+  for (const MachineInstr &Instr :
+       instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
     if (((AccessToCheck & AK_Write) &&
          Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
         ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
@@ -1180,7 +1220,7 @@
 /// instruction.
 /// Only comparison with zero is supported.
 bool AArch64InstrInfo::optimizeCompareInstr(
-    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
+    MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int CmpMask,
     int CmpValue, const MachineRegisterInfo *MRI) const {
   assert(CmpInstr.getParent());
   assert(MRI);
@@ -1416,10 +1456,9 @@ static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
     return false;

   UsedNZCV NZCVUsedAfterCmp;
-  for (auto I = std::next(CmpInstr->getIterator()),
-            E = CmpInstr->getParent()->instr_end();
-       I != E; ++I) {
-    const MachineInstr &Instr = *I;
+  for (const MachineInstr &Instr :
+       instructionsWithoutDebug(std::next(CmpInstr->getIterator()),
+                                CmpInstr->getParent()->instr_end())) {
     if (Instr.readsRegister(AArch64::NZCV, TRI)) {
       AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
       if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
@@ -1684,6 +1723,8 @@ unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
   case AArch64::STRSui:
   case AArch64::STRDui:
   case AArch64::STRQui:
+  case AArch64::LDR_PXI:
+  case AArch64::STR_PXI:
     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
       FrameIndex = MI.getOperand(1).getIndex();
@@ -1796,9 +1837,37 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
   case AArch64::STNPSi:
   case AArch64::LDG:
   case AArch64::STGPi:
+  case AArch64::LD1B_IMM:
+  case AArch64::LD1H_IMM:
+  case AArch64::LD1W_IMM:
+  case AArch64::LD1D_IMM:
+  case AArch64::ST1B_IMM:
+  case AArch64::ST1H_IMM:
+  case AArch64::ST1W_IMM:
+  case AArch64::ST1D_IMM:
+  case AArch64::LD1B_H_IMM:
+  case AArch64::LD1SB_H_IMM:
+  case AArch64::LD1H_S_IMM:
+  case AArch64::LD1SH_S_IMM:
+  case AArch64::LD1W_D_IMM:
+  case AArch64::LD1SW_D_IMM:
+  case AArch64::ST1B_H_IMM:
+  case AArch64::ST1H_S_IMM:
+  case AArch64::ST1W_D_IMM:
+  case AArch64::LD1B_S_IMM:
+  case AArch64::LD1SB_S_IMM:
+  case AArch64::LD1H_D_IMM:
+  case AArch64::LD1SH_D_IMM:
+  case AArch64::ST1B_S_IMM:
+  case AArch64::ST1H_D_IMM:
+  case AArch64::LD1B_D_IMM:
+  case AArch64::LD1SB_D_IMM:
+  case AArch64::ST1B_D_IMM:
     return 3;
   case AArch64::ADDG:
   case AArch64::STGOffset:
+  case AArch64::LDR_PXI:
+  case AArch64::STR_PXI:
     return 2;
   }
 }
@@ -1978,20 +2047,25 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
   return true;
 }

-bool AArch64InstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
-                                               const MachineOperand *&BaseOp,
-                                               int64_t &Offset,
-                                               const TargetRegisterInfo *TRI) const {
+bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
+    const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
+    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+    const TargetRegisterInfo *TRI) const {
   if (!LdSt.mayLoadOrStore())
     return false;

-  unsigned Width;
-  return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
+  const MachineOperand *BaseOp;
+  if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
+                                    Width, TRI))
+    return false;
+  BaseOps.push_back(BaseOp);
+  return true;
 }

 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
     const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
-    unsigned &Width, const TargetRegisterInfo *TRI) const {
+    bool &OffsetIsScalable, unsigned &Width,
+    const TargetRegisterInfo *TRI) const {
   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");

   // Handle only loads/stores with base register followed by immediate offset.
   if (LdSt.getNumExplicitOperands() == 3) {
@@ -2010,7 +2084,7 @@ bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
   // Get the scaling factor for the instruction and set the width for the
   // instruction.
-  unsigned Scale = 0;
+  TypeSize Scale(0U, false);
   int64_t Dummy1, Dummy2;

   // If this returns false, then it's an instruction we don't want to handle.
@@ -2022,12 +2096,13 @@ bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
   // set to 1.
   if (LdSt.getNumExplicitOperands() == 3) {
     BaseOp = &LdSt.getOperand(1);
-    Offset = LdSt.getOperand(2).getImm() * Scale;
+    Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinSize();
   } else {
     assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
     BaseOp = &LdSt.getOperand(2);
-    Offset = LdSt.getOperand(3).getImm() * Scale;
+    Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinSize();
   }
+  OffsetIsScalable = Scale.isScalable();

   if (!BaseOp->isReg() && !BaseOp->isFI())
     return false;
@@ -2043,26 +2118,28 @@ AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
   return OfsOp;
 }

-bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
+bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
                                     unsigned &Width, int64_t &MinOffset,
                                     int64_t &MaxOffset) {
+  const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
   switch (Opcode) {
   // Not a memory operation or something we want to handle.
   default:
-    Scale = Width = 0;
+    Scale = TypeSize::Fixed(0);
+    Width = 0;
     MinOffset = MaxOffset = 0;
     return false;
   case AArch64::STRWpost:
   case AArch64::LDRWpost:
     Width = 32;
-    Scale = 4;
+    Scale = TypeSize::Fixed(4);
     MinOffset = -256;
     MaxOffset = 255;
     break;
   case AArch64::LDURQi:
   case AArch64::STURQi:
     Width = 16;
-    Scale = 1;
+    Scale = TypeSize::Fixed(1);
     MinOffset = -256;
     MaxOffset = 255;
     break;
@@ -2072,7 +2149,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
   case AArch64::STURXi:
   case AArch64::STURDi:
     Width = 8;
-    Scale = 1;
+    Scale = TypeSize::Fixed(1);
     MinOffset = -256;
     MaxOffset = 255;
     break;
@@ -2082,7 +2159,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
   case AArch64::STURWi:
   case AArch64::STURSi:
     Width = 4;
-    Scale = 1;
+    Scale = TypeSize::Fixed(1);
     MinOffset = -256;
     MaxOffset = 255;
     break;
@@ -2093,7 +2170,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
   case AArch64::STURHi:
   case AArch64::STURHHi:
     Width = 2;
-    Scale = 1;
+    Scale = TypeSize::Fixed(1);
     MinOffset = -256;
     MaxOffset = 255;
     break;
@@ -2104,7 +2181,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
   case AArch64::STURBi:
   case AArch64::STURBBi:
     Width = 1;
-    Scale = 1;
+    Scale = TypeSize::Fixed(1);
     MinOffset = -256;
     MaxOffset = 255;
     break;
@@ -2112,14 +2189,15 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
   case AArch64::LDNPQi:
   case AArch64::STPQi:
   case AArch64::STNPQi:
-    Scale = 16;
+    Scale = TypeSize::Fixed(16);
     Width = 32;
     MinOffset = -64;
     MaxOffset = 63;
     break;
   case AArch64::LDRQui:
   case AArch64::STRQui:
-    Scale = Width = 16;
+    Scale = TypeSize::Fixed(16);
+    Width = 16;
     MinOffset = 0;
     MaxOffset = 4095;
     break;
@@ -2131,7 +2209,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
   case AArch64::STPDi:
   case AArch64::STNPXi:
   case AArch64::STNPDi:
-    Scale = 8;
+    Scale = TypeSize::Fixed(8);
     Width = 16;
     MinOffset = -64;
     MaxOffset = 63;
@@ -2141,7 +2219,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
   case AArch64::LDRDui:
   case AArch64::STRXui:
   case AArch64::STRDui:
-    Scale = Width = 8;
+    Scale = TypeSize::Fixed(8);
+    Width = 8;
     MinOffset = 0;
     MaxOffset = 4095;
     break;
@@ -2153,7 +2232,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
   case AArch64::STPSi:
   case AArch64::STNPWi:
   case AArch64::STNPSi:
-    Scale = 4;
+    Scale = TypeSize::Fixed(4);
     Width = 8;
     MinOffset = -64;
     MaxOffset = 63;
@@ -2163,7 +2242,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
   case AArch64::LDRSWui:
   case AArch64::STRWui:
   case AArch64::STRSui:
-    Scale = Width = 4;
+    Scale = TypeSize::Fixed(4);
+    Width = 4;
     MinOffset = 0;
     MaxOffset = 4095;
     break;
@@ -2173,7 +2253,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
   case AArch64::LDRSHXui:
   case AArch64::STRHui:
   case AArch64::STRHHui:
-    Scale = Width = 2;
+    Scale = TypeSize::Fixed(2);
+    Width = 2;
     MinOffset = 0;
     MaxOffset = 4095;
     break;
@@ -2183,18 +2264,19 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
   case AArch64::LDRSBXui:
   case AArch64::STRBui:
   case AArch64::STRBBui:
-    Scale = Width = 1;
+    Scale = TypeSize::Fixed(1);
+    Width = 1;
     MinOffset = 0;
     MaxOffset = 4095;
     break;
   case AArch64::ADDG:
-    Scale = 16;
+    Scale = TypeSize::Fixed(16);
     Width = 0;
     MinOffset = 0;
     MaxOffset = 63;
     break;
   case AArch64::TAGPstack:
-    Scale = 16;
+    Scale = TypeSize::Fixed(16);
     Width = 0;
     // TAGP with a negative offset turns into SUBP, which has a maximum offset
     // of 63 (not 64!).
@@ -2204,31 +2286,110 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
   case AArch64::LDG:
   case AArch64::STGOffset:
   case AArch64::STZGOffset:
-    Scale = Width = 16;
+    Scale = TypeSize::Fixed(16);
+    Width = 16;
     MinOffset = -256;
     MaxOffset = 255;
     break;
+  case AArch64::STR_ZZZZXI:
+  case AArch64::LDR_ZZZZXI:
+    Scale = TypeSize::Scalable(16);
+    Width = SVEMaxBytesPerVector * 4;
+    MinOffset = -256;
+    MaxOffset = 252;
+    break;
+  case AArch64::STR_ZZZXI:
+  case AArch64::LDR_ZZZXI:
+    Scale = TypeSize::Scalable(16);
+    Width = SVEMaxBytesPerVector * 3;
+    MinOffset = -256;
+    MaxOffset = 253;
+    break;
+  case AArch64::STR_ZZXI:
+  case AArch64::LDR_ZZXI:
+    Scale = TypeSize::Scalable(16);
+    Width = SVEMaxBytesPerVector * 2;
+    MinOffset = -256;
+    MaxOffset = 254;
+    break;
   case AArch64::LDR_PXI:
   case AArch64::STR_PXI:
-    Scale = Width = 2;
+    Scale = TypeSize::Scalable(2);
+    Width = SVEMaxBytesPerVector / 8;
     MinOffset = -256;
     MaxOffset = 255;
     break;
   case AArch64::LDR_ZXI:
   case AArch64::STR_ZXI:
-    Scale = Width = 16;
+    Scale = TypeSize::Scalable(16);
+    Width = SVEMaxBytesPerVector;
     MinOffset = -256;
     MaxOffset = 255;
     break;
+  case AArch64::LD1B_IMM:
+  case AArch64::LD1H_IMM:
+  case AArch64::LD1W_IMM:
+  case AArch64::LD1D_IMM:
+  case AArch64::ST1B_IMM:
+  case AArch64::ST1H_IMM:
+  case AArch64::ST1W_IMM:
+  case AArch64::ST1D_IMM:
+    // A full vector's worth of data
+    // Width = mbytes * elements
+    Scale = TypeSize::Scalable(16);
+    Width = SVEMaxBytesPerVector;
+    MinOffset = -8;
+    MaxOffset = 7;
+    break;
+  case AArch64::LD1B_H_IMM:
+  case AArch64::LD1SB_H_IMM:
+  case AArch64::LD1H_S_IMM:
+  case AArch64::LD1SH_S_IMM:
+  case AArch64::LD1W_D_IMM:
+  case AArch64::LD1SW_D_IMM:
+  case AArch64::ST1B_H_IMM:
+  case AArch64::ST1H_S_IMM:
+  case AArch64::ST1W_D_IMM:
+    // A half vector's worth of data
+    // Width = mbytes * elements
+    Scale = TypeSize::Scalable(8);
+    Width = SVEMaxBytesPerVector / 2;
+    MinOffset = -8;
+    MaxOffset = 7;
+    break;
+  case AArch64::LD1B_S_IMM:
+  case AArch64::LD1SB_S_IMM:
+  case AArch64::LD1H_D_IMM:
+  case AArch64::LD1SH_D_IMM:
+  case AArch64::ST1B_S_IMM:
+  case AArch64::ST1H_D_IMM:
+    // A quarter vector's worth of data
+    // Width = mbytes * elements
+    Scale = TypeSize::Scalable(4);
+    Width = SVEMaxBytesPerVector / 4;
+    MinOffset = -8;
+    MaxOffset = 7;
+    break;
+  case AArch64::LD1B_D_IMM:
+  case AArch64::LD1SB_D_IMM:
+  case AArch64::ST1B_D_IMM:
+    // An eighth vector's worth of data
+    // Width = mbytes * elements
+    Scale = TypeSize::Scalable(2);
+    Width = SVEMaxBytesPerVector / 8;
+    MinOffset = -8;
+    MaxOffset = 7;
+    break;
   case AArch64::ST2GOffset:
   case AArch64::STZ2GOffset:
-    Scale = 16;
+    Scale = TypeSize::Fixed(16);
     Width = 32;
     MinOffset = -256;
     MaxOffset = 255;
     break;
   case AArch64::STGPi:
-    Scale = Width = 16;
+    Scale = TypeSize::Fixed(16);
+    Width = 16;
     MinOffset = -64;
     MaxOffset = 63;
     break;
@@ -2363,9 +2524,13 @@ static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
 /// Detect opportunities for ldp/stp formation.
 ///
 /// Only called for LdSt for which getMemOperandWithOffset returns true.
-bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
-                                           const MachineOperand &BaseOp2,
-                                           unsigned NumLoads) const {
+bool AArch64InstrInfo::shouldClusterMemOps(
+    ArrayRef<const MachineOperand *> BaseOps1,
+    ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
+    unsigned NumBytes) const {
+  assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
+  const MachineOperand &BaseOp1 = *BaseOps1.front();
+  const MachineOperand &BaseOp2 = *BaseOps2.front();
   const MachineInstr &FirstLdSt = *BaseOp1.getParent();
   const MachineInstr &SecondLdSt = *BaseOp2.getParent();
   if (BaseOp1.getType() != BaseOp2.getType())
@@ -2379,7 +2544,7 @@ bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
     return false;

   // Only cluster up to a single pair.
-  if (NumLoads > 1)
+  if (NumLoads > 2)
     return false;

   if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
@@ -2822,11 +2987,11 @@ static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator InsertBefore,
                                     const MCInstrDesc &MCID,
-                                    unsigned SrcReg, bool IsKill,
+                                    Register SrcReg, bool IsKill,
                                     unsigned SubIdx0, unsigned SubIdx1, int FI,
                                     MachineMemOperand *MMO) {
-  unsigned SrcReg0 = SrcReg;
-  unsigned SrcReg1 = SrcReg;
+  Register SrcReg0 = SrcReg;
+  Register SrcReg1 = SrcReg;
   if (Register::isPhysicalRegister(SrcReg)) {
     SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
     SubIdx0 = 0;
@@ -2842,18 +3007,19 @@ static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
 }

 void AArch64InstrInfo::storeRegToStackSlot(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
     bool isKill, int FI, const TargetRegisterClass *RC,
     const TargetRegisterInfo *TRI) const {
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = MF.getFrameInfo();
-  unsigned Align = MFI.getObjectAlignment(FI);

   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
-  MachineMemOperand *MMO = MF.getMachineMemOperand(
-      PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
+  MachineMemOperand *MMO =
+      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+                              MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
   unsigned Opc = 0;
   bool Offset = true;
+  unsigned StackID = TargetStackID::Default;
   switch (TRI->getSpillSize(*RC)) {
   case 1:
     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
@@ -2862,6 +3028,11 @@ void AArch64InstrInfo::storeRegToStackSlot(
   case 2:
     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
       Opc = AArch64::STRHui;
+    else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+      Opc = AArch64::STR_PXI;
+      StackID = TargetStackID::SVEVector;
+    }
     break;
   case 4:
     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
@@ -2901,6 +3072,10 @@ void AArch64InstrInfo::storeRegToStackSlot(
                               get(AArch64::STPXi), SrcReg, isKill,
                               AArch64::sube64, AArch64::subo64, FI, MMO);
       return;
+    } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+      Opc = AArch64::STR_ZXI;
+      StackID = TargetStackID::SVEVector;
     }
     break;
   case 24:
@@ -2919,6 +3094,10 @@ void AArch64InstrInfo::storeRegToStackSlot(
       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
       Opc = AArch64::ST1Twov2d;
       Offset = false;
+    } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+      Opc = AArch64::STR_ZZXI;
+      StackID = TargetStackID::SVEVector;
     }
     break;
   case 48:
@@ -2926,6 +3105,10 @@ void AArch64InstrInfo::storeRegToStackSlot(
       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
       Opc = AArch64::ST1Threev2d;
       Offset = false;
+    } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+      Opc = AArch64::STR_ZZZXI;
+      StackID = TargetStackID::SVEVector;
     }
     break;
   case 64:
@@ -2933,19 +3116,13 @@ void AArch64InstrInfo::storeRegToStackSlot(
       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
       Opc = AArch64::ST1Fourv2d;
       Offset = false;
+    } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+      Opc = AArch64::STR_ZZZZXI;
+      StackID = TargetStackID::SVEVector;
     }
     break;
   }
-  unsigned StackID = TargetStackID::Default;
-  if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
-    assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
-    Opc = AArch64::STR_PXI;
-    StackID = TargetStackID::SVEVector;
-  } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
-    assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
-    Opc = AArch64::STR_ZXI;
-    StackID = TargetStackID::SVEVector;
-  }
   assert(Opc && "Unknown register class");
   MFI.setStackID(FI, StackID);
@@ -2962,11 +3139,11 @@ static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator InsertBefore,
                                      const MCInstrDesc &MCID,
-                                     unsigned DestReg, unsigned SubIdx0,
+                                     Register DestReg, unsigned SubIdx0,
                                      unsigned SubIdx1, int FI,
                                      MachineMemOperand *MMO) {
-  unsigned DestReg0 = DestReg;
-  unsigned DestReg1 = DestReg;
+  Register DestReg0 = DestReg;
+  Register DestReg1 = DestReg;
   bool IsUndef = true;
   if (Register::isPhysicalRegister(DestReg)) {
     DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
@@ -2984,18 +3161,19 @@ static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
 }

 void AArch64InstrInfo::loadRegFromStackSlot(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
     int FI, const TargetRegisterClass *RC,
     const TargetRegisterInfo *TRI) const {
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = MF.getFrameInfo();
-  unsigned Align = MFI.getObjectAlignment(FI);

   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
-  MachineMemOperand *MMO = MF.getMachineMemOperand(
-      PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
+  MachineMemOperand *MMO =
+      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+                              MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
   unsigned Opc = 0;
   bool Offset = true;
+  unsigned StackID = TargetStackID::Default;
   switch (TRI->getSpillSize(*RC)) {
   case 1:
     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
@@ -3004,6 +3182,11 @@ void AArch64InstrInfo::loadRegFromStackSlot(
   case 2:
     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
       Opc = AArch64::LDRHui;
+    else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+      Opc = AArch64::LDR_PXI;
+      StackID = TargetStackID::SVEVector;
+    }
     break;
   case 4:
     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
@@ -3043,6 +3226,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(
                                get(AArch64::LDPXi), DestReg, AArch64::sube64,
                                AArch64::subo64, FI, MMO);
       return;
+    } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+      Opc = AArch64::LDR_ZXI;
+      StackID = TargetStackID::SVEVector;
     }
     break;
   case 24:
@@ -3061,6 +3248,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(
       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
       Opc = AArch64::LD1Twov2d;
       Offset = false;
+    } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+      Opc = AArch64::LDR_ZZXI;
+      StackID = TargetStackID::SVEVector;
     }
     break;
   case 48:
@@ -3068,6 +3259,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(
       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
       Opc = AArch64::LD1Threev2d;
       Offset = false;
+    } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+      Opc = AArch64::LDR_ZZZXI;
+      StackID = TargetStackID::SVEVector;
     }
     break;
   case 64:
@@ -3075,20 +3270,14 @@ void AArch64InstrInfo::loadRegFromStackSlot(
       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
       Opc = AArch64::LD1Fourv2d;
       Offset = false;
+    } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+      Opc = AArch64::LDR_ZZZZXI;
+      StackID = TargetStackID::SVEVector;
     }
     break;
   }
-  unsigned StackID = TargetStackID::Default;
-  if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
-    assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
-    Opc = AArch64::LDR_PXI;
-    StackID = TargetStackID::SVEVector;
-  } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
-    assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
-    Opc = AArch64::LDR_ZXI;
-    StackID = TargetStackID::SVEVector;
-  }
   assert(Opc && "Unknown register class");
   MFI.setStackID(FI, StackID);
@@ -3100,6 +3289,17 @@ void AArch64InstrInfo::loadRegFromStackSlot(
   MI.addMemOperand(MMO);
 }

+bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
+                                           const MachineInstr &UseMI,
+                                           const TargetRegisterInfo *TRI) {
+  return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
+                                         UseMI.getIterator()),
+                [TRI](const MachineInstr &I) {
+                  return I.modifiesRegister(AArch64::NZCV, TRI) ||
+                         I.readsRegister(AArch64::NZCV, TRI);
+                });
+}
+
 // Helper function to emit a frame offset adjustment from a given
 // pointer (SrcReg), stored into DestReg. This function is explicit
 // in that it requires the opcode.
@@ -3146,6 +3346,10 @@ static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
   //     assert(Offset < (1 << 24) && "unimplemented reg plus immediate");

   const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
+  Register TmpReg = DestReg;
+  if (TmpReg == AArch64::XZR)
+    TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
+        &AArch64::GPR64RegClass);
   do {
     uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
     unsigned LocalShiftSize = 0;
@@ -3155,7 +3359,11 @@ static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
     }
     assert((ThisVal >> ShiftSize) <= MaxEncoding &&
            "Encoding cannot handle value that big");
-    auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
+
+    Offset -= ThisVal << LocalShiftSize;
+    if (Offset == 0)
+      TmpReg = DestReg;
+    auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
                    .addReg(SrcReg)
                    .addImm(Sign * (int)ThisVal);
     if (ShiftSize)
@@ -3176,8 +3384,8 @@ static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
             .addImm(Imm)
             .setMIFlag(Flag);
-      assert((Offset - Imm) == 0 && "Expected remaining offset to be zero to "
-                                    "emit a single SEH directive");
+      assert(Offset == 0 && "Expected remaining offset to be zero to "
+                            "emit a single SEH directive");
     } else if (DestReg == AArch64::SP) {
       if (HasWinCFI)
         *HasWinCFI = true;
@@ -3190,8 +3398,7 @@ static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
         *HasWinCFI = true;
     }

-    SrcReg = DestReg;
-    Offset -= ThisVal << LocalShiftSize;
+    SrcReg = TmpReg;
   } while (Offset);
 }
@@ -3414,18 +3621,6 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
   return nullptr;
 }

-static bool isSVEScaledImmInstruction(unsigned Opcode) {
-  switch (Opcode) {
-  case AArch64::LDR_ZXI:
-  case AArch64::STR_ZXI:
-  case AArch64::LDR_PXI:
-  case AArch64::STR_PXI:
-    return true;
-  default:
-    return false;
-  }
-}
-
 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
                                     StackOffset &SOffset,
                                     bool *OutUseUnscaledOp,
@@ -3458,20 +3653,23 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
   case AArch64::ST1Fourv1d:
   case AArch64::IRG:
   case AArch64::IRGstack:
+  case AArch64::STGloop:
+  case AArch64::STZGloop:
     return AArch64FrameOffsetCannotUpdate;
   }

   // Get the min/max offset and the scale.
-  unsigned Scale, Width;
+  TypeSize ScaleValue(0U, false);
+  unsigned Width;
   int64_t MinOff, MaxOff;
-  if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), Scale, Width, MinOff,
+  if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
                                       MaxOff))
     llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");

   // Construct the complete offset.
-  bool IsMulVL = isSVEScaledImmInstruction(MI.getOpcode());
-  int64_t Offset =
-      IsMulVL ? (SOffset.getScalableBytes()) : (SOffset.getBytes());
+  bool IsMulVL = ScaleValue.isScalable();
+  unsigned Scale = ScaleValue.getKnownMinSize();
+  int64_t Offset = IsMulVL ? SOffset.getScalableBytes() : SOffset.getBytes();

   const MachineOperand &ImmOpnd =
       MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
@@ -3484,9 +3682,14 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
       AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
   bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
   if (useUnscaledOp &&
-      !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, Scale, Width, MinOff, MaxOff))
+      !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
+                                      MaxOff))
     llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");

+  Scale = ScaleValue.getKnownMinSize();
+  assert(IsMulVL == ScaleValue.isScalable() &&
+         "Unscaled opcode has different value for scalable");
+
   int64_t Remainder = Offset % Scale;
   assert(!(Remainder && useUnscaledOp) &&
          "Cannot have remainder when using unscaled op");
@@ -5791,6 +5994,35 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
         return C.getMF()->getFunction().hasFnAttribute(
             "branch-target-enforcement");
       });

+  // We check to see if CFI instructions are present, and if they are
+  // we find the number of CFI instructions in the candidates.
+  unsigned CFICount = 0;
+  MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
+  for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
+       Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
+    const std::vector<MCCFIInstruction> &CFIInstructions =
+        RepeatedSequenceLocs[0].getMF()->getFrameInstructions();
+    if (MBBI->isCFIInstruction()) {
+      unsigned CFIIndex = MBBI->getOperand(0).getCFIIndex();
+      MCCFIInstruction CFI = CFIInstructions[CFIIndex];
+      CFICount++;
+    }
+    MBBI++;
+  }
+
+  // We compare the number of found CFI instructions to the number of CFI
+  // instructions in the parent function for each candidate. We must check this
+  // since if we outline one of the CFI instructions in a function, we have to
+  // outline them all for correctness. If we do not, the address offsets will be
+  // incorrect between the two sections of the program.
+  for (outliner::Candidate &C : RepeatedSequenceLocs) {
+    std::vector<MCCFIInstruction> CFIInstructions =
+        C.getMF()->getFrameInstructions();
+
+    if (CFICount > 0 && CFICount != CFIInstructions.size())
+      return outliner::OutlinedFunction();
+  }
+
   // Returns true if an instruction is safe to fix up, false otherwise.
   auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
     if (MI.isCall())
@@ -5811,23 +6043,29 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
     if (MI.mayLoadOrStore()) {
       const MachineOperand *Base; // Filled with the base operand of MI.
       int64_t Offset;             // Filled with the offset of MI.
+      bool OffsetIsScalable;

       // Does it allow us to offset the base operand and is the base the
       // register SP?
-      if (!getMemOperandWithOffset(MI, Base, Offset, &TRI) || !Base->isReg() ||
-          Base->getReg() != AArch64::SP)
+      if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
+          !Base->isReg() || Base->getReg() != AArch64::SP)
+        return false;
+
+      // Fix-up code below assumes bytes.
+      if (OffsetIsScalable)
         return false;

       // Find the minimum/maximum offset for this instruction and check
       // if fixing it up would be in range.
       int64_t MinOffset, MaxOffset;  // Unscaled offsets for the instruction.
-      unsigned Scale;                // The scale to multiply the offsets by.
+      TypeSize Scale(0U, false);     // The scale to multiply the offsets by.
       unsigned DummyWidth;
       getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);

       Offset += 16; // Update the offset to what it would be if we outlined.
-      if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
+      if (Offset < MinOffset * (int64_t)Scale.getFixedSize() ||
+          Offset > MaxOffset * (int64_t)Scale.getFixedSize())
         return false;

       // It's in range, so we can outline it.
@@ -5854,7 +6092,9 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
   } else if (LastInstrOpcode == AArch64::BL ||
-             (LastInstrOpcode == AArch64::BLR && !HasBTI)) {
+             ((LastInstrOpcode == AArch64::BLR ||
+               LastInstrOpcode == AArch64::BLRNoIP) &&
+              !HasBTI)) {
     // FIXME: Do we need to check if the code after this uses the value of LR?
     FrameID = MachineOutlinerThunk;
     NumBytesToCreateFrame = 0;
@@ -5960,6 +6200,11 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
     }
   }

+  // If we have CFI instructions, we can only outline if the outlined section
+  // can be a tail call.
+  if (FrameID != MachineOutlinerTailCall && CFICount > 0)
+    return outliner::OutlinedFunction();
+
   return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
                                     NumBytesToCreateFrame, FrameID);
 }
@@ -5986,6 +6231,10 @@ bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
   if (!AFI || AFI->hasRedZone().getValueOr(true))
     return false;

+  // FIXME: Teach the outliner to generate/handle Windows unwind info.
+  if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
+    return false;
+
   // It's safe to outline from MF.
   return true;
 }
@@ -6081,6 +6330,15 @@ AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
   if (FuncInfo->getLOHRelated().count(&MI))
     return outliner::InstrType::Illegal;

+  // We can only outline these if we will tail call the outlined function, or
+  // fix up the CFI offsets. Currently, CFI instructions are outlined only if
+  // in a tail call.
+  //
+  // FIXME: If the proper fixups for the offset are implemented, this should be
+  // possible.
+  if (MI.isCFIInstruction())
+    return outliner::InstrType::Legal;
+
   // Don't allow debug values to impact outlining type.
   if (MI.isDebugInstr() || MI.isIndirectDebugValue())
     return outliner::InstrType::Invisible;
@@ -6150,10 +6408,11 @@ AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
   // If we don't know anything about the callee, assume it depends on the
   // stack layout of the caller. In that case, it's only legal to outline
-  // as a tail-call. Whitelist the call instructions we know about so we
+  // as a tail-call. Explicitly list the call instructions we know about so we
   // don't get unexpected results with call pseudo-instructions.
   auto UnknownCallOutlineType = outliner::InstrType::Illegal;
-  if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
+  if (MI.getOpcode() == AArch64::BLR ||
+      MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
     UnknownCallOutlineType = outliner::InstrType::LegalTerminator;

   if (!Callee)
@@ -6205,26 +6464,29 @@ void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
     const MachineOperand *Base;
     unsigned Width;
     int64_t Offset;
+    bool OffsetIsScalable;

     // Is this a load or store with an immediate offset with SP as the base?
     if (!MI.mayLoadOrStore() ||
-        !getMemOperandWithOffsetWidth(MI, Base, Offset, Width, &RI) ||
+        !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
+                                      &RI) ||
         (Base->isReg() && Base->getReg() != AArch64::SP))
       continue;

     // It is, so we have to fix it up.
-    unsigned Scale;
+    TypeSize Scale(0U, false);
     int64_t Dummy1, Dummy2;

     MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
     assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
     getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
     assert(Scale != 0 && "Unexpected opcode!");
+    assert(!OffsetIsScalable && "Expected offset to be a byte offset");

     // We've pushed the return address to the stack, so add 16 to the offset.
     // This is safe, since we already checked if it would overflow when we
     // checked if this instruction was legal to outline.
-    int64_t NewImm = (Offset + 16) / Scale;
+    int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedSize();
     StackOffsetOperand.setImm(NewImm);
   }
 }
@@ -6285,15 +6547,21 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
 void AArch64InstrInfo::buildOutlinedFrame(
     MachineBasicBlock &MBB, MachineFunction &MF,
     const outliner::OutlinedFunction &OF) const {
-  // For thunk outlining, rewrite the last instruction from a call to a
-  // tail-call.
-  if (OF.FrameConstructionID == MachineOutlinerThunk) {
+
+  AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
+
+  if (OF.FrameConstructionID == MachineOutlinerTailCall)
+    FI->setOutliningStyle("Tail Call");
+  else if (OF.FrameConstructionID == MachineOutlinerThunk) {
+    // For thunk outlining, rewrite the last instruction from a call to a
+    // tail-call.
     MachineInstr *Call = &*--MBB.instr_end();
     unsigned TailOpcode;
     if (Call->getOpcode() == AArch64::BL) {
       TailOpcode = AArch64::TCRETURNdi;
     } else {
-      assert(Call->getOpcode() == AArch64::BLR);
+      assert(Call->getOpcode() == AArch64::BLR ||
+             Call->getOpcode() == AArch64::BLRNoIP);
       TailOpcode = AArch64::TCRETURNriALL;
     }
     MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
@@ -6301,6 +6569,8 @@ void AArch64InstrInfo::buildOutlinedFrame(
                            .addImm(0);
     MBB.insert(MBB.end(), TC);
     Call->eraseFromParent();
+
+    FI->setOutliningStyle("Thunk");
   }

   bool IsLeafFunction = true;
@@ -6320,7 +6590,8 @@ void AArch64InstrInfo::buildOutlinedFrame(
       IsLeafFunction = false;

     // LR has to be a live in so that we can save it.
-    MBB.addLiveIn(AArch64::LR);
+    if (!MBB.isLiveIn(AArch64::LR))
+      MBB.addLiveIn(AArch64::LR);

     MachineBasicBlock::iterator It = MBB.begin();
     MachineBasicBlock::iterator Et = MBB.end();
@@ -6343,7 +6614,7 @@ void AArch64InstrInfo::buildOutlinedFrame(
     // Add a CFI saying the stack was moved 16 B down.
     int64_t StackPosEntry =
-        MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
+        MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16));
     BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
         .addCFIIndex(StackPosEntry)
         .setMIFlags(MachineInstr::FrameSetup);
@@ -6351,7 +6622,7 @@ void AArch64InstrInfo::buildOutlinedFrame(
     // Add a CFI saying that the LR that we want to find is now 16 B higher than
     // before.
     int64_t LRPosEntry =
-        MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
+        MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, -16));
     BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
         .addCFIIndex(LRPosEntry)
         .setMIFlags(MachineInstr::FrameSetup);
@@ -6399,13 +6670,20 @@ void AArch64InstrInfo::buildOutlinedFrame(
   }

   // It's not a tail call, so we have to insert the return ourselves.
+
+  // LR has to be a live in so that we can return to it.
+  if (!MBB.isLiveIn(AArch64::LR))
+    MBB.addLiveIn(AArch64::LR);
+
   MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
-                          .addReg(AArch64::LR, RegState::Undef);
+                          .addReg(AArch64::LR);
   MBB.insert(MBB.end(), ret);

   signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
                        ShouldSignReturnAddrWithAKey);

+  FI->setOutliningStyle("Function");
+
   // Did we have to modify the stack by saving the link register?
   if (OF.FrameConstructionID != MachineOutlinerDefault)
     return;
@@ -6519,7 +6797,8 @@ Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI,
   // TODO: Handle cases where Reg is a super- or sub-register of the
   // destination register.
-  if (Reg != MI.getOperand(0).getReg())
+  const MachineOperand &Op0 = MI.getOperand(0);
+  if (!Op0.isReg() || Reg != Op0.getReg())
     return None;

   switch (MI.getOpcode()) {
@@ -6614,5 +6893,17 @@ AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
   return TargetInstrInfo::describeLoadedValue(MI, Reg);
 }

+uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
+  return get(Opc).TSFlags & AArch64::ElementSizeMask;
+}
+
+unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
+  if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
+    return AArch64::BLRNoIP;
+  else
+    return AArch64::BLR;
+}
+
 #define GET_INSTRINFO_HELPERS
+#define GET_INSTRMAP_INFO
 #include "AArch64GenInstrInfo.inc"
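
Note on the central API change in this diff: AArch64InstrInfo::getMemOpInfo() now reports the immediate scale as a TypeSize rather than a plain unsigned, so callers can tell fixed-width accesses apart from scalable SVE accesses, whose encoded immediates are multiples of vscale. The sketch below shows how a caller might consume the new signature; only getMemOpInfo(), TypeSize, and the opcode names are taken from this diff, while the wrapper function encodedOffsetInBytes() and its name are hypothetical.

// Sketch (not part of the patch): turning a load/store immediate into a
// byte offset using the TypeSize-based getMemOpInfo() introduced above.
#include "AArch64InstrInfo.h"
#include "llvm/Support/TypeSize.h"

using namespace llvm;

static int64_t encodedOffsetInBytes(unsigned Opcode, int64_t Imm,
                                    bool &OffsetIsScalable) {
  TypeSize Scale(0U, /*Scalable=*/false);
  unsigned Width;
  int64_t MinOffset, MaxOffset;
  if (!AArch64InstrInfo::getMemOpInfo(Opcode, Scale, Width, MinOffset,
                                      MaxOffset))
    return 0; // Not an immediate-offset memory operation we know about.
  assert(Imm >= MinOffset && Imm <= MaxOffset && "immediate out of range");
  // For SVE fill/spill opcodes such as LDR_ZXI, Scale is
  // TypeSize::Scalable(16): the true byte offset is Imm * 16 * vscale. For
  // fixed-width accesses, getKnownMinSize() is simply the scale in bytes.
  OffsetIsScalable = Scale.isScalable();
  return Imm * Scale.getKnownMinSize();
}

This mirrors how the patch itself uses the result in isAArch64FrameOffsetLegal(), where IsMulVL = ScaleValue.isScalable() selects between SOffset.getScalableBytes() and SOffset.getBytes().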