Diffstat (limited to 'lib/Target/AArch64/AArch64InstrInfo.cpp')
 lib/Target/AArch64/AArch64InstrInfo.cpp | 190 ++++++++++++++++++++++------
 1 file changed, 154 insertions(+), 36 deletions(-)
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 626c934f236e..5c8acba26aab 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -14,16 +14,37 @@
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-#include <algorithm>
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
using namespace llvm;
@@ -529,19 +550,19 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
default:
llvm_unreachable("Unknown branch opcode in Cond");
case AArch64::CBZW:
- Is64Bit = 0;
+ Is64Bit = false;
CC = AArch64CC::EQ;
break;
case AArch64::CBZX:
- Is64Bit = 1;
+ Is64Bit = true;
CC = AArch64CC::EQ;
break;
case AArch64::CBNZW:
- Is64Bit = 0;
+ Is64Bit = false;
CC = AArch64CC::NE;
break;
case AArch64::CBNZX:
- Is64Bit = 1;
+ Is64Bit = true;
CC = AArch64CC::NE;
break;
}
@@ -1044,7 +1065,7 @@ static unsigned sForm(MachineInstr &Instr) {
case AArch64::SUBSWri:
case AArch64::SUBSXrr:
case AArch64::SUBSXri:
- return Instr.getOpcode();;
+ return Instr.getOpcode();
case AArch64::ADDWrr: return AArch64::ADDSWrr;
case AArch64::ADDWri: return AArch64::ADDSWri;
@@ -1072,12 +1093,15 @@ static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
}
namespace {
+
struct UsedNZCV {
- bool N;
- bool Z;
- bool C;
- bool V;
- UsedNZCV(): N(false), Z(false), C(false), V(false) {}
+ bool N = false;
+ bool Z = false;
+ bool C = false;
+ bool V = false;
+
+ UsedNZCV() = default;
+
UsedNZCV& operator |=(const UsedNZCV& UsedFlags) {
this->N |= UsedFlags.N;
this->Z |= UsedFlags.Z;
@@ -1086,6 +1110,7 @@ struct UsedNZCV {
return *this;
}
};
+
} // end anonymous namespace
/// Find a condition code used by the instruction.
@@ -1561,7 +1586,7 @@ bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const {
/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const {
- return any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
+ return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
return MMO->getFlags() & MOSuppressPair;
});
}
@@ -1994,7 +2019,7 @@ static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
void AArch64InstrInfo::copyPhysRegTuple(
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
- llvm::ArrayRef<unsigned> Indices) const {
+ ArrayRef<unsigned> Indices) const {
assert(Subtarget.hasNEON() &&
"Unexpected register copy without NEON");
const TargetRegisterInfo *TRI = &getRegisterInfo();
@@ -2583,7 +2608,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
//
// <rdar://problem/11522048>
//
- if (MI.isCopy()) {
+ if (MI.isFullCopy()) {
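+    // (Full copies only: COPYs involving sub-registers are instead handled
+    // by the subreg spill/fill folding further down.)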
unsigned DstReg = MI.getOperand(0).getReg();
unsigned SrcReg = MI.getOperand(1).getReg();
if (SrcReg == AArch64::SP &&
@@ -2598,7 +2623,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
}
}
- // Handle the case where a copy is being spilled or refilled but the source
+ // Handle the case where a copy is being spilled or filled but the source
// and destination register class don't match. For example:
//
// %vreg0<def> = COPY %XZR; GPR64common:%vreg0
@@ -2613,7 +2638,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
//
// %vreg0<def> = COPY %vreg1; GPR64:%vreg0, FPR64:%vreg1
//
- // will be refilled as
+ // will be filled as
//
// LDRDui %vreg0, fi<#0>
//
@@ -2622,9 +2647,11 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
// LDRXui %vregTemp, fi<#0>
// %vreg0 = FMOV %vregTemp
//
- if (MI.isFullCopy() && Ops.size() == 1 &&
+ if (MI.isCopy() && Ops.size() == 1 &&
// Make sure we're only folding the explicit COPY defs/uses.
(Ops[0] == 0 || Ops[0] == 1)) {
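+    // Folding operand 0 (the COPY's def) spills the copied value; folding
+    // operand 1 (its use) fills it from the stack slot.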
+ bool IsSpill = Ops[0] == 0;
+ bool IsFill = !IsSpill;
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
MachineBasicBlock &MBB = *MI.getParent();
@@ -2632,21 +2659,112 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
const MachineOperand &SrcMO = MI.getOperand(1);
unsigned DstReg = DstMO.getReg();
unsigned SrcReg = SrcMO.getReg();
+ // This is slightly expensive to compute for physical regs since
+ // getMinimalPhysRegClass is slow.
auto getRegClass = [&](unsigned Reg) {
return TargetRegisterInfo::isVirtualRegister(Reg)
? MRI.getRegClass(Reg)
: TRI.getMinimalPhysRegClass(Reg);
};
- const TargetRegisterClass &DstRC = *getRegClass(DstReg);
- const TargetRegisterClass &SrcRC = *getRegClass(SrcReg);
- if (DstRC.getSize() == SrcRC.getSize()) {
- if (Ops[0] == 0)
+
+ if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
+ assert(getRegClass(DstReg)->getSize() == getRegClass(SrcReg)->getSize() &&
+ "Mismatched register size in non subreg COPY");
+ if (IsSpill)
storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
- &SrcRC, &TRI);
+ getRegClass(SrcReg), &TRI);
else
- loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, &DstRC, &TRI);
+ loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
+ getRegClass(DstReg), &TRI);
return &*--InsertPt;
}
+
+ // Handle cases like spilling def of:
+ //
+ // %vreg0:sub_32<def,read-undef> = COPY %WZR; GPR64common:%vreg0
+ //
+ // where the physical register source can be widened and stored to the full
+ // virtual reg destination stack slot, in this case producing:
+ //
+ // STRXui %XZR, <fi#0>
+ //
+ if (IsSpill && DstMO.isUndef() &&
+ TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+ assert(SrcMO.getSubReg() == 0 &&
+ "Unexpected subreg on physical register");
+ const TargetRegisterClass *SpillRC;
+ unsigned SpillSubreg;
+ switch (DstMO.getSubReg()) {
+ default:
+ SpillRC = nullptr;
+ break;
+ case AArch64::sub_32:
+ case AArch64::ssub:
+ if (AArch64::GPR32RegClass.contains(SrcReg)) {
+ SpillRC = &AArch64::GPR64RegClass;
+ SpillSubreg = AArch64::sub_32;
+ } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
+ SpillRC = &AArch64::FPR64RegClass;
+ SpillSubreg = AArch64::ssub;
+ } else
+ SpillRC = nullptr;
+ break;
+ case AArch64::dsub:
+ if (AArch64::FPR64RegClass.contains(SrcReg)) {
+ SpillRC = &AArch64::FPR128RegClass;
+ SpillSubreg = AArch64::dsub;
+ } else
+ SpillRC = nullptr;
+ break;
+ }
+
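+    // getMatchingSuperReg returns the SpillRC register whose SpillSubreg
+    // sub-register is SrcReg, e.g. widening WZR to XZR for a sub_32 spill.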
+ if (SpillRC)
+ if (unsigned WidenedSrcReg =
+ TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
+ storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
+ FrameIndex, SpillRC, &TRI);
+ return &*--InsertPt;
+ }
+ }
+
+ // Handle cases like filling use of:
+ //
+ // %vreg0:sub_32<def,read-undef> = COPY %vreg1; GPR64:%vreg0, GPR32:%vreg1
+ //
+ // where we can load the full virtual reg source stack slot, into the subreg
+ // destination, in this case producing:
+ //
+ // LDRWui %vreg0:sub_32<def,read-undef>, <fi#0>
+ //
+ if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
+ const TargetRegisterClass *FillRC;
+ switch (DstMO.getSubReg()) {
+ default:
+ FillRC = nullptr;
+ break;
+ case AArch64::sub_32:
+ FillRC = &AArch64::GPR32RegClass;
+ break;
+ case AArch64::ssub:
+ FillRC = &AArch64::FPR32RegClass;
+ break;
+ case AArch64::dsub:
+ FillRC = &AArch64::FPR64RegClass;
+ break;
+ }
+
+ if (FillRC) {
+ assert(getRegClass(SrcReg)->getSize() == FillRC->getSize() &&
+ "Mismatched regclass size on folded subreg COPY");
+ loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
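+      // Narrow the def of the just-created load to the COPY's destination
+      // sub-register, marking the remaining lanes undef.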
+ MachineInstr &LoadMI = *--InsertPt;
+ MachineOperand &LoadDst = LoadMI.getOperand(0);
+ assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
+ LoadDst.setSubReg(DstMO.getSubReg());
+ LoadDst.setIsUndef();
+ return &LoadMI;
+ }
+ }
}
// Cannot fold.
@@ -2936,7 +3054,7 @@ bool AArch64InstrInfo::useMachineCombiner() const {
return true;
}
-//
+
// True when Opc sets flag
static bool isCombineInstrSettingFlag(unsigned Opc) {
switch (Opc) {
@@ -2955,7 +3073,7 @@ static bool isCombineInstrSettingFlag(unsigned Opc) {
}
return false;
}
-//
+
// 32b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate32(unsigned Opc) {
switch (Opc) {
@@ -2974,7 +3092,7 @@ static bool isCombineInstrCandidate32(unsigned Opc) {
}
return false;
}
-//
+
// 64b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate64(unsigned Opc) {
switch (Opc) {
@@ -2993,7 +3111,7 @@ static bool isCombineInstrCandidate64(unsigned Opc) {
}
return false;
}
-//
+
// FP Opcodes that can be combined with a FMUL
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
switch (Inst.getOpcode()) {
@@ -3009,13 +3127,13 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
case AArch64::FSUBv2f32:
case AArch64::FSUBv2f64:
case AArch64::FSUBv4f32:
- TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
- return (Options.UnsafeFPMath ||
- Options.AllowFPOpFusion == FPOpFusion::Fast);
+ TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
+ return (Options.UnsafeFPMath ||
+ Options.AllowFPOpFusion == FPOpFusion::Fast);
}
return false;
}
-//
+
// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
@@ -3205,7 +3323,7 @@ static bool getFMAPatterns(MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern> &Patterns) {
if (!isCombineInstrCandidateFP(Root))
- return 0;
+ return false;
MachineBasicBlock &MBB = *Root.getParent();
bool Found = false;
@@ -3971,8 +4089,6 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
// Record MUL and ADD/SUB for deletion
DelInstrs.push_back(MUL);
DelInstrs.push_back(&Root);
-
- return;
}
/// \brief Replace csincr-branch sequence by simple conditional branch
@@ -4148,6 +4264,7 @@ AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
using namespace AArch64II;
+
static const std::pair<unsigned, const char *> TargetFlags[] = {
{MO_PAGE, "aarch64-page"},
{MO_PAGEOFF, "aarch64-pageoff"},
@@ -4162,6 +4279,7 @@ AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
using namespace AArch64II;
+
static const std::pair<unsigned, const char *> TargetFlags[] = {
{MO_GOT, "aarch64-got"},
{MO_NC, "aarch64-nc"},