Diffstat (limited to 'lib/Target/AArch64/AArch64InstrInfo.cpp')
-rw-r--r-- lib/Target/AArch64/AArch64InstrInfo.cpp | 1743
1 file changed, 1344 insertions(+), 399 deletions(-)
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index f398117de953b..0aa4708f35ac4 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -22,27 +22,31 @@
#include "llvm/MC/MCInst.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
+#include <algorithm>
using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"
+static LLVM_CONSTEXPR MachineMemOperand::Flags MOSuppressPair =
+ MachineMemOperand::MOTargetFlag1;
+
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
: AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
RI(STI.getTargetTriple()), Subtarget(STI) {}
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may occupy. This returns the maximum number of bytes.
-unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
- const MachineBasicBlock &MBB = *MI->getParent();
+unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr &MI) const {
+ const MachineBasicBlock &MBB = *MI.getParent();
const MachineFunction *MF = MBB.getParent();
const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
- if (MI->getOpcode() == AArch64::INLINEASM)
- return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
+ if (MI.getOpcode() == AArch64::INLINEASM)
+ return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
- const MCInstrDesc &Desc = MI->getDesc();
+ const MCInstrDesc &Desc = MI.getDesc();
switch (Desc.getOpcode()) {
default:
// Anything not explicitly designated otherwise is a normal 4-byte insn.
@@ -89,25 +93,25 @@ static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
}
// Branch analysis.
-bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
+bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
if (I == MBB.end())
return false;
- if (!isUnpredicatedTerminator(I))
+ if (!isUnpredicatedTerminator(*I))
return false;
// Get the last instruction in the block.
- MachineInstr *LastInst = I;
+ MachineInstr *LastInst = &*I;
// If there is only one terminator instruction, process it.
unsigned LastOpc = LastInst->getOpcode();
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
if (isUncondBranchOpcode(LastOpc)) {
TBB = LastInst->getOperand(0).getMBB();
return false;
@@ -121,7 +125,7 @@ bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
}
// Get the instruction before it if it is a terminator.
- MachineInstr *SecondLastInst = I;
+ MachineInstr *SecondLastInst = &*I;
unsigned SecondLastOpc = SecondLastInst->getOpcode();
// If AllowModify is true and the block ends with two or more unconditional
@@ -131,19 +135,19 @@ bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
LastInst->eraseFromParent();
LastInst = SecondLastInst;
LastOpc = LastInst->getOpcode();
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
// Return now; the only terminator is an unconditional branch.
TBB = LastInst->getOperand(0).getMBB();
return false;
} else {
- SecondLastInst = I;
+ SecondLastInst = &*I;
SecondLastOpc = SecondLastInst->getOpcode();
}
}
}
// If there are three terminators, we don't know what sort of block this is.
- if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
return true;
// If the block ends with a B and a Bcc, handle it.
@@ -243,7 +247,7 @@ unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
}
void AArch64InstrInfo::instantiateCondBranch(
- MachineBasicBlock &MBB, DebugLoc DL, MachineBasicBlock *TBB,
+ MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
ArrayRef<MachineOperand> Cond) const {
if (Cond[0].getImm() != -1) {
// Regular Bcc
@@ -259,9 +263,11 @@ void AArch64InstrInfo::instantiateCondBranch(
}
}
-unsigned AArch64InstrInfo::InsertBranch(
- MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
- ArrayRef<MachineOperand> Cond, DebugLoc DL) const {
+unsigned AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ ArrayRef<MachineOperand> Cond,
+ const DebugLoc &DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
@@ -399,8 +405,8 @@ bool AArch64InstrInfo::canInsertSelect(
}
void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DstReg,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, unsigned DstReg,
ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -533,8 +539,8 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
}
/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
-static bool canBeExpandedToORR(const MachineInstr *MI, unsigned BitSize) {
- uint64_t Imm = MI->getOperand(1).getImm();
+static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
+ uint64_t Imm = MI.getOperand(1).getImm();
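+  // The shift pair below truncates Imm to its low BitSize bits, e.g. with
+  // BitSize == 32, Imm == 0xFFFFFFFF00000001 yields UImm == 0x1.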
uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
uint64_t Encoding;
return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
@@ -542,11 +548,13 @@ static bool canBeExpandedToORR(const MachineInstr *MI, unsigned BitSize) {
// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
-bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
- if (!Subtarget.isCortexA57() && !Subtarget.isCortexA53())
- return MI->isAsCheapAsAMove();
+bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
+ if (!Subtarget.hasCustomCheapAsMoveHandling())
+ return MI.isAsCheapAsAMove();
+
+ unsigned Imm;
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
default:
return false;
@@ -555,7 +563,17 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
case AArch64::ADDXri:
case AArch64::SUBWri:
case AArch64::SUBXri:
- return (MI->getOperand(3).getImm() == 0);
+ return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 ||
+ MI.getOperand(3).getImm() == 0);
+
+ // add/sub on register with shift
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ Imm = MI.getOperand(3).getImm();
+ return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
+ AArch64_AM::getArithShiftValue(Imm) < 4);
// logical ops on immediate
case AArch64::ANDWri:
@@ -580,12 +598,41 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
case AArch64::ORRWrr:
case AArch64::ORRXrr:
return true;
+
+ // logical ops on register with shift
+ case AArch64::ANDWrs:
+ case AArch64::ANDXrs:
+ case AArch64::BICWrs:
+ case AArch64::BICXrs:
+ case AArch64::EONWrs:
+ case AArch64::EONXrs:
+ case AArch64::EORWrs:
+ case AArch64::EORXrs:
+ case AArch64::ORNWrs:
+ case AArch64::ORNXrs:
+ case AArch64::ORRWrs:
+ case AArch64::ORRXrs:
+ Imm = MI.getOperand(3).getImm();
+ return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
+ AArch64_AM::getShiftValue(Imm) < 4 &&
+ AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL);
+
// If MOVi32imm or MOVi64imm can be expanded into ORRWri or
// ORRXri, it is as cheap as MOV
case AArch64::MOVi32imm:
return canBeExpandedToORR(MI, 32);
case AArch64::MOVi64imm:
return canBeExpandedToORR(MI, 64);
+
+  // It is cheap to zero out registers if the subtarget has the
+  // ZeroCycleZeroing feature.
+ case AArch64::FMOVS0:
+ case AArch64::FMOVD0:
+ return Subtarget.hasZeroCycleZeroing();
+ case TargetOpcode::COPY:
+ return (Subtarget.hasZeroCycleZeroing() &&
+ (MI.getOperand(1).getReg() == AArch64::WZR ||
+ MI.getOperand(1).getReg() == AArch64::XZR));
}
llvm_unreachable("Unknown opcode to check as cheap as a move!");
@@ -611,20 +658,18 @@ bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
}
}
-bool
-AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
- MachineInstr *MIb,
- AliasAnalysis *AA) const {
+bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
+ MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
const TargetRegisterInfo *TRI = &getRegisterInfo();
unsigned BaseRegA = 0, BaseRegB = 0;
- int OffsetA = 0, OffsetB = 0;
- int WidthA = 0, WidthB = 0;
+ int64_t OffsetA = 0, OffsetB = 0;
+ unsigned WidthA = 0, WidthB = 0;
- assert(MIa && MIa->mayLoadOrStore() && "MIa must be a load or store.");
- assert(MIb && MIb->mayLoadOrStore() && "MIb must be a load or store.");
+ assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
+ assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
- if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
- MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
+ if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
+ MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
return false;
// Retrieve the base register, offset from the base register and width. Width
@@ -648,10 +693,10 @@ AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
-bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
unsigned &SrcReg2, int &CmpMask,
int &CmpValue) const {
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
default:
break;
case AArch64::SUBSWrr:
@@ -667,8 +712,8 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
case AArch64::ADDSXrs:
case AArch64::ADDSXrx:
// Replace SUBSWrr with SUBWrr if NZCV is not used.
- SrcReg = MI->getOperand(1).getReg();
- SrcReg2 = MI->getOperand(2).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ SrcReg2 = MI.getOperand(2).getReg();
CmpMask = ~0;
CmpValue = 0;
return true;
@@ -676,17 +721,17 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
case AArch64::ADDSWri:
case AArch64::SUBSXri:
case AArch64::ADDSXri:
- SrcReg = MI->getOperand(1).getReg();
+ SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
// FIXME: In order to convert CmpValue to 0 or 1
- CmpValue = (MI->getOperand(2).getImm() != 0);
+ CmpValue = MI.getOperand(2).getImm() != 0;
return true;
case AArch64::ANDSWri:
case AArch64::ANDSXri:
// ANDS does not use the same encoding scheme as the others xxxS
// instructions.
- SrcReg = MI->getOperand(1).getReg();
+ SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
// FIXME:The return val type of decodeLogicalImmediate is uint64_t,
@@ -694,17 +739,17 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
// the high 32 bits of uint64_t will be lost.
// In fact it causes a bug in spec2006-483.xalancbmk
// CmpValue is only used to compare with zero in OptimizeCompareInstr
- CmpValue = (AArch64_AM::decodeLogicalImmediate(
- MI->getOperand(2).getImm(),
- MI->getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0);
+ CmpValue = AArch64_AM::decodeLogicalImmediate(
+ MI.getOperand(2).getImm(),
+ MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
return true;
}
return false;
}
-static bool UpdateOperandRegClass(MachineInstr *Instr) {
- MachineBasicBlock *MBB = Instr->getParent();
+static bool UpdateOperandRegClass(MachineInstr &Instr) {
+ MachineBasicBlock *MBB = Instr.getParent();
assert(MBB && "Can't get MachineBasicBlock here");
MachineFunction *MF = MBB->getParent();
assert(MF && "Can't get MachineFunction here");
@@ -712,11 +757,11 @@ static bool UpdateOperandRegClass(MachineInstr *Instr) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
MachineRegisterInfo *MRI = &MF->getRegInfo();
- for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx;
+ for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
++OpIdx) {
- MachineOperand &MO = Instr->getOperand(OpIdx);
+ MachineOperand &MO = Instr.getOperand(OpIdx);
const TargetRegisterClass *OpRegCstraints =
- Instr->getRegClassConstraint(OpIdx, TII, TRI);
+ Instr.getRegClassConstraint(OpIdx, TII, TRI);
// If there's no constraint, there's nothing to do.
if (!OpRegCstraints)
@@ -744,16 +789,16 @@ static bool UpdateOperandRegClass(MachineInstr *Instr) {
/// \brief Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible for doing the actual
/// substitution and legality checking.
-static unsigned convertFlagSettingOpcode(const MachineInstr *MI) {
+static unsigned convertFlagSettingOpcode(const MachineInstr &MI) {
// Don't convert all compare instructions, because for some the zero register
// encoding becomes the sp register.
bool MIDefinesZeroReg = false;
- if (MI->definesRegister(AArch64::WZR) || MI->definesRegister(AArch64::XZR))
+ if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
MIDefinesZeroReg = true;
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
default:
- return MI->getOpcode();
+ return MI.getOpcode();
case AArch64::ADDSWrr:
return AArch64::ADDWrr;
case AArch64::ADDSWri:
@@ -789,60 +834,76 @@ static unsigned convertFlagSettingOpcode(const MachineInstr *MI) {
}
}
-/// True when condition code could be modified on the instruction
-/// trace starting at from and ending at to.
-static bool modifiesConditionCode(MachineInstr *From, MachineInstr *To,
- const bool CheckOnlyCCWrites,
- const TargetRegisterInfo *TRI) {
- // We iterate backward starting \p To until we hit \p From
- MachineBasicBlock::iterator I = To, E = From, B = To->getParent()->begin();
+enum AccessKind {
+ AK_Write = 0x01,
+ AK_Read = 0x10,
+ AK_All = 0x11
+};
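+// The enumerators are bit masks: AK_All (0x11) covers both AK_Write (0x01)
+// and AK_Read (0x10), so a single bitwise AND selects the access kinds of
+// interest.
+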
+/// True when condition flags are accessed (either by writing or reading)
+/// on the instruction trace starting at From and ending at To.
+///
+/// Note: If From and To are from different blocks, it's assumed the flags are
+/// accessed on the path.
+static bool areCFlagsAccessedBetweenInstrs(
+ MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
+ const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
// Early exit if To is at the beginning of the BB.
- if (I == B)
+ if (To == To->getParent()->begin())
return true;
- // Check whether the definition of SrcReg is in the same basic block as
- // Compare. If not, assume the condition code gets modified on some path.
+ // Check whether the instructions are in the same basic block
+ // If not, assume the condition flags might get modified somewhere.
if (To->getParent() != From->getParent())
return true;
- // Check that NZCV isn't set on the trace.
- for (--I; I != E; --I) {
- const MachineInstr &Instr = *I;
+ // From must be above To.
+ assert(std::find_if(MachineBasicBlock::reverse_iterator(To),
+ To->getParent()->rend(), [From](MachineInstr &MI) {
+ return MachineBasicBlock::iterator(MI) == From;
+ }) != To->getParent()->rend());
- if (Instr.modifiesRegister(AArch64::NZCV, TRI) ||
- (!CheckOnlyCCWrites && Instr.readsRegister(AArch64::NZCV, TRI)))
- // This instruction modifies or uses NZCV after the one we want to
- // change.
- return true;
- if (I == B)
- // We currently don't allow the instruction trace to cross basic
- // block boundaries
+  // We iterate backward starting at \p To until we hit \p From.
+ for (--To; To != From; --To) {
+ const MachineInstr &Instr = *To;
+
+    if (((AccessToCheck & AK_Write) &&
+         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
+        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
return true;
}
return false;
}
-/// optimizeCompareInstr - Convert the instruction supplying the argument to the
-/// comparison into one that sets the zero bit in the flags register.
+
+/// Try to optimize a compare instruction. A compare instruction is an
+/// instruction which produces AArch64::NZCV. It is truly a compare
+/// instruction when there are no uses of its destination register.
+///
+/// The following steps are tried in order:
+/// 1. Convert CmpInstr into an unconditional version.
+/// 2. Remove CmpInstr if above it there is an instruction producing a needed
+///    condition code or an instruction which can be converted into such an
+///    instruction. Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
- MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
+ MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
int CmpValue, const MachineRegisterInfo *MRI) const {
+ assert(CmpInstr.getParent());
+ assert(MRI);
// Replace SUBSWrr with SUBWrr if NZCV is not used.
- int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true);
- if (Cmp_NZCV != -1) {
- if (CmpInstr->definesRegister(AArch64::WZR) ||
- CmpInstr->definesRegister(AArch64::XZR)) {
- CmpInstr->eraseFromParent();
+ int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
+ if (DeadNZCVIdx != -1) {
+ if (CmpInstr.definesRegister(AArch64::WZR) ||
+ CmpInstr.definesRegister(AArch64::XZR)) {
+ CmpInstr.eraseFromParent();
return true;
}
- unsigned Opc = CmpInstr->getOpcode();
+ unsigned Opc = CmpInstr.getOpcode();
unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
if (NewOpc == Opc)
return false;
const MCInstrDesc &MCID = get(NewOpc);
- CmpInstr->setDesc(MCID);
- CmpInstr->RemoveOperand(Cmp_NZCV);
+ CmpInstr.setDesc(MCID);
+ CmpInstr.RemoveOperand(DeadNZCVIdx);
bool succeeded = UpdateOperandRegClass(CmpInstr);
(void)succeeded;
assert(succeeded && "Some operands reg class are incompatible!");
@@ -857,23 +918,21 @@ bool AArch64InstrInfo::optimizeCompareInstr(
return false;
// CmpInstr is a Compare instruction if its destination register is not used.
- if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg()))
- return false;
-
- // Get the unique definition of SrcReg.
- MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
- if (!MI)
+ if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
return false;
- bool CheckOnlyCCWrites = false;
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- if (modifiesConditionCode(MI, CmpInstr, CheckOnlyCCWrites, TRI))
- return false;
+ return substituteCmpToZero(CmpInstr, SrcReg, MRI);
+}
- unsigned NewOpc = MI->getOpcode();
- switch (MI->getOpcode()) {
+/// Get the opcode of the S (flag-setting) version of Instr.
+/// If Instr is already an S version, its opcode is returned.
+/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
+/// version or we are not interested in one.
+static unsigned sForm(MachineInstr &Instr) {
+ switch (Instr.getOpcode()) {
default:
- return false;
+ return AArch64::INSTRUCTION_LIST_END;
+
case AArch64::ADDSWrr:
case AArch64::ADDSWri:
case AArch64::ADDSXrr:
@@ -882,116 +941,221 @@ bool AArch64InstrInfo::optimizeCompareInstr(
case AArch64::SUBSWri:
case AArch64::SUBSXrr:
case AArch64::SUBSXri:
- break;
- case AArch64::ADDWrr: NewOpc = AArch64::ADDSWrr; break;
- case AArch64::ADDWri: NewOpc = AArch64::ADDSWri; break;
- case AArch64::ADDXrr: NewOpc = AArch64::ADDSXrr; break;
- case AArch64::ADDXri: NewOpc = AArch64::ADDSXri; break;
- case AArch64::ADCWr: NewOpc = AArch64::ADCSWr; break;
- case AArch64::ADCXr: NewOpc = AArch64::ADCSXr; break;
- case AArch64::SUBWrr: NewOpc = AArch64::SUBSWrr; break;
- case AArch64::SUBWri: NewOpc = AArch64::SUBSWri; break;
- case AArch64::SUBXrr: NewOpc = AArch64::SUBSXrr; break;
- case AArch64::SUBXri: NewOpc = AArch64::SUBSXri; break;
- case AArch64::SBCWr: NewOpc = AArch64::SBCSWr; break;
- case AArch64::SBCXr: NewOpc = AArch64::SBCSXr; break;
- case AArch64::ANDWri: NewOpc = AArch64::ANDSWri; break;
- case AArch64::ANDXri: NewOpc = AArch64::ANDSXri; break;
- }
-
- // Scan forward for the use of NZCV.
- // When checking against MI: if it's a conditional code requires
- // checking of V bit, then this is not safe to do.
- // It is safe to remove CmpInstr if NZCV is redefined or killed.
- // If we are done with the basic block, we need to check whether NZCV is
- // live-out.
- bool IsSafe = false;
- for (MachineBasicBlock::iterator I = CmpInstr,
- E = CmpInstr->getParent()->end();
- !IsSafe && ++I != E;) {
- const MachineInstr &Instr = *I;
- for (unsigned IO = 0, EO = Instr.getNumOperands(); !IsSafe && IO != EO;
- ++IO) {
- const MachineOperand &MO = Instr.getOperand(IO);
- if (MO.isRegMask() && MO.clobbersPhysReg(AArch64::NZCV)) {
- IsSafe = true;
- break;
- }
- if (!MO.isReg() || MO.getReg() != AArch64::NZCV)
- continue;
- if (MO.isDef()) {
- IsSafe = true;
- break;
- }
+    return Instr.getOpcode();
+
+ case AArch64::ADDWrr: return AArch64::ADDSWrr;
+ case AArch64::ADDWri: return AArch64::ADDSWri;
+ case AArch64::ADDXrr: return AArch64::ADDSXrr;
+ case AArch64::ADDXri: return AArch64::ADDSXri;
+ case AArch64::ADCWr: return AArch64::ADCSWr;
+ case AArch64::ADCXr: return AArch64::ADCSXr;
+ case AArch64::SUBWrr: return AArch64::SUBSWrr;
+ case AArch64::SUBWri: return AArch64::SUBSWri;
+ case AArch64::SUBXrr: return AArch64::SUBSXrr;
+ case AArch64::SUBXri: return AArch64::SUBSXri;
+ case AArch64::SBCWr: return AArch64::SBCSWr;
+ case AArch64::SBCXr: return AArch64::SBCSXr;
+ case AArch64::ANDWri: return AArch64::ANDSWri;
+ case AArch64::ANDXri: return AArch64::ANDSXri;
+ }
+}
- // Decode the condition code.
- unsigned Opc = Instr.getOpcode();
- AArch64CC::CondCode CC;
- switch (Opc) {
- default:
- return false;
- case AArch64::Bcc:
- CC = (AArch64CC::CondCode)Instr.getOperand(IO - 2).getImm();
- break;
- case AArch64::CSINVWr:
- case AArch64::CSINVXr:
- case AArch64::CSINCWr:
- case AArch64::CSINCXr:
- case AArch64::CSELWr:
- case AArch64::CSELXr:
- case AArch64::CSNEGWr:
- case AArch64::CSNEGXr:
- case AArch64::FCSELSrrr:
- case AArch64::FCSELDrrr:
- CC = (AArch64CC::CondCode)Instr.getOperand(IO - 1).getImm();
- break;
- }
+/// Check if AArch64::NZCV should be alive in successors of MBB.
+static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
+ for (auto *BB : MBB->successors())
+ if (BB->isLiveIn(AArch64::NZCV))
+ return true;
+ return false;
+}
- // It is not safe to remove Compare instruction if Overflow(V) is used.
- switch (CC) {
- default:
- // NZCV can be used multiple times, we should continue.
- break;
- case AArch64CC::VS:
- case AArch64CC::VC:
- case AArch64CC::GE:
- case AArch64CC::LT:
- case AArch64CC::GT:
- case AArch64CC::LE:
- return false;
- }
+struct UsedNZCV {
+ bool N;
+ bool Z;
+ bool C;
+ bool V;
+  UsedNZCV() : N(false), Z(false), C(false), V(false) {}
+  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
+ this->N |= UsedFlags.N;
+ this->Z |= UsedFlags.Z;
+ this->C |= UsedFlags.C;
+ this->V |= UsedFlags.V;
+ return *this;
+ }
+};
+
+/// Find a condition code used by the instruction.
+/// Returns AArch64CC::Invalid if either the instruction does not use condition
+/// codes or we don't optimize CmpInstr in the presence of such instructions.
+static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
+ switch (Instr.getOpcode()) {
+ default:
+ return AArch64CC::Invalid;
+
+ case AArch64::Bcc: {
+ int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
+ assert(Idx >= 2);
+ return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
}
+
+ case AArch64::CSINVWr:
+ case AArch64::CSINVXr:
+ case AArch64::CSINCWr:
+ case AArch64::CSINCXr:
+ case AArch64::CSELWr:
+ case AArch64::CSELXr:
+ case AArch64::CSNEGWr:
+ case AArch64::CSNEGXr:
+ case AArch64::FCSELSrrr:
+ case AArch64::FCSELDrrr: {
+ int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
+ assert(Idx >= 1);
+ return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
+ }
+ }
+}
+
+static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
+ assert(CC != AArch64CC::Invalid);
+ UsedNZCV UsedFlags;
+ switch (CC) {
+ default:
+ break;
+
+ case AArch64CC::EQ: // Z set
+ case AArch64CC::NE: // Z clear
+ UsedFlags.Z = true;
+ break;
+
+ case AArch64CC::HI: // Z clear and C set
+ case AArch64CC::LS: // Z set or C clear
+ UsedFlags.Z = true;
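+    // HI/LS also depend on C, so fall through.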
+ case AArch64CC::HS: // C set
+ case AArch64CC::LO: // C clear
+ UsedFlags.C = true;
+ break;
+
+ case AArch64CC::MI: // N set
+ case AArch64CC::PL: // N clear
+ UsedFlags.N = true;
+ break;
+
+ case AArch64CC::VS: // V set
+ case AArch64CC::VC: // V clear
+ UsedFlags.V = true;
+ break;
+
+ case AArch64CC::GT: // Z clear, N and V the same
+ case AArch64CC::LE: // Z set, N and V differ
+ UsedFlags.Z = true;
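+    // GT/LE also depend on N and V, so fall through.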
+ case AArch64CC::GE: // N and V the same
+ case AArch64CC::LT: // N and V differ
+ UsedFlags.N = true;
+ UsedFlags.V = true;
+ break;
}
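+  // For instance, getUsedNZCV(AArch64CC::HI) reports {Z, C} as used, while
+  // getUsedNZCV(AArch64CC::GT) reports {Z, N, V}.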
+ return UsedFlags;
+}
+
+static bool isADDSRegImm(unsigned Opcode) {
+ return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
+}
+
+static bool isSUBSRegImm(unsigned Opcode) {
+ return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
+}
+
+/// Check if CmpInstr can be substituted by MI.
+///
+/// CmpInstr can be substituted:
+/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
+/// - and, MI and CmpInstr are from the same MachineBB
+/// - and, condition flags are not alive in successors of the CmpInstr parent
+/// - and, if MI opcode is the S form there must be no defs of flags between
+/// MI and CmpInstr
+/// or if MI opcode is not the S form there must be neither defs of flags
+/// nor uses of flags between MI and CmpInstr.
+/// - and C/V flags are not used after CmpInstr
+static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
+ const TargetRegisterInfo *TRI) {
+ assert(MI);
+ assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
+ assert(CmpInstr);
+
+ const unsigned CmpOpcode = CmpInstr->getOpcode();
+ if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
+ return false;
- // If NZCV is not killed nor re-defined, we should check whether it is
- // live-out. If it is live-out, do not optimize.
- if (!IsSafe) {
- MachineBasicBlock *ParentBlock = CmpInstr->getParent();
- for (auto *MBB : ParentBlock->successors())
- if (MBB->isLiveIn(AArch64::NZCV))
+ if (MI->getParent() != CmpInstr->getParent())
+ return false;
+
+ if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
+ return false;
+
+ AccessKind AccessToCheck = AK_Write;
+ if (sForm(*MI) != MI->getOpcode())
+ AccessToCheck = AK_All;
+ if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
+ return false;
+
+ UsedNZCV NZCVUsedAfterCmp;
+  for (auto I = std::next(CmpInstr->getIterator()),
+            E = CmpInstr->getParent()->instr_end();
+       I != E; ++I) {
+ const MachineInstr &Instr = *I;
+ if (Instr.readsRegister(AArch64::NZCV, TRI)) {
+ AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
+ if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
return false;
+ NZCVUsedAfterCmp |= getUsedNZCV(CC);
+ }
+
+ if (Instr.modifiesRegister(AArch64::NZCV, TRI))
+ break;
}
+
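+  // N and Z of 'ADDS/SUBS %vreg, 0' agree with those produced by the S form
+  // of the defining instruction, but C and V in general do not, so the
+  // substitution is only safe when no later reader depends on C or V.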
+ return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
+}
+
+/// Substitute an instruction comparing to zero with another instruction
+/// which produces needed condition flags.
+///
+/// Return true on success.
+bool AArch64InstrInfo::substituteCmpToZero(
+ MachineInstr &CmpInstr, unsigned SrcReg,
+ const MachineRegisterInfo *MRI) const {
+ assert(MRI);
+ // Get the unique definition of SrcReg.
+ MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
+ if (!MI)
+ return false;
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+ unsigned NewOpc = sForm(*MI);
+ if (NewOpc == AArch64::INSTRUCTION_LIST_END)
+ return false;
+
+ if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
+ return false;
// Update the instruction to set NZCV.
MI->setDesc(get(NewOpc));
- CmpInstr->eraseFromParent();
- bool succeeded = UpdateOperandRegClass(MI);
+ CmpInstr.eraseFromParent();
+ bool succeeded = UpdateOperandRegClass(*MI);
(void)succeeded;
assert(succeeded && "Some operands reg class are incompatible!");
MI->addRegisterDefined(AArch64::NZCV, TRI);
return true;
}
-bool
-AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
- if (MI->getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
+bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+ if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
return false;
- MachineBasicBlock &MBB = *MI->getParent();
- DebugLoc DL = MI->getDebugLoc();
- unsigned Reg = MI->getOperand(0).getReg();
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Reg = MI.getOperand(0).getReg();
const GlobalValue *GV =
- cast<GlobalValue>((*MI->memoperands_begin())->getValue());
+ cast<GlobalValue>((*MI.memoperands_begin())->getValue());
const TargetMachine &TM = MBB.getParent()->getTarget();
unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
const unsigned char MO_NC = AArch64II::MO_NC;
@@ -1000,8 +1164,9 @@ AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
.addGlobalAddress(GV, 0, AArch64II::MO_GOT);
BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
- .addReg(Reg, RegState::Kill).addImm(0)
- .addMemOperand(*MI->memoperands_begin());
+ .addReg(Reg, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(*MI.memoperands_begin());
} else if (TM.getCodeModel() == CodeModel::Large) {
BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
.addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
@@ -1015,8 +1180,9 @@ AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
.addReg(Reg, RegState::Kill)
.addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
- .addReg(Reg, RegState::Kill).addImm(0)
- .addMemOperand(*MI->memoperands_begin());
+ .addReg(Reg, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(*MI.memoperands_begin());
} else {
BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
.addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
@@ -1024,7 +1190,7 @@ AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
.addReg(Reg, RegState::Kill)
.addGlobalAddress(GV, 0, LoFlags)
- .addMemOperand(*MI->memoperands_begin());
+ .addMemOperand(*MI.memoperands_begin());
}
MBB.erase(MI);
@@ -1033,8 +1199,8 @@ AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
}
/// Return true if this instruction has a shifted register operand with a
/// non-zero shift amount.
-bool AArch64InstrInfo::hasShiftedReg(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
+bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
default:
break;
case AArch64::ADDSWrs:
@@ -1069,8 +1235,8 @@ bool AArch64InstrInfo::hasShiftedReg(const MachineInstr *MI) const {
case AArch64::SUBSXrs:
case AArch64::SUBWrs:
case AArch64::SUBXrs:
- if (MI->getOperand(3).isImm()) {
- unsigned val = MI->getOperand(3).getImm();
+ if (MI.getOperand(3).isImm()) {
+ unsigned val = MI.getOperand(3).getImm();
return (val != 0);
}
break;
@@ -1079,8 +1245,8 @@ bool AArch64InstrInfo::hasShiftedReg(const MachineInstr *MI) const {
}
/// Return true if this instruction has an extended register operand with a
/// non-zero extend/shift immediate.
-bool AArch64InstrInfo::hasExtendedReg(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
+bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
default:
break;
case AArch64::ADDSWrx:
@@ -1095,8 +1261,8 @@ bool AArch64InstrInfo::hasExtendedReg(const MachineInstr *MI) const {
case AArch64::SUBWrx:
case AArch64::SUBXrx:
case AArch64::SUBXrx64:
- if (MI->getOperand(3).isImm()) {
- unsigned val = MI->getOperand(3).getImm();
+ if (MI.getOperand(3).isImm()) {
+ unsigned val = MI.getOperand(3).getImm();
return (val != 0);
}
break;
@@ -1107,51 +1273,51 @@ bool AArch64InstrInfo::hasExtendedReg(const MachineInstr *MI) const {
// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
-bool AArch64InstrInfo::isGPRZero(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
+bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
default:
break;
case AArch64::MOVZWi:
case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
- if (MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) {
- assert(MI->getDesc().getNumOperands() == 3 &&
- MI->getOperand(2).getImm() == 0 && "invalid MOVZi operands");
+ if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
+ assert(MI.getDesc().getNumOperands() == 3 &&
+ MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
return true;
}
break;
case AArch64::ANDWri: // and Rd, Rzr, #imm
- return MI->getOperand(1).getReg() == AArch64::WZR;
+ return MI.getOperand(1).getReg() == AArch64::WZR;
case AArch64::ANDXri:
- return MI->getOperand(1).getReg() == AArch64::XZR;
+ return MI.getOperand(1).getReg() == AArch64::XZR;
case TargetOpcode::COPY:
- return MI->getOperand(1).getReg() == AArch64::WZR;
+ return MI.getOperand(1).getReg() == AArch64::WZR;
}
return false;
}
// Return true if this instruction simply renames a general register without
// modifying bits.
-bool AArch64InstrInfo::isGPRCopy(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
+bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
default:
break;
case TargetOpcode::COPY: {
// GPR32 copies will be lowered to ORRXrs
- unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned DstReg = MI.getOperand(0).getReg();
return (AArch64::GPR32RegClass.contains(DstReg) ||
AArch64::GPR64RegClass.contains(DstReg));
}
case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
- if (MI->getOperand(1).getReg() == AArch64::XZR) {
- assert(MI->getDesc().getNumOperands() == 4 &&
- MI->getOperand(3).getImm() == 0 && "invalid ORRrs operands");
+ if (MI.getOperand(1).getReg() == AArch64::XZR) {
+ assert(MI.getDesc().getNumOperands() == 4 &&
+ MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
return true;
}
break;
case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
- if (MI->getOperand(2).getImm() == 0) {
- assert(MI->getDesc().getNumOperands() == 4 &&
- MI->getOperand(3).getImm() == 0 && "invalid ADDXri operands");
+ if (MI.getOperand(2).getImm() == 0) {
+ assert(MI.getDesc().getNumOperands() == 4 &&
+ MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
return true;
}
break;
@@ -1161,19 +1327,19 @@ bool AArch64InstrInfo::isGPRCopy(const MachineInstr *MI) const {
// Return true if this instruction simply renames a floating-point register
// without modifying bits.
-bool AArch64InstrInfo::isFPRCopy(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
+bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
default:
break;
case TargetOpcode::COPY: {
// FPR64 copies will be lowered to ORR.16b
- unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned DstReg = MI.getOperand(0).getReg();
return (AArch64::FPR64RegClass.contains(DstReg) ||
AArch64::FPR128RegClass.contains(DstReg));
}
case AArch64::ORRv16i8:
- if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
- assert(MI->getDesc().getNumOperands() == 3 && MI->getOperand(0).isReg() &&
+ if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
+ assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
"invalid ORRv16i8 operands");
return true;
}
@@ -1182,9 +1348,9 @@ bool AArch64InstrInfo::isFPRCopy(const MachineInstr *MI) const {
return false;
}
-unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
default:
break;
case AArch64::LDRWui:
@@ -1194,10 +1360,10 @@ unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
case AArch64::LDRSui:
case AArch64::LDRDui:
case AArch64::LDRQui:
- if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
+ if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
+ MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
+ FrameIndex = MI.getOperand(1).getIndex();
+ return MI.getOperand(0).getReg();
}
break;
}
@@ -1205,9 +1371,9 @@ unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
return 0;
}
-unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
default:
break;
case AArch64::STRWui:
@@ -1217,10 +1383,10 @@ unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
case AArch64::STRSui:
case AArch64::STRDui:
case AArch64::STRQui:
- if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
+ if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
+ MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
+ FrameIndex = MI.getOperand(1).getIndex();
+ return MI.getOperand(0).getReg();
}
break;
}
@@ -1230,8 +1396,8 @@ unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
/// Return true if this load/store scales or extends its register offset.
/// This refers to scaling a dynamic index as opposed to scaled immediates.
/// MI should be a memory op that allows scaled addressing.
-bool AArch64InstrInfo::isScaledAddr(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
+bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
default:
break;
case AArch64::LDRBBroW:
@@ -1281,7 +1447,7 @@ bool AArch64InstrInfo::isScaledAddr(const MachineInstr *MI) const {
case AArch64::STRWroX:
case AArch64::STRXroX:
- unsigned Val = MI->getOperand(3).getImm();
+ unsigned Val = MI.getOperand(3).getImm();
AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
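    // E.g. 'ldr w0, [x1, x2, lsl #2]' (shifted) and 'ldr w0, [x1, w2, sxtw]'
    // (extended) are scaled addresses; plain 'ldr w0, [x1, x2]' is not.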
return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
}
@@ -1289,36 +1455,96 @@ bool AArch64InstrInfo::isScaledAddr(const MachineInstr *MI) const {
}
/// Check all MachineMemOperands for a hint to suppress pairing.
-bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const {
- assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
- "Too many target MO flags");
- for (auto *MM : MI->memoperands()) {
- if (MM->getFlags() &
- (MOSuppressPair << MachineMemOperand::MOTargetStartBit)) {
- return true;
- }
- }
- return false;
+bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const {
+ return any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
+ return MMO->getFlags() & MOSuppressPair;
+ });
}
/// Set a flag on the first MachineMemOperand to suppress pairing.
-void AArch64InstrInfo::suppressLdStPair(MachineInstr *MI) const {
- if (MI->memoperands_empty())
+void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) const {
+ if (MI.memoperands_empty())
return;
+ (*MI.memoperands_begin())->setFlags(MOSuppressPair);
+}
- assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
- "Too many target MO flags");
- (*MI->memoperands_begin())
- ->setFlags(MOSuppressPair << MachineMemOperand::MOTargetStartBit);
+bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const {
+ switch (Opc) {
+ default:
+ return false;
+ case AArch64::STURSi:
+ case AArch64::STURDi:
+ case AArch64::STURQi:
+ case AArch64::STURBBi:
+ case AArch64::STURHHi:
+ case AArch64::STURWi:
+ case AArch64::STURXi:
+ case AArch64::LDURSi:
+ case AArch64::LDURDi:
+ case AArch64::LDURQi:
+ case AArch64::LDURWi:
+ case AArch64::LDURXi:
+ case AArch64::LDURSWi:
+ case AArch64::LDURHHi:
+ case AArch64::LDURBBi:
+ case AArch64::LDURSBWi:
+ case AArch64::LDURSHWi:
+ return true;
+ }
}
-bool
-AArch64InstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
- unsigned &Offset,
- const TargetRegisterInfo *TRI) const {
- switch (LdSt->getOpcode()) {
+bool AArch64InstrInfo::isUnscaledLdSt(MachineInstr &MI) const {
+ return isUnscaledLdSt(MI.getOpcode());
+}
+
+// Is this a candidate for ld/st merging or pairing? For example, we don't
+// touch volatiles or load/stores that have a hint to avoid pair formation.
+bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
+ // If this is a volatile load/store, don't mess with it.
+ if (MI.hasOrderedMemoryRef())
+ return false;
+
+ // Make sure this is a reg+imm (as opposed to an address reloc).
+ assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
+ if (!MI.getOperand(2).isImm())
+ return false;
+
+ // Can't merge/pair if the instruction modifies the base register.
+ // e.g., ldr x0, [x0]
+ unsigned BaseReg = MI.getOperand(1).getReg();
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ if (MI.modifiesRegister(BaseReg, TRI))
+ return false;
+
+ // Check if this load/store has a hint to avoid pair formation.
+ // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
+ if (isLdStPairSuppressed(MI))
+ return false;
+
+ // On some CPUs quad load/store pairs are slower than two single load/stores.
+ if (Subtarget.avoidQuadLdStPairs()) {
+ switch (MI.getOpcode()) {
+ default:
+ break;
+
+ case AArch64::LDURQi:
+ case AArch64::STURQi:
+ case AArch64::LDRQui:
+ case AArch64::STRQui:
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
+ MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
+ const TargetRegisterInfo *TRI) const {
+ switch (LdSt.getOpcode()) {
default:
return false;
+ // Scaled instructions.
case AArch64::STRSui:
case AArch64::STRDui:
case AArch64::STRQui:
@@ -1329,29 +1555,45 @@ AArch64InstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
case AArch64::LDRQui:
case AArch64::LDRXui:
case AArch64::LDRWui:
- if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
- return false;
- BaseReg = LdSt->getOperand(1).getReg();
- MachineFunction &MF = *LdSt->getParent()->getParent();
- unsigned Width = getRegClass(LdSt->getDesc(), 0, TRI, MF)->getSize();
- Offset = LdSt->getOperand(2).getImm() * Width;
- return true;
+ case AArch64::LDRSWui:
+ // Unscaled instructions.
+ case AArch64::STURSi:
+ case AArch64::STURDi:
+ case AArch64::STURQi:
+ case AArch64::STURXi:
+ case AArch64::STURWi:
+ case AArch64::LDURSi:
+ case AArch64::LDURDi:
+ case AArch64::LDURQi:
+ case AArch64::LDURWi:
+ case AArch64::LDURXi:
+ case AArch64::LDURSWi:
+ unsigned Width;
+ return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
}
}
bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
- MachineInstr *LdSt, unsigned &BaseReg, int &Offset, int &Width,
+ MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
const TargetRegisterInfo *TRI) const {
+ assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
// Handle only loads/stores with base register followed by immediate offset.
- if (LdSt->getNumOperands() != 3)
- return false;
- if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
+ if (LdSt.getNumExplicitOperands() == 3) {
+ // Non-paired instruction (e.g., ldr x1, [x0, #8]).
+ if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
+ return false;
+ } else if (LdSt.getNumExplicitOperands() == 4) {
+ // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
+ if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
+ !LdSt.getOperand(3).isImm())
+ return false;
+ } else
return false;
// Offset is calculated as the immediate operand multiplied by the scaling
// factor. Unscaled instructions have scaling factor set to 1.
- int Scale = 0;
- switch (LdSt->getOpcode()) {
+ unsigned Scale = 0;
+ switch (LdSt.getOpcode()) {
default:
return false;
case AArch64::LDURQi:
@@ -1392,18 +1634,48 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
Width = 1;
Scale = 1;
break;
+ case AArch64::LDPQi:
+ case AArch64::LDNPQi:
+ case AArch64::STPQi:
+ case AArch64::STNPQi:
+ Scale = 16;
+ Width = 32;
+ break;
case AArch64::LDRQui:
case AArch64::STRQui:
Scale = Width = 16;
break;
+ case AArch64::LDPXi:
+ case AArch64::LDPDi:
+ case AArch64::LDNPXi:
+ case AArch64::LDNPDi:
+ case AArch64::STPXi:
+ case AArch64::STPDi:
+ case AArch64::STNPXi:
+ case AArch64::STNPDi:
+ Scale = 8;
+ Width = 16;
+ break;
case AArch64::LDRXui:
case AArch64::LDRDui:
case AArch64::STRXui:
case AArch64::STRDui:
Scale = Width = 8;
break;
+ case AArch64::LDPWi:
+ case AArch64::LDPSi:
+ case AArch64::LDNPWi:
+ case AArch64::LDNPSi:
+ case AArch64::STPWi:
+ case AArch64::STPSi:
+ case AArch64::STNPWi:
+ case AArch64::STNPSi:
+ Scale = 4;
+ Width = 8;
+ break;
case AArch64::LDRWui:
case AArch64::LDRSui:
+ case AArch64::LDRSWui:
case AArch64::STRWui:
case AArch64::STRSui:
Scale = Width = 4;
@@ -1420,41 +1692,120 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STRBBui:
Scale = Width = 1;
break;
- };
+ }
- BaseReg = LdSt->getOperand(1).getReg();
- Offset = LdSt->getOperand(2).getImm() * Scale;
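+  // E.g. for 'ldp x1, x2, [x0, #16]' (LDPXi, Scale == 8) the immediate
+  // operand is 2, so BaseReg == x0 and Offset == 2 * 8 == 16 bytes.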
+ if (LdSt.getNumExplicitOperands() == 3) {
+ BaseReg = LdSt.getOperand(1).getReg();
+ Offset = LdSt.getOperand(2).getImm() * Scale;
+ } else {
+ assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
+ BaseReg = LdSt.getOperand(2).getReg();
+ Offset = LdSt.getOperand(3).getImm() * Scale;
+ }
return true;
}
+// Scale the unscaled offsets. Returns false if the unscaled offset can't be
+// scaled.
+static bool scaleOffset(unsigned Opc, int64_t &Offset) {
+ unsigned OffsetStride = 1;
+ switch (Opc) {
+ default:
+ return false;
+ case AArch64::LDURQi:
+ case AArch64::STURQi:
+ OffsetStride = 16;
+ break;
+ case AArch64::LDURXi:
+ case AArch64::LDURDi:
+ case AArch64::STURXi:
+ case AArch64::STURDi:
+ OffsetStride = 8;
+ break;
+ case AArch64::LDURWi:
+ case AArch64::LDURSi:
+ case AArch64::LDURSWi:
+ case AArch64::STURWi:
+ case AArch64::STURSi:
+ OffsetStride = 4;
+ break;
+ }
+ // If the byte-offset isn't a multiple of the stride, we can't scale this
+ // offset.
+ if (Offset % OffsetStride != 0)
+ return false;
+
+ // Convert the byte-offset used by unscaled into an "element" offset used
+ // by the scaled pair load/store instructions.
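+  // E.g. an LDURXi at byte offset 24 (stride 8) becomes element offset 3;
+  // byte offset 20 would have been rejected above since 20 % 8 != 0.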
+ Offset /= OffsetStride;
+ return true;
+}
+
+static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
+ if (FirstOpc == SecondOpc)
+ return true;
+ // We can also pair sign-ext and zero-ext instructions.
+ switch (FirstOpc) {
+ default:
+ return false;
+ case AArch64::LDRWui:
+ case AArch64::LDURWi:
+ return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
+ case AArch64::LDRSWui:
+ case AArch64::LDURSWi:
+ return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
+ }
+ // These instructions can't be paired based on their opcodes.
+ return false;
+}
+
/// Detect opportunities for ldp/stp formation.
///
/// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
-bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
- MachineInstr *SecondLdSt,
- unsigned NumLoads) const {
+bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
+ MachineInstr &SecondLdSt,
+ unsigned NumLoads) const {
// Only cluster up to a single pair.
if (NumLoads > 1)
return false;
- if (FirstLdSt->getOpcode() != SecondLdSt->getOpcode())
+
+ // Can we pair these instructions based on their opcodes?
+ unsigned FirstOpc = FirstLdSt.getOpcode();
+ unsigned SecondOpc = SecondLdSt.getOpcode();
+ if (!canPairLdStOpc(FirstOpc, SecondOpc))
+ return false;
+
+ // Can't merge volatiles or load/stores that have a hint to avoid pair
+ // formation, for example.
+ if (!isCandidateToMergeOrPair(FirstLdSt) ||
+ !isCandidateToMergeOrPair(SecondLdSt))
+ return false;
+
+ // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
+ int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
+ if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
+ return false;
+
+ int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
+ if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
return false;
- // getMemOpBaseRegImmOfs guarantees that oper 2 isImm.
- unsigned Ofs1 = FirstLdSt->getOperand(2).getImm();
- // Allow 6 bits of positive range.
- if (Ofs1 > 64)
+
+ // Pairwise instructions have a 7-bit signed offset field.
+ if (Offset1 > 63 || Offset1 < -64)
return false;
+
// The caller should already have ordered First/SecondLdSt by offset.
- unsigned Ofs2 = SecondLdSt->getOperand(2).getImm();
- return Ofs1 + 1 == Ofs2;
+ assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
+ return Offset1 + 1 == Offset2;
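+  // Both offsets are in element units at this point, so adjacent accesses
+  // differ by exactly one.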
}
-bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
- MachineInstr *Second) const {
- if (Subtarget.isCyclone()) {
- // Cyclone can fuse CMN, CMP, TST followed by Bcc.
- unsigned SecondOpcode = Second->getOpcode();
+bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr &First,
+ MachineInstr &Second) const {
+ if (Subtarget.hasMacroOpFusion()) {
+ // Fuse CMN, CMP, TST followed by Bcc.
+ unsigned SecondOpcode = Second.getOpcode();
if (SecondOpcode == AArch64::Bcc) {
- switch (First->getOpcode()) {
+ switch (First.getOpcode()) {
default:
return false;
case AArch64::SUBSWri:
@@ -1466,10 +1817,10 @@ bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
return true;
}
}
- // Cyclone B0 also supports ALU operations followed by CBZ/CBNZ.
+ // Fuse ALU operations followed by CBZ/CBNZ.
if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
- switch (First->getOpcode()) {
+ switch (First.getOpcode()) {
default:
return false;
case AArch64::ADDWri:
@@ -1491,7 +1842,7 @@ bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var,
- const MDNode *Expr, DebugLoc DL) const {
+ const MDNode *Expr, const DebugLoc &DL) const {
MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
.addFrameIndex(FrameIx)
.addImm(0)
@@ -1521,7 +1872,7 @@ static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
}
void AArch64InstrInfo::copyPhysRegTuple(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
llvm::ArrayRef<unsigned> Indices) const {
assert(Subtarget.hasNEON() &&
@@ -1547,9 +1898,9 @@ void AArch64InstrInfo::copyPhysRegTuple(
}
void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, unsigned DestReg,
+ unsigned SrcReg, bool KillSrc) const {
if (AArch64::GPR32spRegClass.contains(DestReg) &&
(AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
const TargetRegisterInfo *TRI = &getRegisterInfo();
@@ -1818,8 +2169,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (SrcReg == AArch64::NZCV) {
assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
- BuildMI(MBB, I, DL, get(AArch64::MRS))
- .addReg(DestReg)
+ BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
.addImm(AArch64SysReg::NZCV)
.addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
return;
@@ -1879,39 +2229,45 @@ void AArch64InstrInfo::storeRegToStackSlot(
else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
- Opc = AArch64::ST1Twov1d, Offset = false;
+ Opc = AArch64::ST1Twov1d;
+ Offset = false;
}
break;
case 24:
if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
- Opc = AArch64::ST1Threev1d, Offset = false;
+ Opc = AArch64::ST1Threev1d;
+ Offset = false;
}
break;
case 32:
if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
- Opc = AArch64::ST1Fourv1d, Offset = false;
+ Opc = AArch64::ST1Fourv1d;
+ Offset = false;
} else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
- Opc = AArch64::ST1Twov2d, Offset = false;
+ Opc = AArch64::ST1Twov2d;
+ Offset = false;
}
break;
case 48:
if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
- Opc = AArch64::ST1Threev2d, Offset = false;
+ Opc = AArch64::ST1Threev2d;
+ Offset = false;
}
break;
case 64:
if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
- Opc = AArch64::ST1Fourv2d, Offset = false;
+ Opc = AArch64::ST1Fourv2d;
+ Offset = false;
}
break;
}
@@ -1977,39 +2333,45 @@ void AArch64InstrInfo::loadRegFromStackSlot(
else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
- Opc = AArch64::LD1Twov1d, Offset = false;
+ Opc = AArch64::LD1Twov1d;
+ Offset = false;
}
break;
case 24:
if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
- Opc = AArch64::LD1Threev1d, Offset = false;
+ Opc = AArch64::LD1Threev1d;
+ Offset = false;
}
break;
case 32:
if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
- Opc = AArch64::LD1Fourv1d, Offset = false;
+ Opc = AArch64::LD1Fourv1d;
+ Offset = false;
} else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
- Opc = AArch64::LD1Twov2d, Offset = false;
+ Opc = AArch64::LD1Twov2d;
+ Offset = false;
}
break;
case 48:
if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
- Opc = AArch64::LD1Threev2d, Offset = false;
+ Opc = AArch64::LD1Threev2d;
+ Offset = false;
}
break;
case 64:
if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
- Opc = AArch64::LD1Fourv2d, Offset = false;
+ Opc = AArch64::LD1Fourv2d;
+ Offset = false;
}
break;
}
@@ -2024,13 +2386,16 @@ void AArch64InstrInfo::loadRegFromStackSlot(
}
void llvm::emitFrameOffset(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, DebugLoc DL,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
unsigned DestReg, unsigned SrcReg, int Offset,
const TargetInstrInfo *TII,
MachineInstr::MIFlag Flag, bool SetNZCV) {
if (DestReg == SrcReg && Offset == 0)
return;
+ assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
+ "SP increment/decrement not 16-byte aligned");
+
bool isSub = Offset < 0;
if (isSub)
Offset = -Offset;
@@ -2082,8 +2447,9 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
}
MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
- MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
- MachineBasicBlock::iterator InsertPt, int FrameIndex) const {
+ MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, int FrameIndex,
+ LiveIntervals *LIS) const {
// This is a bit of a hack. Consider this instruction:
//
// %vreg0<def> = COPY %SP; GPR64all:%vreg0
@@ -2097,9 +2463,9 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
//
// <rdar://problem/11522048>
//
- if (MI->isCopy()) {
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned SrcReg = MI->getOperand(1).getReg();
+ if (MI.isCopy()) {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
if (SrcReg == AArch64::SP &&
TargetRegisterInfo::isVirtualRegister(DstReg)) {
MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
@@ -2393,9 +2759,10 @@ void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(AArch64::HINT);
NopInst.addOperand(MCOperand::createImm(0));
}
-/// useMachineCombiner - return true when a target supports MachineCombiner
+
+// AArch64 supports MachineCombiner.
bool AArch64InstrInfo::useMachineCombiner() const {
- // AArch64 supports the combiner
return true;
}
//
@@ -2456,37 +2823,75 @@ static bool isCombineInstrCandidate64(unsigned Opc) {
return false;
}
//
+// FP opcodes that can be combined with an FMUL.
+static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
+ switch (Inst.getOpcode()) {
+ case AArch64::FADDSrr:
+ case AArch64::FADDDrr:
+ case AArch64::FADDv2f32:
+ case AArch64::FADDv2f64:
+ case AArch64::FADDv4f32:
+ case AArch64::FSUBSrr:
+ case AArch64::FSUBDrr:
+ case AArch64::FSUBv2f32:
+ case AArch64::FSUBv2f64:
+ case AArch64::FSUBv4f32:
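+    // Forming a fused multiply-add removes the intermediate rounding of the
+    // separate FMUL, so this rewrite is only legal under unsafe FP math.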
+ return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
+ default:
+ break;
+ }
+ return false;
+}
+//
// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
}
-static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
- unsigned MulOpc, unsigned ZeroReg) {
+//
+// Utility routine that checks whether \param MO is defined by a
+// \param CombineOpc instruction in the basic block \param MBB.
+static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
+ unsigned CombineOpc, unsigned ZeroReg = 0,
+ bool CheckZeroReg = false) {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
MachineInstr *MI = nullptr;
- // We need a virtual register definition.
+
if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
MI = MRI.getUniqueVRegDef(MO.getReg());
// And it needs to be in the trace (otherwise, it won't have a depth).
- if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != MulOpc)
- return false;
-
- assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
- MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
- MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
-
- // The third input reg must be zero.
- if (MI->getOperand(3).getReg() != ZeroReg)
+ if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
return false;
-
  // Must be used only by the user we combine with.
if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
return false;
+ if (CheckZeroReg) {
+ assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
+ MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
+           MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
+ // The third input reg must be zero.
+ if (MI->getOperand(3).getReg() != ZeroReg)
+ return false;
+ }
+
return true;
}
+//
+// Is \param MO defined by an integer multiply and can be combined?
+static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
+ unsigned MulOpc, unsigned ZeroReg) {
+ return canCombine(MBB, MO, MulOpc, ZeroReg, true);
+}
+
+//
+// Is \param MO defined by a floating-point multiply and can be combined?
+static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
+ unsigned MulOpc) {
+ return canCombine(MBB, MO, MulOpc);
+}
+
// TODO: There are many more machine instruction opcodes to match:
// 1. Other data types (integer, vectors)
// 2. Other math / logic operations (xor, or)
@@ -2522,17 +2927,17 @@ static bool getMaddPatterns(MachineInstr &Root,
bool Found = false;
if (!isCombineInstrCandidate(Opc))
- return 0;
+ return false;
if (isCombineInstrSettingFlag(Opc)) {
int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
// When NZCV is live bail out.
if (Cmp_NZCV == -1)
- return 0;
- unsigned NewOpc = convertFlagSettingOpcode(&Root);
+ return false;
+ unsigned NewOpc = convertFlagSettingOpcode(Root);
// When opcode can't change bail out.
// CHECKME: do we miss any cases for opcode conversion?
if (NewOpc == Opc)
- return 0;
+ return false;
Opc = NewOpc;
}
@@ -2620,7 +3025,230 @@ static bool getMaddPatterns(MachineInstr &Root,
}
return Found;
}
+/// Floating-Point Support
+
+/// Find instructions that can be turned into a fused multiply-add (fmadd/fmla).
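+/// For example (scalar, single precision; vregs are illustrative):
+///   %vreg1 = FMULSrr %vreg2, %vreg3
+///   %vreg0 = FADDSrr %vreg1, %vreg4
+/// can be rewritten as
+///   %vreg0 = FMADDSrrr %vreg2, %vreg3, %vreg4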
+static bool getFMAPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) {
+
+ if (!isCombineInstrCandidateFP(Root))
+    return false;
+ MachineBasicBlock &MBB = *Root.getParent();
+ bool Found = false;
+
+ switch (Root.getOpcode()) {
+ default:
+    assert(false && "Unsupported FP instruction in combiner");
+ break;
+ case AArch64::FADDSrr:
+    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
+           "FADDSrr does not have register operands");
+ if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
+ Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv1i32_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
+ Found = true;
+ }
+ if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
+ Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
+ AArch64::FMULv1i32_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
+ Found = true;
+ }
+ break;
+ case AArch64::FADDDrr:
+ if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
+ Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv1i64_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
+ Found = true;
+ }
+ if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
+ Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
+ AArch64::FMULv1i64_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
+ Found = true;
+ }
+ break;
+ case AArch64::FADDv2f32:
+ if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv2i32_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv2f32)) {
+ Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
+ Found = true;
+ }
+ if (canCombineWithFMUL(MBB, Root.getOperand(2),
+ AArch64::FMULv2i32_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
+ AArch64::FMULv2f32)) {
+ Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
+ Found = true;
+ }
+ break;
+ case AArch64::FADDv2f64:
+ if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv2i64_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv2f64)) {
+ Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
+ Found = true;
+ }
+ if (canCombineWithFMUL(MBB, Root.getOperand(2),
+ AArch64::FMULv2i64_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
+ AArch64::FMULv2f64)) {
+ Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
+ Found = true;
+ }
+ break;
+ case AArch64::FADDv4f32:
+ if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv4i32_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv4f32)) {
+ Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
+ Found = true;
+ }
+ if (canCombineWithFMUL(MBB, Root.getOperand(2),
+ AArch64::FMULv4i32_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
+ AArch64::FMULv4f32)) {
+ Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
+ Found = true;
+ }
+ break;
+
+ case AArch64::FSUBSrr:
+ if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
+ Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
+ Found = true;
+ }
+ if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
+ Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
+ AArch64::FMULv1i32_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
+ Found = true;
+ }
+ break;
+ case AArch64::FSUBDrr:
+ if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
+ Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
+ Found = true;
+ }
+ if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
+ Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
+ AArch64::FMULv1i64_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
+ Found = true;
+ }
+ break;
+ case AArch64::FSUBv2f32:
+ if (canCombineWithFMUL(MBB, Root.getOperand(2),
+ AArch64::FMULv2i32_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
+ AArch64::FMULv2f32)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
+ Found = true;
+ }
+ break;
+ case AArch64::FSUBv2f64:
+ if (canCombineWithFMUL(MBB, Root.getOperand(2),
+ AArch64::FMULv2i64_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
+ AArch64::FMULv2f64)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
+ Found = true;
+ }
+ break;
+ case AArch64::FSUBv4f32:
+ if (canCombineWithFMUL(MBB, Root.getOperand(2),
+ AArch64::FMULv4i32_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
+ AArch64::FMULv4f32)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
+ Found = true;
+ }
+ break;
+ }
+ return Found;
+}
+
+/// Return true when a code sequence can improve throughput. It
+/// should be called only for instructions in loops.
+/// \param Pattern - combiner pattern
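+/// Patterns listed here are substituted on a resource (throughput) basis,
+/// without requiring a critical-path (latency) improvement.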
+bool
+AArch64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
+ switch (Pattern) {
+ default:
+ break;
+ case MachineCombinerPattern::FMULADDS_OP1:
+ case MachineCombinerPattern::FMULADDS_OP2:
+ case MachineCombinerPattern::FMULSUBS_OP1:
+ case MachineCombinerPattern::FMULSUBS_OP2:
+ case MachineCombinerPattern::FMULADDD_OP1:
+ case MachineCombinerPattern::FMULADDD_OP2:
+ case MachineCombinerPattern::FMULSUBD_OP1:
+ case MachineCombinerPattern::FMULSUBD_OP2:
+ case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
+ case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
+ case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
+ case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
+ case MachineCombinerPattern::FMLAv2f32_OP2:
+ case MachineCombinerPattern::FMLAv2f32_OP1:
+ case MachineCombinerPattern::FMLAv2f64_OP1:
+ case MachineCombinerPattern::FMLAv2f64_OP2:
+ case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
+ case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
+ case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
+ case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
+ case MachineCombinerPattern::FMLAv4f32_OP1:
+ case MachineCombinerPattern::FMLAv4f32_OP2:
+ case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
+ case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
+ case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
+ case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
+ case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
+ case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
+ case MachineCombinerPattern::FMLSv2f32_OP2:
+ case MachineCombinerPattern::FMLSv2f64_OP2:
+ case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
+ case MachineCombinerPattern::FMLSv4f32_OP2:
+ return true;
+ } // end switch (Pattern)
+ return false;
+}
/// Return true when there is potentially a faster code sequence for an
/// instruction chain ending in \p Root. All potential patterns are listed in
/// the \p Pattern vector. Pattern should be sorted in priority order since the
@@ -2629,28 +3257,35 @@ static bool getMaddPatterns(MachineInstr &Root,
bool AArch64InstrInfo::getMachineCombinerPatterns(
MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+ // Integer patterns
if (getMaddPatterns(Root, Patterns))
return true;
+ // Floating point patterns
+ if (getFMAPatterns(Root, Patterns))
+ return true;
return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
}
-/// genMadd - Generate madd instruction and combine mul and add.
-/// Example:
-/// MUL I=A,B,0
-/// ADD R,I,C
-/// ==> MADD R,A,B,C
-/// \param Root is the ADD instruction
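+// Operand order of the fused instruction to emit:
+//   Default     - scalar MADD/FMADD: mul operands first, accumulator last.
+//   Indexed     - lane-indexed FMLA/FMLS: accumulator first, then the mul
+//                 operands and the lane immediate.
+//   Accumulator - vector FMLA/FMLS: accumulator first, no immediate.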
+enum class FMAInstKind { Default, Indexed, Accumulator };
+/// genFusedMultiply - Generate fused multiply instructions.
+/// This function supports both integer and floating point instructions.
+/// A typical example:
+/// F|MUL I=A,B,0
+/// F|ADD R,I,C
+/// ==> F|MADD R,A,B,C
+/// \param Root is the F|ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
-/// the MUL. In the example above IdxMulOpd is 1.
-/// \param MaddOpc the opcode fo the madd instruction
-static MachineInstr *genMadd(MachineFunction &MF, MachineRegisterInfo &MRI,
- const TargetInstrInfo *TII, MachineInstr &Root,
- SmallVectorImpl<MachineInstr *> &InsInstrs,
- unsigned IdxMulOpd, unsigned MaddOpc,
- const TargetRegisterClass *RC) {
+/// the F|MUL. In the example above IdxMulOpd is 1.
+/// \param MaddOpc the opcode of the f|madd instruction
+static MachineInstr *
+genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII, MachineInstr &Root,
+ SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
+ unsigned MaddOpc, const TargetRegisterClass *RC,
+ FMAInstKind kind = FMAInstKind::Default) {
assert(IdxMulOpd == 1 || IdxMulOpd == 2);
unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
@@ -2672,12 +3307,26 @@ static MachineInstr *genMadd(MachineFunction &MF, MachineRegisterInfo &MRI,
if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
MRI.constrainRegClass(SrcReg2, RC);
- MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
- ResultReg)
- .addReg(SrcReg0, getKillRegState(Src0IsKill))
- .addReg(SrcReg1, getKillRegState(Src1IsKill))
- .addReg(SrcReg2, getKillRegState(Src2IsKill));
- // Insert the MADD
+ MachineInstrBuilder MIB;
+ if (kind == FMAInstKind::Default)
+ MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
+ .addReg(SrcReg0, getKillRegState(Src0IsKill))
+ .addReg(SrcReg1, getKillRegState(Src1IsKill))
+ .addReg(SrcReg2, getKillRegState(Src2IsKill));
+ else if (kind == FMAInstKind::Indexed)
+ MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
+ .addReg(SrcReg2, getKillRegState(Src2IsKill))
+ .addReg(SrcReg0, getKillRegState(Src0IsKill))
+ .addReg(SrcReg1, getKillRegState(Src1IsKill))
+ .addImm(MUL->getOperand(3).getImm());
+ else if (kind == FMAInstKind::Accumulator)
+ MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
+ .addReg(SrcReg2, getKillRegState(Src2IsKill))
+ .addReg(SrcReg0, getKillRegState(Src0IsKill))
+ .addReg(SrcReg1, getKillRegState(Src1IsKill));
+ else
+    assert(false && "Invalid FMA instruction kind");
+  // Insert the fused instruction (MADD, FMADD/FMSUB/FNMSUB, FMLA/FMLS)
InsInstrs.push_back(MIB);
return MUL;
}
@@ -2765,7 +3414,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
Opc = AArch64::MADDXrrr;
RC = &AArch64::GPR64RegClass;
}
- MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case MachineCombinerPattern::MULADDW_OP2:
case MachineCombinerPattern::MULADDX_OP2:
@@ -2780,7 +3429,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
Opc = AArch64::MADDXrrr;
RC = &AArch64::GPR64RegClass;
}
- MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULADDWI_OP1:
case MachineCombinerPattern::MULADDXI_OP1: {
@@ -2872,7 +3521,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
Opc = AArch64::MSUBXrrr;
RC = &AArch64::GPR64RegClass;
}
- MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULSUBWI_OP1:
case MachineCombinerPattern::MULSUBXI_OP1: {
@@ -2917,6 +3566,234 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
break;
}
+ // Floating Point Support
+ case MachineCombinerPattern::FMULADDS_OP1:
+ case MachineCombinerPattern::FMULADDD_OP1:
+    // FMUL I=A,B,0
+    // FADD R,I,C
+    // ==> FMADD R,A,B,C
+    // --- Create(FMADD);
+ if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
+ Opc = AArch64::FMADDSrrr;
+ RC = &AArch64::FPR32RegClass;
+ } else {
+ Opc = AArch64::FMADDDrrr;
+ RC = &AArch64::FPR64RegClass;
+ }
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ break;
+ case MachineCombinerPattern::FMULADDS_OP2:
+ case MachineCombinerPattern::FMULADDD_OP2:
+ // FMUL I=A,B,0
+ // FADD R,C,I
+ // ==> FMADD R,A,B,C
+ // --- Create(FMADD);
+ if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
+ Opc = AArch64::FMADDSrrr;
+ RC = &AArch64::FPR32RegClass;
+ } else {
+ Opc = AArch64::FMADDDrrr;
+ RC = &AArch64::FPR64RegClass;
+ }
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+
+ case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
+ Opc = AArch64::FMLAv1i32_indexed;
+ RC = &AArch64::FPR32RegClass;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Indexed);
+ break;
+ case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
+ Opc = AArch64::FMLAv1i32_indexed;
+ RC = &AArch64::FPR32RegClass;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Indexed);
+ break;
+
+ case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
+ Opc = AArch64::FMLAv1i64_indexed;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Indexed);
+ break;
+ case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
+ Opc = AArch64::FMLAv1i64_indexed;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Indexed);
+ break;
+
+ case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
+ case MachineCombinerPattern::FMLAv2f32_OP1:
+ RC = &AArch64::FPR64RegClass;
+ if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
+ Opc = AArch64::FMLAv2i32_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Indexed);
+ } else {
+ Opc = AArch64::FMLAv2f32;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Accumulator);
+ }
+ break;
+ case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
+ case MachineCombinerPattern::FMLAv2f32_OP2:
+ RC = &AArch64::FPR64RegClass;
+ if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
+ Opc = AArch64::FMLAv2i32_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Indexed);
+ } else {
+ Opc = AArch64::FMLAv2f32;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Accumulator);
+ }
+ break;
+
+ case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
+ case MachineCombinerPattern::FMLAv2f64_OP1:
+ RC = &AArch64::FPR128RegClass;
+ if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
+ Opc = AArch64::FMLAv2i64_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Indexed);
+ } else {
+ Opc = AArch64::FMLAv2f64;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Accumulator);
+ }
+ break;
+ case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
+ case MachineCombinerPattern::FMLAv2f64_OP2:
+ RC = &AArch64::FPR128RegClass;
+ if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
+ Opc = AArch64::FMLAv2i64_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Indexed);
+ } else {
+ Opc = AArch64::FMLAv2f64;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Accumulator);
+ }
+ break;
+
+ case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
+ case MachineCombinerPattern::FMLAv4f32_OP1:
+ RC = &AArch64::FPR128RegClass;
+ if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
+ Opc = AArch64::FMLAv4i32_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Indexed);
+ } else {
+ Opc = AArch64::FMLAv4f32;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Accumulator);
+ }
+ break;
+
+ case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
+ case MachineCombinerPattern::FMLAv4f32_OP2:
+ RC = &AArch64::FPR128RegClass;
+ if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
+ Opc = AArch64::FMLAv4i32_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Indexed);
+ } else {
+ Opc = AArch64::FMLAv4f32;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Accumulator);
+ }
+ break;
+
+ case MachineCombinerPattern::FMULSUBS_OP1:
+ case MachineCombinerPattern::FMULSUBD_OP1: {
+ // FMUL I=A,B,0
+ // FSUB R,I,C
+ // ==> FNMSUB R,A,B,C // = -C + A*B
+ // --- Create(FNMSUB);
+ if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
+ Opc = AArch64::FNMSUBSrrr;
+ RC = &AArch64::FPR32RegClass;
+ } else {
+ Opc = AArch64::FNMSUBDrrr;
+ RC = &AArch64::FPR64RegClass;
+ }
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ break;
+ }
+ case MachineCombinerPattern::FMULSUBS_OP2:
+ case MachineCombinerPattern::FMULSUBD_OP2: {
+ // FMUL I=A,B,0
+ // FSUB R,C,I
+ // ==> FMSUB R,A,B,C (computes C - A*B)
+ // --- Create(FMSUB);
+ if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
+ Opc = AArch64::FMSUBSrrr;
+ RC = &AArch64::FPR32RegClass;
+ } else {
+ Opc = AArch64::FMSUBDrrr;
+ RC = &AArch64::FPR64RegClass;
+ }
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+    break;
+  }
+
+ case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
+ Opc = AArch64::FMLSv1i32_indexed;
+ RC = &AArch64::FPR32RegClass;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Indexed);
+ break;
+
+ case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
+ Opc = AArch64::FMLSv1i64_indexed;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Indexed);
+ break;
+
+ case MachineCombinerPattern::FMLSv2f32_OP2:
+ case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
+ RC = &AArch64::FPR64RegClass;
+ if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
+ Opc = AArch64::FMLSv2i32_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Indexed);
+ } else {
+ Opc = AArch64::FMLSv2f32;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Accumulator);
+ }
+ break;
+
+ case MachineCombinerPattern::FMLSv2f64_OP2:
+ case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
+ RC = &AArch64::FPR128RegClass;
+ if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
+ Opc = AArch64::FMLSv2i64_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Indexed);
+ } else {
+ Opc = AArch64::FMLSv2f64;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Accumulator);
+ }
+ break;
+
+ case MachineCombinerPattern::FMLSv4f32_OP2:
+ case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
+ RC = &AArch64::FPR128RegClass;
+ if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
+ Opc = AArch64::FMLSv4i32_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Indexed);
+ } else {
+ Opc = AArch64::FMLSv4f32;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Accumulator);
+ }
+ break;
} // end switch (Pattern)
// Record MUL and ADD/SUB for deletion
DelInstrs.push_back(MUL);
@@ -2940,14 +3817,23 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
/// to
/// b.<condition code>
///
+/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
+/// compare's constant operand is power of 2.
+///
+/// Examples:
+/// and w8, w8, #0x400
+/// cbnz w8, L1
+/// to
+/// tbnz w8, #10, L1
+///
/// \param MI Conditional Branch
/// \return True when the simple conditional branch is generated
///
-bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const {
+bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
bool IsNegativeBranch = false;
bool IsTestAndBranch = false;
unsigned TargetBBInMI = 0;
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
default:
llvm_unreachable("Unknown branch instruction?");
case AArch64::Bcc:
@@ -2976,48 +3862,108 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const {
// So we increment a zero register and test for bits other
// than bit 0? Conservatively bail out in case the verifier
// missed this case.
- if (IsTestAndBranch && MI->getOperand(1).getImm())
+ if (IsTestAndBranch && MI.getOperand(1).getImm())
return false;
// Find Definition.
- assert(MI->getParent() && "Incomplete machine instruciton\n");
- MachineBasicBlock *MBB = MI->getParent();
+  assert(MI.getParent() && "Incomplete machine instruction");
+ MachineBasicBlock *MBB = MI.getParent();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
- unsigned VReg = MI->getOperand(0).getReg();
+ unsigned VReg = MI.getOperand(0).getReg();
if (!TargetRegisterInfo::isVirtualRegister(VReg))
return false;
MachineInstr *DefMI = MRI->getVRegDef(VReg);
- // Look for CSINC
- if (!(DefMI->getOpcode() == AArch64::CSINCWr &&
- DefMI->getOperand(1).getReg() == AArch64::WZR &&
- DefMI->getOperand(2).getReg() == AArch64::WZR) &&
- !(DefMI->getOpcode() == AArch64::CSINCXr &&
- DefMI->getOperand(1).getReg() == AArch64::XZR &&
- DefMI->getOperand(2).getReg() == AArch64::XZR))
- return false;
+ // Look through COPY instructions to find definition.
+ while (DefMI->isCopy()) {
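+    // Only look through a copy when its source has a single definition and
+    // this copy is its only non-debug use; other users would still need the
+    // old value.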
+ unsigned CopyVReg = DefMI->getOperand(1).getReg();
+ if (!MRI->hasOneNonDBGUse(CopyVReg))
+ return false;
+ if (!MRI->hasOneDef(CopyVReg))
+ return false;
+ DefMI = MRI->getVRegDef(CopyVReg);
+ }
- if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
+ switch (DefMI->getOpcode()) {
+ default:
return false;
+ // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
+ case AArch64::ANDWri:
+ case AArch64::ANDXri: {
+ if (IsTestAndBranch)
+ return false;
+ if (DefMI->getParent() != MBB)
+ return false;
+ if (!MRI->hasOneNonDBGUse(VReg))
+ return false;
- AArch64CC::CondCode CC =
- (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
- bool CheckOnlyCCWrites = true;
- // Convert only when the condition code is not modified between
- // the CSINC and the branch. The CC may be used by other
- // instructions in between.
- if (modifiesConditionCode(DefMI, MI, CheckOnlyCCWrites, &getRegisterInfo()))
- return false;
- MachineBasicBlock &RefToMBB = *MBB;
- MachineBasicBlock *TBB = MI->getOperand(TargetBBInMI).getMBB();
- DebugLoc DL = MI->getDebugLoc();
- if (IsNegativeBranch)
- CC = AArch64CC::getInvertedCondCode(CC);
- BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
- MI->eraseFromParent();
- return true;
+ bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
+ uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
+ DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
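+    // Only a single-bit mask can be folded: CBZ/CBNZ on the AND result then
+    // tests exactly one bit of the AND input, which is what TBZ/TBNZ encode.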
+ if (!isPowerOf2_64(Mask))
+ return false;
+
+ MachineOperand &MO = DefMI->getOperand(1);
+ unsigned NewReg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(NewReg))
+ return false;
+
+ assert(!MRI->def_empty(NewReg) && "Register must be defined.");
+
+ MachineBasicBlock &RefToMBB = *MBB;
+ MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Imm = Log2_64(Mask);
+ unsigned Opc = (Imm < 32)
+ ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
+ : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
+ MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
+ .addReg(NewReg)
+ .addImm(Imm)
+ .addMBB(TBB);
+      // Register lives on to the TBZ/TBNZ now.
+ MO.setIsKill(false);
+
+      // Bit positions below 32 can only be encoded by the 32-bit (W) variant;
+      // the 64-bit variant cannot encode them. So when the input register is
+      // 64-bit and the tested bit is below 32, use its 32-bit sub-register.
+ if (!Is32Bit && Imm < 32)
+ NewMI->getOperand(0).setSubReg(AArch64::sub_32);
+ MI.eraseFromParent();
+ return true;
+ }
+ // Look for CSINC
+ case AArch64::CSINCWr:
+ case AArch64::CSINCXr: {
+ if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
+ DefMI->getOperand(2).getReg() == AArch64::WZR) &&
+ !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
+ DefMI->getOperand(2).getReg() == AArch64::XZR))
+ return false;
+
+ if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
+ return false;
+
+ AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
+ // Convert only when the condition code is not modified between
+ // the CSINC and the branch. The CC may be used by other
+ // instructions in between.
+ if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
+ return false;
+ MachineBasicBlock &RefToMBB = *MBB;
+ MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
+ DebugLoc DL = MI.getDebugLoc();
+ if (IsNegativeBranch)
+ CC = AArch64CC::getInvertedCondCode(CC);
+ BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
+ MI.eraseFromParent();
+ return true;
+ }
+ }
}
std::pair<unsigned, unsigned>
@@ -3046,7 +3992,6 @@ AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
static const std::pair<unsigned, const char *> TargetFlags[] = {
{MO_GOT, "aarch64-got"},
{MO_NC, "aarch64-nc"},
- {MO_TLS, "aarch64-tls"},
- {MO_CONSTPOOL, "aarch64-constant-pool"}};
+ {MO_TLS, "aarch64-tls"}};
return makeArrayRef(TargetFlags);
}