Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64InstrInfo.cpp')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 392
1 file changed, 269 insertions(+), 123 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index b03d421d3e6d..f8f8ee3f1e6c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1112,8 +1112,8 @@ bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &CmpMask,
- int &CmpValue) const {
+ Register &SrcReg2, int64_t &CmpMask,
+ int64_t &CmpValue) const {
// The first operand can be a frame index where we'd normally expect a
// register.
assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
@@ -1155,8 +1155,7 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
- // FIXME: In order to convert CmpValue to 0 or 1
- CmpValue = MI.getOperand(2).getImm() != 0;
+ CmpValue = MI.getOperand(2).getImm();
return true;
case AArch64::ANDSWri:
case AArch64::ANDSXri:
@@ -1165,14 +1164,9 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
- // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
- // while the type of CmpValue is int. When converting uint64_t to int,
- // the high 32 bits of uint64_t will be lost.
- // In fact it causes a bug in spec2006-483.xalancbmk
- // CmpValue is only used to compare with zero in OptimizeCompareInstr
CmpValue = AArch64_AM::decodeLogicalImmediate(
MI.getOperand(2).getImm(),
- MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
+ MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
return true;
}
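Note on the widening above: decodeLogicalImmediate returns uint64_t, and the old int-typed CmpValue could not hold it, which is why the deleted FIXME forced the value to 0/1 with "!= 0" (the truncation otherwise miscompiled spec2006-483.xalancbmk). A minimal standalone sketch of the hazard the int64_t type now avoids, with a made-up immediate:

    #include <cassert>
    #include <cstdint>

    int main() {
      // A 64-bit logical immediate whose low 32 bits are all zero.
      uint64_t Decoded = 0xffffffff00000000ULL;

      int Narrow = static_cast<int>(Decoded);       // old type: high bits lost
      int64_t Wide = static_cast<int64_t>(Decoded); // new type: value preserved

      assert(Narrow == 0); // would look like a compare against zero
      assert(Wide != 0);   // the real immediate is nonzero
      return 0;
    }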
@@ -1433,8 +1427,8 @@ bool AArch64InstrInfo::optimizePTestInstr(
/// instruction.
/// Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
- MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int CmpMask,
- int CmpValue, const MachineRegisterInfo *MRI) const {
+ MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
+ int64_t CmpValue, const MachineRegisterInfo *MRI) const {
assert(CmpInstr.getParent());
assert(MRI);
@@ -1462,10 +1456,6 @@ bool AArch64InstrInfo::optimizeCompareInstr(
if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
- // Continue only if we have a "ri" where immediate is zero.
- // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
- // function.
- assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
if (SrcReg2 != 0)
return false;
@@ -1473,9 +1463,10 @@ bool AArch64InstrInfo::optimizeCompareInstr(
if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
return false;
- if (!CmpValue && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
+ if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
return true;
- return removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
+ return (CmpValue == 0 || CmpValue == 1) &&
+ removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
}
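With the 0/1 assertion removed, the gating moves into the return path: substituteCmpToZero still fires only for a compare against zero, removeCmpToZeroOrOne is reached only for the 0 and 1 immediates it handles, and any other CmpValue now falls through to "not optimized" instead of tripping an assert. A standalone sketch of that control flow (TrySubstitute and TryRemove are hypothetical stand-ins for the two helpers):

    #include <cstdint>
    #include <functional>

    bool optimize(int64_t CmpValue, const std::function<bool()> &TrySubstitute,
                  const std::function<bool(int64_t)> &TryRemove) {
      if (CmpValue == 0 && TrySubstitute())
        return true;
      // Immediates other than 0 and 1 short-circuit to "no change".
      return (CmpValue == 0 || CmpValue == 1) && TryRemove(CmpValue);
    }

    int main() {
      auto No = [] { return false; };
      auto Yes = [](int64_t) { return true; };
      return optimize(2, No, Yes) ? 1 : 0; // returns 0: 2 is rejected up front
    }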
/// Get opcode of S version of Instr.
@@ -2099,10 +2090,8 @@ bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
default:
break;
case TargetOpcode::COPY: {
- // FPR64 copies will be lowered to ORR.16b
Register DstReg = MI.getOperand(0).getReg();
- return (AArch64::FPR64RegClass.contains(DstReg) ||
- AArch64::FPR128RegClass.contains(DstReg));
+ return AArch64::FPR128RegClass.contains(DstReg);
}
case AArch64::ORRv16i8:
if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
@@ -2274,32 +2263,35 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
case AArch64::STNPSi:
case AArch64::LDG:
case AArch64::STGPi:
+
case AArch64::LD1B_IMM:
- case AArch64::LD1H_IMM:
- case AArch64::LD1W_IMM:
- case AArch64::LD1D_IMM:
- case AArch64::ST1B_IMM:
- case AArch64::ST1H_IMM:
- case AArch64::ST1W_IMM:
- case AArch64::ST1D_IMM:
case AArch64::LD1B_H_IMM:
+ case AArch64::LD1B_S_IMM:
+ case AArch64::LD1B_D_IMM:
case AArch64::LD1SB_H_IMM:
+ case AArch64::LD1SB_S_IMM:
+ case AArch64::LD1SB_D_IMM:
+ case AArch64::LD1H_IMM:
case AArch64::LD1H_S_IMM:
+ case AArch64::LD1H_D_IMM:
case AArch64::LD1SH_S_IMM:
+ case AArch64::LD1SH_D_IMM:
+ case AArch64::LD1W_IMM:
case AArch64::LD1W_D_IMM:
case AArch64::LD1SW_D_IMM:
+ case AArch64::LD1D_IMM:
+
+ case AArch64::ST1B_IMM:
case AArch64::ST1B_H_IMM:
- case AArch64::ST1H_S_IMM:
- case AArch64::ST1W_D_IMM:
- case AArch64::LD1B_S_IMM:
- case AArch64::LD1SB_S_IMM:
- case AArch64::LD1H_D_IMM:
- case AArch64::LD1SH_D_IMM:
case AArch64::ST1B_S_IMM:
- case AArch64::ST1H_D_IMM:
- case AArch64::LD1B_D_IMM:
- case AArch64::LD1SB_D_IMM:
case AArch64::ST1B_D_IMM:
+ case AArch64::ST1H_IMM:
+ case AArch64::ST1H_S_IMM:
+ case AArch64::ST1H_D_IMM:
+ case AArch64::ST1W_IMM:
+ case AArch64::ST1W_D_IMM:
+ case AArch64::ST1D_IMM:
+
case AArch64::LD1RB_IMM:
case AArch64::LD1RB_H_IMM:
case AArch64::LD1RB_S_IMM:
@@ -2316,6 +2308,32 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
case AArch64::LD1RW_D_IMM:
case AArch64::LD1RSW_IMM:
case AArch64::LD1RD_IMM:
+
+ case AArch64::LDNT1B_ZRI:
+ case AArch64::LDNT1H_ZRI:
+ case AArch64::LDNT1W_ZRI:
+ case AArch64::LDNT1D_ZRI:
+ case AArch64::STNT1B_ZRI:
+ case AArch64::STNT1H_ZRI:
+ case AArch64::STNT1W_ZRI:
+ case AArch64::STNT1D_ZRI:
+
+ case AArch64::LDNF1B_IMM:
+ case AArch64::LDNF1B_H_IMM:
+ case AArch64::LDNF1B_S_IMM:
+ case AArch64::LDNF1B_D_IMM:
+ case AArch64::LDNF1SB_H_IMM:
+ case AArch64::LDNF1SB_S_IMM:
+ case AArch64::LDNF1SB_D_IMM:
+ case AArch64::LDNF1H_IMM:
+ case AArch64::LDNF1H_S_IMM:
+ case AArch64::LDNF1H_D_IMM:
+ case AArch64::LDNF1SH_S_IMM:
+ case AArch64::LDNF1SH_D_IMM:
+ case AArch64::LDNF1W_IMM:
+ case AArch64::LDNF1W_D_IMM:
+ case AArch64::LDNF1SW_D_IMM:
+ case AArch64::LDNF1D_IMM:
return 3;
case AArch64::ADDG:
case AArch64::STGOffset:
@@ -2866,10 +2884,22 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1H_IMM:
case AArch64::LD1W_IMM:
case AArch64::LD1D_IMM:
+ case AArch64::LDNT1B_ZRI:
+ case AArch64::LDNT1H_ZRI:
+ case AArch64::LDNT1W_ZRI:
+ case AArch64::LDNT1D_ZRI:
case AArch64::ST1B_IMM:
case AArch64::ST1H_IMM:
case AArch64::ST1W_IMM:
case AArch64::ST1D_IMM:
+ case AArch64::STNT1B_ZRI:
+ case AArch64::STNT1H_ZRI:
+ case AArch64::STNT1W_ZRI:
+ case AArch64::STNT1D_ZRI:
+ case AArch64::LDNF1B_IMM:
+ case AArch64::LDNF1H_IMM:
+ case AArch64::LDNF1W_IMM:
+ case AArch64::LDNF1D_IMM:
// A full vector's worth of data
// Width = mbytes * elements
Scale = TypeSize::Scalable(16);
@@ -2886,6 +2916,12 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::ST1B_H_IMM:
case AArch64::ST1H_S_IMM:
case AArch64::ST1W_D_IMM:
+ case AArch64::LDNF1B_H_IMM:
+ case AArch64::LDNF1SB_H_IMM:
+ case AArch64::LDNF1H_S_IMM:
+ case AArch64::LDNF1SH_S_IMM:
+ case AArch64::LDNF1W_D_IMM:
+ case AArch64::LDNF1SW_D_IMM:
// A half vector's worth of data
// Width = mbytes * elements
Scale = TypeSize::Scalable(8);
@@ -2899,6 +2935,10 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1SH_D_IMM:
case AArch64::ST1B_S_IMM:
case AArch64::ST1H_D_IMM:
+ case AArch64::LDNF1B_S_IMM:
+ case AArch64::LDNF1SB_S_IMM:
+ case AArch64::LDNF1H_D_IMM:
+ case AArch64::LDNF1SH_D_IMM:
// A quarter vector's worth of data
// Width = mbytes * elements
Scale = TypeSize::Scalable(4);
@@ -2909,6 +2949,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1B_D_IMM:
case AArch64::LD1SB_D_IMM:
case AArch64::ST1B_D_IMM:
+ case AArch64::LDNF1B_D_IMM:
+ case AArch64::LDNF1SB_D_IMM:
// An eighth vector's worth of data
// Width = mbytes * elements
Scale = TypeSize::Scalable(2);
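The LDNT1/STNT1 and LDNF1 opcodes added here reuse the existing scalable-size buckets: a full SVE register holds 16 bytes times the hardware's vscale, and the extending forms touch a half, quarter, or eighth of that. A minimal model of those widths (vscale values are illustrative):

    #include <cstdio>

    int main() {
      // An SVE register is 128 * vscale bits, with vscale fixed by the CPU.
      for (unsigned VScale = 1; VScale <= 4; ++VScale)
        std::printf("vscale=%u: full=%2u half=%2u quarter=%u eighth=%u bytes\n",
                    VScale, 16 * VScale, 8 * VScale, 4 * VScale, 2 * VScale);
      return 0;
    }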
@@ -3503,77 +3545,37 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR64RegClass.contains(DestReg) &&
AArch64::FPR64RegClass.contains(SrcReg)) {
- if (Subtarget.hasNEON()) {
- DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
- &AArch64::FPR128RegClass);
- SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
- &AArch64::FPR128RegClass);
- BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
- .addReg(SrcReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- } else {
- BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- }
+ BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AArch64::FPR32RegClass.contains(DestReg) &&
AArch64::FPR32RegClass.contains(SrcReg)) {
- if (Subtarget.hasNEON()) {
- DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
- &AArch64::FPR128RegClass);
- SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
- &AArch64::FPR128RegClass);
- BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
- .addReg(SrcReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- } else {
- BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- }
+ BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AArch64::FPR16RegClass.contains(DestReg) &&
AArch64::FPR16RegClass.contains(SrcReg)) {
- if (Subtarget.hasNEON()) {
- DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
- &AArch64::FPR128RegClass);
- SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
- &AArch64::FPR128RegClass);
- BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
- .addReg(SrcReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- } else {
- DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
- &AArch64::FPR32RegClass);
- SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
- &AArch64::FPR32RegClass);
- BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- }
+ DestReg =
+ RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
+ SrcReg =
+ RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
+ BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AArch64::FPR8RegClass.contains(DestReg) &&
AArch64::FPR8RegClass.contains(SrcReg)) {
- if (Subtarget.hasNEON()) {
- DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
- &AArch64::FPR128RegClass);
- SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
- &AArch64::FPR128RegClass);
- BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
- .addReg(SrcReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- } else {
- DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
- &AArch64::FPR32RegClass);
- SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
- &AArch64::FPR32RegClass);
- BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- }
+ DestReg =
+ RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
+ SrcReg =
+ RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
+ BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
return;
}
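Dropping the hasNEON special cases means every sub-128-bit FPR-to-FPR copy now lowers through an FMOV instead of a full-width vector ORR. A sketch of the difference for an FPR64 copy (register names illustrative):

    // Before, with NEON available, an FPR64 COPY became a 128-bit ORR:
    //   mov  v0.16b, v1.16b   // ORRv16i8 on the enclosing q-registers
    // Now it is always the dedicated 64-bit move:
    //   fmov d0, d1           // FMOVDr
    // This is also why isFPRCopy above stops claiming FPR64 COPYs:
    // only FPR128 copies still lower to ORR.16b.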
@@ -4339,6 +4341,10 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
case AArch64::ST1Twov1d:
case AArch64::ST1Threev1d:
case AArch64::ST1Fourv1d:
+ case AArch64::ST1i8:
+ case AArch64::ST1i16:
+ case AArch64::ST1i32:
+ case AArch64::ST1i64:
case AArch64::IRG:
case AArch64::IRGstack:
case AArch64::STGloop:
@@ -4911,6 +4917,55 @@ static bool getFMAPatterns(MachineInstr &Root,
return Found;
}
+static bool getFMULPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) {
+ MachineBasicBlock &MBB = *Root.getParent();
+ bool Found = false;
+
+ auto Match = [&](unsigned Opcode, int Operand,
+ MachineCombinerPattern Pattern) -> bool {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineOperand &MO = Root.getOperand(Operand);
+ MachineInstr *MI = nullptr;
+ if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
+ MI = MRI.getUniqueVRegDef(MO.getReg());
+ if (MI && MI->getOpcode() == Opcode) {
+ Patterns.push_back(Pattern);
+ return true;
+ }
+ return false;
+ };
+
+ typedef MachineCombinerPattern MCP;
+
+ switch (Root.getOpcode()) {
+ default:
+ return false;
+ case AArch64::FMULv2f32:
+ Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
+ Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
+ break;
+ case AArch64::FMULv2f64:
+ Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
+ Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
+ break;
+ case AArch64::FMULv4f16:
+ Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
+ Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
+ break;
+ case AArch64::FMULv4f32:
+ Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
+ Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
+ break;
+ case AArch64::FMULv8f16:
+ Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
+ Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
+ break;
+ }
+
+ return Found;
+}
+
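getFMULPatterns feeds the machine combiner the rewrite implemented by genIndexedMultiply below: when one FMUL operand is a lane splat, the multiply can read the lane directly. Illustrative before/after for the v4f32 case (registers and lane are made up):

    // Before: the lane is splatted, then multiplied.
    //   dup  v1.4s, v2.s[1]          // DUPv4i32lane
    //   fmul v0.4s, v3.4s, v1.4s     // FMULv4f32
    // After: the indexed form reads the lane inside the multiply.
    //   fmul v0.4s, v3.4s, v2.s[1]   // FMULv4i32_indexed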
/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
@@ -4974,6 +5029,16 @@ bool AArch64InstrInfo::isThroughputPattern(
case MachineCombinerPattern::FMLSv2f64_OP2:
case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
case MachineCombinerPattern::FMLSv4f32_OP2:
+ case MachineCombinerPattern::FMULv2i32_indexed_OP1:
+ case MachineCombinerPattern::FMULv2i32_indexed_OP2:
+ case MachineCombinerPattern::FMULv2i64_indexed_OP1:
+ case MachineCombinerPattern::FMULv2i64_indexed_OP2:
+ case MachineCombinerPattern::FMULv4i16_indexed_OP1:
+ case MachineCombinerPattern::FMULv4i16_indexed_OP2:
+ case MachineCombinerPattern::FMULv4i32_indexed_OP1:
+ case MachineCombinerPattern::FMULv4i32_indexed_OP2:
+ case MachineCombinerPattern::FMULv8i16_indexed_OP1:
+ case MachineCombinerPattern::FMULv8i16_indexed_OP2:
case MachineCombinerPattern::MULADDv8i8_OP1:
case MachineCombinerPattern::MULADDv8i8_OP2:
case MachineCombinerPattern::MULADDv16i8_OP1:
@@ -5030,6 +5095,8 @@ bool AArch64InstrInfo::getMachineCombinerPatterns(
if (getMaddPatterns(Root, Patterns))
return true;
// Floating point patterns
+ if (getFMULPatterns(Root, Patterns))
+ return true;
if (getFMAPatterns(Root, Patterns))
return true;
@@ -5118,6 +5185,42 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
return MUL;
}
+/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
+static MachineInstr *
+genIndexedMultiply(MachineInstr &Root,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ unsigned IdxDupOp, unsigned MulOpc,
+ const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
+ assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
+ "Invalid index of FMUL operand");
+
+ MachineFunction &MF = *Root.getMF();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+
+ MachineInstr *Dup =
+ MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
+
+ Register DupSrcReg = Dup->getOperand(1).getReg();
+ MRI.clearKillFlags(DupSrcReg);
+ MRI.constrainRegClass(DupSrcReg, RC);
+
+ unsigned DupSrcLane = Dup->getOperand(2).getImm();
+
+ unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
+ MachineOperand &MulOp = Root.getOperand(IdxMulOp);
+
+ Register ResultReg = Root.getOperand(0).getReg();
+
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MulOpc), ResultReg)
+ .add(MulOp)
+ .addReg(DupSrcReg)
+ .addImm(DupSrcLane);
+
+ InsInstrs.push_back(MIB);
+ return &Root;
+}
+
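genIndexedMultiply only replaces the FMUL: it clears the kill flags on the DUP's source (which now has a later use) and leaves the DUP itself in place, to be removed by dead-code elimination once the indexed multiply is its last user. Since these patterns never set MUL, the new "if (MUL)" guard at the end of genAlternativeCodeSequence keeps the deletion list to just Root. In MIR-like form (hypothetical virtual registers, IdxDupOp == 2):

    //   %dup:fpr128 = DUPv4i32lane %y, 1
    //   %res:fpr128 = FMULv4f32 %x, %dup
    // becomes
    //   %res:fpr128 = FMULv4i32_indexed %x, %y, 1
    // %dup survives the combine and dies separately if it has no other uses.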
/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
/// instructions.
///
@@ -5329,15 +5432,15 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
uint64_t UImm = SignExtend64(Imm, BitSize);
uint64_t Encoding;
- if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
- MachineInstrBuilder MIB1 =
- BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
- .addReg(ZeroReg)
- .addImm(Encoding);
- InsInstrs.push_back(MIB1);
- InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
- MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
- }
+ if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
+ return;
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
+ .addReg(ZeroReg)
+ .addImm(Encoding);
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
break;
}
case MachineCombinerPattern::MULSUBW_OP1:
@@ -5420,15 +5523,15 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
uint64_t UImm = SignExtend64(-Imm, BitSize);
uint64_t Encoding;
- if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
- MachineInstrBuilder MIB1 =
- BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
- .addReg(ZeroReg)
- .addImm(Encoding);
- InsInstrs.push_back(MIB1);
- InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
- MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
- }
+ if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
+ return;
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
+ .addReg(ZeroReg)
+ .addImm(Encoding);
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
break;
}
@@ -6076,12 +6179,50 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
break;
}
+ case MachineCombinerPattern::FMULv2i32_indexed_OP1:
+ case MachineCombinerPattern::FMULv2i32_indexed_OP2: {
+ unsigned IdxDupOp =
+ (Pattern == MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 1 : 2;
+ genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
+ &AArch64::FPR128RegClass, MRI);
+ break;
+ }
+ case MachineCombinerPattern::FMULv2i64_indexed_OP1:
+ case MachineCombinerPattern::FMULv2i64_indexed_OP2: {
+ unsigned IdxDupOp =
+ (Pattern == MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1 : 2;
+ genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
+ &AArch64::FPR128RegClass, MRI);
+ break;
+ }
+ case MachineCombinerPattern::FMULv4i16_indexed_OP1:
+ case MachineCombinerPattern::FMULv4i16_indexed_OP2: {
+ unsigned IdxDupOp =
+ (Pattern == MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1 : 2;
+ genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
+ &AArch64::FPR128_loRegClass, MRI);
+ break;
+ }
+ case MachineCombinerPattern::FMULv4i32_indexed_OP1:
+ case MachineCombinerPattern::FMULv4i32_indexed_OP2: {
+ unsigned IdxDupOp =
+ (Pattern == MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 1 : 2;
+ genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
+ &AArch64::FPR128RegClass, MRI);
+ break;
+ }
+ case MachineCombinerPattern::FMULv8i16_indexed_OP1:
+ case MachineCombinerPattern::FMULv8i16_indexed_OP2: {
+ unsigned IdxDupOp =
+ (Pattern == MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 1 : 2;
+ genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
+ &AArch64::FPR128_loRegClass, MRI);
+ break;
+ }
} // end switch (Pattern)
// Record MUL and ADD/SUB for deletion
- // FIXME: This assertion fails in CodeGen/AArch64/tailmerging_in_mbp.ll and
- // CodeGen/AArch64/urem-seteq-nonzero.ll.
- // assert(MUL && "MUL was never set");
- DelInstrs.push_back(MUL);
+ if (MUL)
+ DelInstrs.push_back(MUL);
DelInstrs.push_back(&Root);
}
@@ -6624,13 +6765,8 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
- const std::vector<MCCFIInstruction> &CFIInstructions =
- RepeatedSequenceLocs[0].getMF()->getFrameInstructions();
- if (MBBI->isCFIInstruction()) {
- unsigned CFIIndex = MBBI->getOperand(0).getCFIIndex();
- MCCFIInstruction CFI = CFIInstructions[CFIIndex];
+ if (MBBI->isCFIInstruction())
CFICount++;
- }
MBBI++;
}
@@ -7212,7 +7348,8 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
.setMIFlags(MachineInstr::FrameSetup);
// If v8.3a features are available we can replace a RET instruction by
- // RETAA or RETAB and omit the AUT instructions
+ // RETAA or RETAB and omit the AUT instructions. In this case the
+ // DW_CFA_AARCH64_negate_ra_state can't be emitted.
if (Subtarget.hasPAuth() && MBBAUT != MBB.end() &&
MBBAUT->getOpcode() == AArch64::RET) {
BuildMI(MBB, MBBAUT, DL,
@@ -7225,6 +7362,11 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
TII->get(ShouldSignReturnAddrWithAKey ? AArch64::AUTIASP
: AArch64::AUTIBSP))
.setMIFlag(MachineInstr::FrameDestroy);
+ unsigned CFIIndexAuth =
+ MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+ BuildMI(MBB, MBBAUT, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndexAuth)
+ .setMIFlags(MachineInstr::FrameDestroy);
}
}
}
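The added CFI keeps the unwinder's view of the return-address state in sync: DW_CFA_AARCH64_negate_ra_state toggles an "RA is signed" bit, so each PAC/AUT boundary needs one. A sketch of the pairing this function now emits (with RETAA/RETAB, authenticate and return are fused, so there is no point at which to toggle back, which is what the updated comment notes):

    //   paciasp            // sign LR          -> .cfi_negate_ra_state
    //   ... outlined body ...
    //   autiasp            // authenticate LR  -> .cfi_negate_ra_state (added here)
    //   ret
    // versus, with v8.3a PAuth:
    //   paciasp            // sign LR          -> .cfi_negate_ra_state
    //   ... outlined body ...
    //   retaa              // authenticate + return; no toggle emitted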
@@ -7401,7 +7543,11 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
unsigned Reg = findRegisterToSaveLRTo(C);
assert(Reg != 0 && "No callee-saved register available?");
- // Save and restore LR from that register.
+ // LR has to be a live-in so that we can save it.
+ if (!MBB.isLiveIn(AArch64::LR))
+ MBB.addLiveIn(AArch64::LR);
+
+ // Save and restore LR from Reg.
Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
.addReg(AArch64::XZR)
.addReg(AArch64::LR)