Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64InstrInfo.cpp')
-rw-r--r--   llvm/lib/Target/AArch64/AArch64InstrInfo.cpp   | 392
1 file changed, 269 insertions(+), 123 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index b03d421d3e6d..f8f8ee3f1e6c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1112,8 +1112,8 @@ bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
 /// Return true if the comparison instruction can be analyzed.
 bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
-                                      Register &SrcReg2, int &CmpMask,
-                                      int &CmpValue) const {
+                                      Register &SrcReg2, int64_t &CmpMask,
+                                      int64_t &CmpValue) const {
   // The first operand can be a frame index where we'd normally expect a
   // register.
   assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
@@ -1155,8 +1155,7 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
     SrcReg = MI.getOperand(1).getReg();
     SrcReg2 = 0;
     CmpMask = ~0;
-    // FIXME: In order to convert CmpValue to 0 or 1
-    CmpValue = MI.getOperand(2).getImm() != 0;
+    CmpValue = MI.getOperand(2).getImm();
     return true;
   case AArch64::ANDSWri:
   case AArch64::ANDSXri:
@@ -1165,14 +1164,9 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
     SrcReg = MI.getOperand(1).getReg();
     SrcReg2 = 0;
     CmpMask = ~0;
-    // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
-    // while the type of CmpValue is int. When converting uint64_t to int,
-    // the high 32 bits of uint64_t will be lost.
-    // In fact it causes a bug in spec2006-483.xalancbmk
-    // CmpValue is only used to compare with zero in OptimizeCompareInstr
     CmpValue = AArch64_AM::decodeLogicalImmediate(
         MI.getOperand(2).getImm(),
-        MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
+        MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
     return true;
   }
 
@@ -1433,8 +1427,8 @@ bool AArch64InstrInfo::optimizePTestInstr(
 /// instruction.
 /// Only comparison with zero is supported.
 bool AArch64InstrInfo::optimizeCompareInstr(
-    MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int CmpMask,
-    int CmpValue, const MachineRegisterInfo *MRI) const {
+    MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
+    int64_t CmpValue, const MachineRegisterInfo *MRI) const {
   assert(CmpInstr.getParent());
   assert(MRI);
 
@@ -1462,10 +1456,6 @@ bool AArch64InstrInfo::optimizeCompareInstr(
   if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
     return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
 
-  // Continue only if we have a "ri" where immediate is zero.
-  // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
-  // function.
-  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
   if (SrcReg2 != 0)
     return false;
 
@@ -1473,9 +1463,10 @@ bool AArch64InstrInfo::optimizeCompareInstr(
   if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
     return false;
 
-  if (!CmpValue && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
+  if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
     return true;
-  return removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
+  return (CmpValue == 0 || CmpValue == 1) &&
+         removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
 }
 
 /// Get opcode of S version of Instr.
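Why the widening matters: decodeLogicalImmediate returns uint64_t, and the old code funneled the decoded value through a plain int, losing the high 32 bits (the removed FIXME notes this caused a bug in spec2006-483.xalancbmk). A minimal standalone sketch of the failure mode, with an illustrative constant that is not taken from the patch:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Stand-in for a decoded 64-bit logical immediate.
      uint64_t Imm = 0xFFFFFFFF00000000ULL;
      int Narrow = static_cast<int>(Imm);       // old path: keeps only the low half
      int64_t Wide = static_cast<int64_t>(Imm); // new path: value preserved
      assert(Narrow == 0); // the nonzero immediate now looks like a compare with 0
      assert(Wide != 0);   // widened, it is correctly seen as nonzero
      return 0;
    }
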
@@ -2099,10 +2090,8 @@ bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
   default:
     break;
   case TargetOpcode::COPY: {
-    // FPR64 copies will by lowered to ORR.16b
     Register DstReg = MI.getOperand(0).getReg();
-    return (AArch64::FPR64RegClass.contains(DstReg) ||
-            AArch64::FPR128RegClass.contains(DstReg));
+    return AArch64::FPR128RegClass.contains(DstReg);
   }
   case AArch64::ORRv16i8:
     if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
@@ -2274,32 +2263,35 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
   case AArch64::STNPSi:
   case AArch64::LDG:
   case AArch64::STGPi:
+
   case AArch64::LD1B_IMM:
-  case AArch64::LD1H_IMM:
-  case AArch64::LD1W_IMM:
-  case AArch64::LD1D_IMM:
-  case AArch64::ST1B_IMM:
-  case AArch64::ST1H_IMM:
-  case AArch64::ST1W_IMM:
-  case AArch64::ST1D_IMM:
   case AArch64::LD1B_H_IMM:
+  case AArch64::LD1B_S_IMM:
+  case AArch64::LD1B_D_IMM:
   case AArch64::LD1SB_H_IMM:
+  case AArch64::LD1SB_S_IMM:
+  case AArch64::LD1SB_D_IMM:
+  case AArch64::LD1H_IMM:
   case AArch64::LD1H_S_IMM:
+  case AArch64::LD1H_D_IMM:
   case AArch64::LD1SH_S_IMM:
+  case AArch64::LD1SH_D_IMM:
+  case AArch64::LD1W_IMM:
   case AArch64::LD1W_D_IMM:
   case AArch64::LD1SW_D_IMM:
+  case AArch64::LD1D_IMM:
+
+  case AArch64::ST1B_IMM:
   case AArch64::ST1B_H_IMM:
-  case AArch64::ST1H_S_IMM:
-  case AArch64::ST1W_D_IMM:
-  case AArch64::LD1B_S_IMM:
-  case AArch64::LD1SB_S_IMM:
-  case AArch64::LD1H_D_IMM:
-  case AArch64::LD1SH_D_IMM:
   case AArch64::ST1B_S_IMM:
-  case AArch64::ST1H_D_IMM:
-  case AArch64::LD1B_D_IMM:
-  case AArch64::LD1SB_D_IMM:
   case AArch64::ST1B_D_IMM:
+  case AArch64::ST1H_IMM:
+  case AArch64::ST1H_S_IMM:
+  case AArch64::ST1H_D_IMM:
+  case AArch64::ST1W_IMM:
+  case AArch64::ST1W_D_IMM:
+  case AArch64::ST1D_IMM:
+
   case AArch64::LD1RB_IMM:
   case AArch64::LD1RB_H_IMM:
   case AArch64::LD1RB_S_IMM:
@@ -2316,6 +2308,32 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
   case AArch64::LD1RW_D_IMM:
   case AArch64::LD1RSW_IMM:
   case AArch64::LD1RD_IMM:
+
+  case AArch64::LDNT1B_ZRI:
+  case AArch64::LDNT1H_ZRI:
+  case AArch64::LDNT1W_ZRI:
+  case AArch64::LDNT1D_ZRI:
+  case AArch64::STNT1B_ZRI:
+  case AArch64::STNT1H_ZRI:
+  case AArch64::STNT1W_ZRI:
+  case AArch64::STNT1D_ZRI:
+
+  case AArch64::LDNF1B_IMM:
+  case AArch64::LDNF1B_H_IMM:
+  case AArch64::LDNF1B_S_IMM:
+  case AArch64::LDNF1B_D_IMM:
+  case AArch64::LDNF1SB_H_IMM:
+  case AArch64::LDNF1SB_S_IMM:
+  case AArch64::LDNF1SB_D_IMM:
+  case AArch64::LDNF1H_IMM:
+  case AArch64::LDNF1H_S_IMM:
+  case AArch64::LDNF1H_D_IMM:
+  case AArch64::LDNF1SH_S_IMM:
+  case AArch64::LDNF1SH_D_IMM:
+  case AArch64::LDNF1W_IMM:
+  case AArch64::LDNF1W_D_IMM:
+  case AArch64::LDNF1SW_D_IMM:
+  case AArch64::LDNF1D_IMM:
     return 3;
   case AArch64::ADDG:
   case AArch64::STGOffset:
@@ -2866,10 +2884,22 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::LD1H_IMM:
   case AArch64::LD1W_IMM:
   case AArch64::LD1D_IMM:
+  case AArch64::LDNT1B_ZRI:
+  case AArch64::LDNT1H_ZRI:
+  case AArch64::LDNT1W_ZRI:
+  case AArch64::LDNT1D_ZRI:
   case AArch64::ST1B_IMM:
   case AArch64::ST1H_IMM:
   case AArch64::ST1W_IMM:
   case AArch64::ST1D_IMM:
+  case AArch64::STNT1B_ZRI:
+  case AArch64::STNT1H_ZRI:
+  case AArch64::STNT1W_ZRI:
+  case AArch64::STNT1D_ZRI:
+  case AArch64::LDNF1B_IMM:
+  case AArch64::LDNF1H_IMM:
+  case AArch64::LDNF1W_IMM:
+  case AArch64::LDNF1D_IMM:
     // A full vectors worth of data
     // Width = mbytes * elements
     Scale = TypeSize::Scalable(16);
@@ -2886,6 +2916,12 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::ST1B_H_IMM:
   case AArch64::ST1H_S_IMM:
   case AArch64::ST1W_D_IMM:
+  case AArch64::LDNF1B_H_IMM:
+  case AArch64::LDNF1SB_H_IMM:
+  case AArch64::LDNF1H_S_IMM:
+  case AArch64::LDNF1SH_S_IMM:
+  case AArch64::LDNF1W_D_IMM:
+  case AArch64::LDNF1SW_D_IMM:
     // A half vector worth of data
     // Width = mbytes * elements
     Scale = TypeSize::Scalable(8);
@@ -2899,6 +2935,10 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::LD1SH_D_IMM:
   case AArch64::ST1B_S_IMM:
   case AArch64::ST1H_D_IMM:
+  case AArch64::LDNF1B_S_IMM:
+  case AArch64::LDNF1SB_S_IMM:
+  case AArch64::LDNF1H_D_IMM:
+  case AArch64::LDNF1SH_D_IMM:
     // A quarter vector worth of data
     // Width = mbytes * elements
     Scale = TypeSize::Scalable(4);
@@ -2909,6 +2949,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::LD1B_D_IMM:
   case AArch64::LD1SB_D_IMM:
   case AArch64::ST1B_D_IMM:
+  case AArch64::LDNF1B_D_IMM:
+  case AArch64::LDNF1SB_D_IMM:
    // A eighth vector worth of data
    // Width = mbytes * elements
    Scale = TypeSize::Scalable(2);
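The new nontemporal (LDNT1/STNT1) and non-faulting (LDNF1) opcodes reuse the existing scalable-size buckets: full-width forms advance in whole SVE vectors (a scalable 16 bytes), while the widening forms cover a half, quarter, or eighth of a vector. A hypothetical caller sketch; the out-parameter signature of getMemOpInfo shown here (Scale, Width, MinOffset, MaxOffset) is an assumption about this revision of the code:

    TypeSize Scale = TypeSize::Fixed(0);
    unsigned Width = 0;
    int64_t MinOffset = 0, MaxOffset = 0;
    if (AArch64InstrInfo::getMemOpInfo(AArch64::LDNF1D_IMM, Scale, Width,
                                       MinOffset, MaxOffset)) {
      // Scale == TypeSize::Scalable(16): the immediate range
      // [MinOffset, MaxOffset] is counted in whole SVE vectors,
      // i.e. a byte offset of Imm * 16 * vscale.
    }
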
@@ -3503,77 +3545,37 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 
   if (AArch64::FPR64RegClass.contains(DestReg) &&
       AArch64::FPR64RegClass.contains(SrcReg)) {
-    if (Subtarget.hasNEON()) {
-      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
-                                       &AArch64::FPR128RegClass);
-      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
-                                      &AArch64::FPR128RegClass);
-      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
-          .addReg(SrcReg)
-          .addReg(SrcReg, getKillRegState(KillSrc));
-    } else {
-      BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
-          .addReg(SrcReg, getKillRegState(KillSrc));
-    }
+    BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
     return;
   }
 
   if (AArch64::FPR32RegClass.contains(DestReg) &&
       AArch64::FPR32RegClass.contains(SrcReg)) {
-    if (Subtarget.hasNEON()) {
-      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
-                                       &AArch64::FPR128RegClass);
-      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
-                                      &AArch64::FPR128RegClass);
-      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
-          .addReg(SrcReg)
-          .addReg(SrcReg, getKillRegState(KillSrc));
-    } else {
-      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
-          .addReg(SrcReg, getKillRegState(KillSrc));
-    }
+    BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
     return;
   }
 
   if (AArch64::FPR16RegClass.contains(DestReg) &&
       AArch64::FPR16RegClass.contains(SrcReg)) {
-    if (Subtarget.hasNEON()) {
-      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
-                                       &AArch64::FPR128RegClass);
-      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
-                                      &AArch64::FPR128RegClass);
-      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
-          .addReg(SrcReg)
-          .addReg(SrcReg, getKillRegState(KillSrc));
-    } else {
-      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
-                                       &AArch64::FPR32RegClass);
-      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
-                                      &AArch64::FPR32RegClass);
-      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
-          .addReg(SrcReg, getKillRegState(KillSrc));
-    }
+    DestReg =
+        RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
+    SrcReg =
+        RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
+    BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
     return;
   }
 
   if (AArch64::FPR8RegClass.contains(DestReg) &&
       AArch64::FPR8RegClass.contains(SrcReg)) {
-    if (Subtarget.hasNEON()) {
-      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
-                                       &AArch64::FPR128RegClass);
-      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
-                                      &AArch64::FPR128RegClass);
-      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
-          .addReg(SrcReg)
-          .addReg(SrcReg, getKillRegState(KillSrc));
-    } else {
-      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
-                                       &AArch64::FPR32RegClass);
-      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
-                                      &AArch64::FPR32RegClass);
-      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
-          .addReg(SrcReg, getKillRegState(KillSrc));
-    }
+    DestReg =
+        RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
+    SrcReg =
+        RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
+    BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
     return;
   }
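After this change, scalar FP copies no longer detour through a NEON ORR of the containing q-register; every case emits an FMOV, with sub-32-bit registers widened only as far as their s-register super-register. Illustrative output for the four cases (register numbers assumed):

    fmov d0, d1   // FPR64 <- FPR64 (previously: orr v0.16b, v1.16b, v1.16b)
    fmov s0, s1   // FPR32 <- FPR32
    fmov s0, s1   // FPR16 <- FPR16: h0/h1 widened to s0/s1 via hsub
    fmov s0, s1   // FPR8  <- FPR8:  b0/b1 widened to s0/s1 via bsub
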
@@ -4339,6 +4341,10 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
   case AArch64::ST1Twov1d:
   case AArch64::ST1Threev1d:
   case AArch64::ST1Fourv1d:
+  case AArch64::ST1i8:
+  case AArch64::ST1i16:
+  case AArch64::ST1i32:
+  case AArch64::ST1i64:
   case AArch64::IRG:
   case AArch64::IRGstack:
   case AArch64::STGloop:
@@ -4911,6 +4917,55 @@ static bool getFMAPatterns(MachineInstr &Root,
   return Found;
 }
 
+static bool getFMULPatterns(MachineInstr &Root,
+                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
+  MachineBasicBlock &MBB = *Root.getParent();
+  bool Found = false;
+
+  auto Match = [&](unsigned Opcode, int Operand,
+                   MachineCombinerPattern Pattern) -> bool {
+    MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+    MachineOperand &MO = Root.getOperand(Operand);
+    MachineInstr *MI = nullptr;
+    if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
+      MI = MRI.getUniqueVRegDef(MO.getReg());
+    if (MI && MI->getOpcode() == Opcode) {
+      Patterns.push_back(Pattern);
+      return true;
+    }
+    return false;
+  };
+
+  typedef MachineCombinerPattern MCP;
+
+  switch (Root.getOpcode()) {
+  default:
+    return false;
+  case AArch64::FMULv2f32:
+    Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
+    Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
+    break;
+  case AArch64::FMULv2f64:
+    Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
+    Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
+    break;
+  case AArch64::FMULv4f16:
+    Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
+    Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
+    break;
+  case AArch64::FMULv4f32:
+    Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
+    Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
+    break;
+  case AArch64::FMULv8f16:
+    Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
+    Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
+    break;
+  }
+
+  return Found;
+}
+
 /// Return true when a code sequence can improve throughput. It
 /// should be called only for instructions in loops.
 /// \param Pattern - combiner pattern
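getFMULPatterns only records candidates; the actual rewrite happens later in genAlternativeCodeSequence via genIndexedMultiply. In assembly terms the fold being set up is, roughly (registers and lane index chosen for illustration):

    dup  v3.4s, v2.s[1]
    fmul v0.4s, v1.4s, v3.4s
      =>
    fmul v0.4s, v1.4s, v2.s[1]

If the DUP has no other users it becomes dead and can be removed, leaving a single by-element multiply.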
@@ -4974,6 +5029,16 @@ bool AArch64InstrInfo::isThroughputPattern(
   case MachineCombinerPattern::FMLSv2f64_OP2:
   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
   case MachineCombinerPattern::FMLSv4f32_OP2:
+  case MachineCombinerPattern::FMULv2i32_indexed_OP1:
+  case MachineCombinerPattern::FMULv2i32_indexed_OP2:
+  case MachineCombinerPattern::FMULv2i64_indexed_OP1:
+  case MachineCombinerPattern::FMULv2i64_indexed_OP2:
+  case MachineCombinerPattern::FMULv4i16_indexed_OP1:
+  case MachineCombinerPattern::FMULv4i16_indexed_OP2:
+  case MachineCombinerPattern::FMULv4i32_indexed_OP1:
+  case MachineCombinerPattern::FMULv4i32_indexed_OP2:
+  case MachineCombinerPattern::FMULv8i16_indexed_OP1:
+  case MachineCombinerPattern::FMULv8i16_indexed_OP2:
   case MachineCombinerPattern::MULADDv8i8_OP1:
   case MachineCombinerPattern::MULADDv8i8_OP2:
   case MachineCombinerPattern::MULADDv16i8_OP1:
@@ -5030,6 +5095,8 @@ bool AArch64InstrInfo::getMachineCombinerPatterns(
   if (getMaddPatterns(Root, Patterns))
     return true;
   // Floating point patterns
+  if (getFMULPatterns(Root, Patterns))
+    return true;
   if (getFMAPatterns(Root, Patterns))
     return true;
 
@@ -5118,6 +5185,42 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
   return MUL;
 }
 
+/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
+static MachineInstr *
+genIndexedMultiply(MachineInstr &Root,
+                   SmallVectorImpl<MachineInstr *> &InsInstrs,
+                   unsigned IdxDupOp, unsigned MulOpc,
+                   const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
+  assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
+         "Invalid index of FMUL operand");
+
+  MachineFunction &MF = *Root.getMF();
+  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+
+  MachineInstr *Dup =
+      MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
+
+  Register DupSrcReg = Dup->getOperand(1).getReg();
+  MRI.clearKillFlags(DupSrcReg);
+  MRI.constrainRegClass(DupSrcReg, RC);
+
+  unsigned DupSrcLane = Dup->getOperand(2).getImm();
+
+  unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
+  MachineOperand &MulOp = Root.getOperand(IdxMulOp);
+
+  Register ResultReg = Root.getOperand(0).getReg();
+
+  MachineInstrBuilder MIB;
+  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MulOpc), ResultReg)
+            .add(MulOp)
+            .addReg(DupSrcReg)
+            .addImm(DupSrcLane);
+
+  InsInstrs.push_back(MIB);
+  return &Root;
+}
+
 /// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
 /// instructions.
 ///
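Note the operand order genIndexedMultiply produces: the surviving FMUL operand comes first, then the DUP's source register and lane. A hypothetical before/after at the MIR level for an OP2 match (the DUP feeds operand 2):

    %3:fpr128 = DUPv4i32lane %2:fpr128, 1
    %4:fpr128 = FMULv4f32 %1:fpr128, %3:fpr128
      =>
    %4:fpr128 = FMULv4i32_indexed %1:fpr128, %2:fpr128, 1
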
@@ -5329,15 +5432,15 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
     }
     uint64_t UImm = SignExtend64(Imm, BitSize);
     uint64_t Encoding;
-    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
-      MachineInstrBuilder MIB1 =
-          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
-              .addReg(ZeroReg)
-              .addImm(Encoding);
-      InsInstrs.push_back(MIB1);
-      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
-      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
-    }
+    if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
+      return;
+    MachineInstrBuilder MIB1 =
+        BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
+            .addReg(ZeroReg)
+            .addImm(Encoding);
+    InsInstrs.push_back(MIB1);
+    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
     break;
   }
   case MachineCombinerPattern::MULSUBW_OP1:
@@ -5420,15 +5523,15 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
     }
     uint64_t UImm = SignExtend64(-Imm, BitSize);
     uint64_t Encoding;
-    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
-      MachineInstrBuilder MIB1 =
-          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
-              .addReg(ZeroReg)
-              .addImm(Encoding);
-      InsInstrs.push_back(MIB1);
-      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
-      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
-    }
+    if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
+      return;
+    MachineInstrBuilder MIB1 =
+        BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
+            .addReg(ZeroReg)
+            .addImm(Encoding);
+    InsInstrs.push_back(MIB1);
+    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
     break;
   }
 
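Both hunks above are pure control-flow cleanups: bail out early when the (negated) constant cannot be encoded as a logical immediate, instead of nesting the whole materialization in an if. A round-trip sketch of the two encoding helpers used here; treat the chosen constant (a repeating 0x00FF pattern, which should be encodable) as an assumption:

    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(0x00FF00FF00FF00FFULL, 64,
                                            Encoding)) {
      uint64_t RoundTrip = AArch64_AM::decodeLogicalImmediate(Encoding, 64);
      assert(RoundTrip == 0x00FF00FF00FF00FFULL); // N:immr:imms decodes back
    }
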
@@ -6076,12 +6179,50 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
     }
     break;
   }
+  case MachineCombinerPattern::FMULv2i32_indexed_OP1:
+  case MachineCombinerPattern::FMULv2i32_indexed_OP2: {
+    unsigned IdxDupOp =
+        (Pattern == MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 1 : 2;
+    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
+                       &AArch64::FPR128RegClass, MRI);
+    break;
+  }
+  case MachineCombinerPattern::FMULv2i64_indexed_OP1:
+  case MachineCombinerPattern::FMULv2i64_indexed_OP2: {
+    unsigned IdxDupOp =
+        (Pattern == MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1 : 2;
+    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
+                       &AArch64::FPR128RegClass, MRI);
+    break;
+  }
+  case MachineCombinerPattern::FMULv4i16_indexed_OP1:
+  case MachineCombinerPattern::FMULv4i16_indexed_OP2: {
+    unsigned IdxDupOp =
+        (Pattern == MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1 : 2;
+    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
+                       &AArch64::FPR128_loRegClass, MRI);
+    break;
+  }
+  case MachineCombinerPattern::FMULv4i32_indexed_OP1:
+  case MachineCombinerPattern::FMULv4i32_indexed_OP2: {
+    unsigned IdxDupOp =
+        (Pattern == MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 1 : 2;
+    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
+                       &AArch64::FPR128RegClass, MRI);
+    break;
+  }
+  case MachineCombinerPattern::FMULv8i16_indexed_OP1:
+  case MachineCombinerPattern::FMULv8i16_indexed_OP2: {
+    unsigned IdxDupOp =
+        (Pattern == MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 1 : 2;
+    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
+                       &AArch64::FPR128_loRegClass, MRI);
+    break;
+  }
   } // end switch (Pattern)
   // Record MUL and ADD/SUB for deletion
-  // FIXME: This assertion fails in CodeGen/AArch64/tailmerging_in_mbp.ll and
-  // CodeGen/AArch64/urem-seteq-nonzero.ll.
-  // assert(MUL && "MUL was never set");
-  DelInstrs.push_back(MUL);
+  if (MUL)
+    DelInstrs.push_back(MUL);
   DelInstrs.push_back(&Root);
 }
 
@@ -6624,13 +6765,8 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
     MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
     for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
          Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
-      const std::vector<MCCFIInstruction> &CFIInstructions =
-          RepeatedSequenceLocs[0].getMF()->getFrameInstructions();
-      if (MBBI->isCFIInstruction()) {
-        unsigned CFIIndex = MBBI->getOperand(0).getCFIIndex();
-        MCCFIInstruction CFI = CFIInstructions[CFIIndex];
+      if (MBBI->isCFIInstruction())
         CFICount++;
-      }
       MBBI++;
     }
 
@@ -7212,7 +7348,8 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
         .setMIFlags(MachineInstr::FrameSetup);
 
     // If v8.3a features are available we can replace a RET instruction by
-    // RETAA or RETAB and omit the AUT instructions
+    // RETAA or RETAB and omit the AUT instructions. In this case the
+    // DW_CFA_AARCH64_negate_ra_state can't be emitted.
     if (Subtarget.hasPAuth() && MBBAUT != MBB.end() &&
         MBBAUT->getOpcode() == AArch64::RET) {
       BuildMI(MBB, MBBAUT, DL,
@@ -7225,6 +7362,11 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
                 TII->get(ShouldSignReturnAddrWithAKey ? AArch64::AUTIASP
                                                       : AArch64::AUTIBSP))
             .setMIFlag(MachineInstr::FrameDestroy);
+        unsigned CFIIndexAuth =
+            MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+        BuildMI(MBB, MBBAUT, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+            .addCFIIndex(CFIIndexAuth)
+            .setMIFlags(MachineInstr::FrameDestroy);
     }
   }
 }
@@ -7401,7 +7543,11 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
     unsigned Reg = findRegisterToSaveLRTo(C);
     assert(Reg != 0 && "No callee-saved register available?");
 
-    // Save and restore LR from that register.
+    // LR has to be a live in so that we can save it.
+    if (!MBB.isLiveIn(AArch64::LR))
+      MBB.addLiveIn(AArch64::LR);
+
+    // Save and restore LR from Reg.
     Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
                .addReg(AArch64::XZR)
                .addReg(AArch64::LR)
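For context on the signing fix: emitting DW_CFA_AARCH64_negate_ra_state after the AUT keeps the unwinder's view of the return-address state consistent through the epilogue of a signed outlined function. The non-v8.3a tail now looks roughly like this (key selection and surrounding instructions assumed):

    autiasp                // authenticate LR (autibsp when the B key is used)
    .cfi_negate_ra_state   // unwinder: the return address is no longer signed
    ret

With v8.3a the AUT folds into retaa/retab and, as the updated comment notes, the negate-RA-state directive cannot be emitted in that case.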
