diff options
Diffstat (limited to 'lib/Target/AArch64/AArch64InstrInfo.cpp')
-rw-r--r-- | lib/Target/AArch64/AArch64InstrInfo.cpp | 1054 |
1 file changed, 612 insertions, 442 deletions
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 215e96a82d0e..5c35e5bcdd30 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/Casting.h" @@ -82,6 +83,10 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI); } + // Meta-instructions emit no code. + if (MI.isMetaInstruction()) + return 0; + // FIXME: We currently only handle pseudoinstructions that don't get expanded // before the assembly printer. unsigned NumBytes = 0; @@ -91,12 +96,6 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { // Anything not explicitly designated otherwise is a normal 4-byte insn. NumBytes = 4; break; - case TargetOpcode::DBG_VALUE: - case TargetOpcode::EH_LABEL: - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - NumBytes = 0; - break; case TargetOpcode::STACKMAP: // The upper bound for a stackmap intrinsic is the full length of its shadow NumBytes = StackMapOpers(&MI).getNumPatchBytes(); @@ -416,7 +415,7 @@ unsigned AArch64InstrInfo::insertBranch( // Find the original register that VReg is copied from. 
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) { - while (TargetRegisterInfo::isVirtualRegister(VReg)) { + while (Register::isVirtualRegister(VReg)) { const MachineInstr *DefMI = MRI.getVRegDef(VReg); if (!DefMI->isFullCopy()) return VReg; @@ -431,7 +430,7 @@ static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) { static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewVReg = nullptr) { VReg = removeCopies(MRI, VReg); - if (!TargetRegisterInfo::isVirtualRegister(VReg)) + if (!Register::isVirtualRegister(VReg)) return 0; bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg)); @@ -574,7 +573,7 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB, CC = AArch64CC::NE; break; } - unsigned SrcReg = Cond[2].getReg(); + Register SrcReg = Cond[2].getReg(); if (Is64Bit) { // cmp reg, #0 is actually subs xzr, reg, #0. MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass); @@ -930,7 +929,7 @@ bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, } bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint( - const MachineInstr &MIa, const MachineInstr &MIb, AliasAnalysis *AA) const { + const MachineInstr &MIa, const MachineInstr &MIb) const { const TargetRegisterInfo *TRI = &getRegisterInfo(); const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; int64_t OffsetA = 0, OffsetB = 0; @@ -1071,8 +1070,8 @@ static bool UpdateOperandRegClass(MachineInstr &Instr) { assert(MO.isReg() && "Operand has register constraints without being a register!"); - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) { if (!OpRegCstraints->contains(Reg)) return false; } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) && @@ -1472,6 +1471,8 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return false; MachineBasicBlock &MBB = 
*MI.getParent(); + auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>(); + auto TRI = Subtarget.getRegisterInfo(); DebugLoc DL = MI.getDebugLoc(); if (MI.getOpcode() == AArch64::CATCHRET) { @@ -1497,21 +1498,32 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); const GlobalValue *GV = cast<GlobalValue>((*MI.memoperands_begin())->getValue()); const TargetMachine &TM = MBB.getParent()->getTarget(); - unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM); + unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM); const unsigned char MO_NC = AArch64II::MO_NC; if ((OpFlags & AArch64II::MO_GOT) != 0) { BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg) .addGlobalAddress(GV, 0, OpFlags); - BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) - .addReg(Reg, RegState::Kill) - .addImm(0) - .addMemOperand(*MI.memoperands_begin()); + if (Subtarget.isTargetILP32()) { + unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32); + BuildMI(MBB, MI, DL, get(AArch64::LDRWui)) + .addDef(Reg32, RegState::Dead) + .addUse(Reg, RegState::Kill) + .addImm(0) + .addMemOperand(*MI.memoperands_begin()) + .addDef(Reg, RegState::Implicit); + } else { + BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) + .addReg(Reg, RegState::Kill) + .addImm(0) + .addMemOperand(*MI.memoperands_begin()); + } } else if (TM.getCodeModel() == CodeModel::Large) { + assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?"); BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg) .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC) .addImm(0); @@ -1538,10 +1550,20 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg) .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE); unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC; - BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) - .addReg(Reg, RegState::Kill) 
- .addGlobalAddress(GV, 0, LoFlags) - .addMemOperand(*MI.memoperands_begin()); + if (Subtarget.isTargetILP32()) { + unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32); + BuildMI(MBB, MI, DL, get(AArch64::LDRWui)) + .addDef(Reg32, RegState::Dead) + .addUse(Reg, RegState::Kill) + .addGlobalAddress(GV, 0, LoFlags) + .addMemOperand(*MI.memoperands_begin()) + .addDef(Reg, RegState::Implicit); + } else { + BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) + .addReg(Reg, RegState::Kill) + .addGlobalAddress(GV, 0, LoFlags) + .addMemOperand(*MI.memoperands_begin()); + } } MBB.erase(MI); @@ -1581,7 +1603,7 @@ bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) { break; case TargetOpcode::COPY: { // GPR32 copies will by lowered to ORRXrs - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); return (AArch64::GPR32RegClass.contains(DstReg) || AArch64::GPR64RegClass.contains(DstReg)); } @@ -1611,7 +1633,7 @@ bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) { break; case TargetOpcode::COPY: { // FPR64 copies will by lowered to ORR.16b - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); return (AArch64::FPR64RegClass.contains(DstReg) || AArch64::FPR128RegClass.contains(DstReg)); } @@ -1917,7 +1939,7 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const { // e.g., ldr x0, [x0] // This case will never occur with an FI base. 
if (MI.getOperand(1).isReg()) { - unsigned BaseReg = MI.getOperand(1).getReg(); + Register BaseReg = MI.getOperand(1).getReg(); const TargetRegisterInfo *TRI = &getRegisterInfo(); if (MI.modifiesRegister(BaseReg, TRI)) return false; @@ -1928,6 +1950,17 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const { if (isLdStPairSuppressed(MI)) return false; + // Do not pair any callee-save store/reload instructions in the + // prologue/epilogue if the CFI information encoded the operations as separate + // instructions, as that will cause the size of the actual prologue to mismatch + // with the prologue size recorded in the Windows CFI. + const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo(); + bool NeedsWinCFI = MAI->usesWindowsCFI() && + MI.getMF()->getFunction().needsUnwindTableEntry(); + if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) || + MI.getFlag(MachineInstr::FrameDestroy))) + return false; + // On some CPUs quad load/store pairs are slower than two single load/stores. 
if (Subtarget.isPaired128Slow()) { switch (MI.getOpcode()) { @@ -2165,6 +2198,18 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale, MinOffset = -256; MaxOffset = 255; break; + case AArch64::LDR_PXI: + case AArch64::STR_PXI: + Scale = Width = 2; + MinOffset = -256; + MaxOffset = 255; + break; + case AArch64::LDR_ZXI: + case AArch64::STR_ZXI: + Scale = Width = 16; + MinOffset = -256; + MaxOffset = 255; + break; case AArch64::ST2GOffset: case AArch64::STZ2GOffset: Scale = 16; @@ -2350,7 +2395,7 @@ static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB, if (!SubIdx) return MIB.addReg(Reg, State); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); return MIB.addReg(Reg, State, SubIdx); } @@ -2474,6 +2519,27 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } + // Copy a Predicate register by ORRing with itself. + if (AArch64::PPRRegClass.contains(DestReg) && + AArch64::PPRRegClass.contains(SrcReg)) { + assert(Subtarget.hasSVE() && "Unexpected SVE register."); + BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg) + .addReg(SrcReg) // Pg + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + + // Copy a Z register by ORRing with itself. 
+ if (AArch64::ZPRRegClass.contains(DestReg) && + AArch64::ZPRRegClass.contains(SrcReg)) { + assert(Subtarget.hasSVE() && "Unexpected SVE register."); + BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + if (AArch64::GPR64spRegClass.contains(DestReg) && (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) { if (DestReg == AArch64::SP || SrcReg == AArch64::SP) { @@ -2722,7 +2788,7 @@ static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineMemOperand *MMO) { unsigned SrcReg0 = SrcReg; unsigned SrcReg1 = SrcReg; - if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + if (Register::isPhysicalRegister(SrcReg)) { SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0); SubIdx0 = 0; SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1); @@ -2761,7 +2827,7 @@ void AArch64InstrInfo::storeRegToStackSlot( case 4: if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { Opc = AArch64::STRWui; - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) + if (Register::isVirtualRegister(SrcReg)) MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass); else assert(SrcReg != AArch64::WSP); @@ -2771,7 +2837,7 @@ void AArch64InstrInfo::storeRegToStackSlot( case 8: if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { Opc = AArch64::STRXui; - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) + if (Register::isVirtualRegister(SrcReg)) MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); else assert(SrcReg != AArch64::SP); @@ -2852,7 +2918,7 @@ static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, unsigned DestReg0 = DestReg; unsigned DestReg1 = DestReg; bool IsUndef = true; - if (TargetRegisterInfo::isPhysicalRegister(DestReg)) { + if (Register::isPhysicalRegister(DestReg)) { DestReg0 = TRI.getSubReg(DestReg, SubIdx0); SubIdx0 = 0; DestReg1 = TRI.getSubReg(DestReg, SubIdx1); @@ -2892,7 +2958,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( case 4: if 
(AArch64::GPR32allRegClass.hasSubClassEq(RC)) { Opc = AArch64::LDRWui; - if (TargetRegisterInfo::isVirtualRegister(DestReg)) + if (Register::isVirtualRegister(DestReg)) MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass); else assert(DestReg != AArch64::WSP); @@ -2902,7 +2968,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( case 8: if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { Opc = AArch64::LDRXui; - if (TargetRegisterInfo::isVirtualRegister(DestReg)) + if (Register::isVirtualRegister(DestReg)) MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass); else assert(DestReg != AArch64::SP); @@ -2972,21 +3038,39 @@ void AArch64InstrInfo::loadRegFromStackSlot( MI.addMemOperand(MMO); } -void llvm::emitFrameOffset(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, const DebugLoc &DL, - unsigned DestReg, unsigned SrcReg, int Offset, - const TargetInstrInfo *TII, - MachineInstr::MIFlag Flag, bool SetNZCV, - bool NeedsWinCFI, bool *HasWinCFI) { - if (DestReg == SrcReg && Offset == 0) - return; - - assert((DestReg != AArch64::SP || Offset % 16 == 0) && - "SP increment/decrement not 16-byte aligned"); - - bool isSub = Offset < 0; - if (isSub) - Offset = -Offset; +// Helper function to emit a frame offset adjustment from a given +// pointer (SrcReg), stored into DestReg. This function is explicit +// in that it requires the opcode. 
+static void emitFrameOffsetAdj(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned DestReg, + unsigned SrcReg, int64_t Offset, unsigned Opc, + const TargetInstrInfo *TII, + MachineInstr::MIFlag Flag, bool NeedsWinCFI, + bool *HasWinCFI) { + int Sign = 1; + unsigned MaxEncoding, ShiftSize; + switch (Opc) { + case AArch64::ADDXri: + case AArch64::ADDSXri: + case AArch64::SUBXri: + case AArch64::SUBSXri: + MaxEncoding = 0xfff; + ShiftSize = 12; + break; + case AArch64::ADDVL_XXI: + case AArch64::ADDPL_XXI: + MaxEncoding = 31; + ShiftSize = 0; + if (Offset < 0) { + MaxEncoding = 32; + Sign = -1; + Offset = -Offset; + } + break; + default: + llvm_unreachable("Unsupported opcode"); + } // FIXME: If the offset won't fit in 24-bits, compute the offset into a // scratch register. If DestReg is a virtual register, use it as the @@ -2999,65 +3083,94 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, // of code. // assert(Offset < (1 << 24) && "unimplemented reg plus immediate"); - unsigned Opc; - if (SetNZCV) - Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri; - else - Opc = isSub ? 
AArch64::SUBXri : AArch64::ADDXri; - const unsigned MaxEncoding = 0xfff; - const unsigned ShiftSize = 12; const unsigned MaxEncodableValue = MaxEncoding << ShiftSize; - while (((unsigned)Offset) >= (1 << ShiftSize)) { - unsigned ThisVal; - if (((unsigned)Offset) > MaxEncodableValue) { - ThisVal = MaxEncodableValue; - } else { - ThisVal = Offset & MaxEncodableValue; + do { + unsigned ThisVal = std::min<unsigned>(Offset, MaxEncodableValue); + unsigned LocalShiftSize = 0; + if (ThisVal > MaxEncoding) { + ThisVal = ThisVal >> ShiftSize; + LocalShiftSize = ShiftSize; } assert((ThisVal >> ShiftSize) <= MaxEncoding && "Encoding cannot handle value that big"); - BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) - .addReg(SrcReg) - .addImm(ThisVal >> ShiftSize) - .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize)) - .setMIFlag(Flag); - - if (NeedsWinCFI && SrcReg == AArch64::SP && DestReg == AArch64::SP) { + auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) + .addReg(SrcReg) + .addImm(Sign * (int)ThisVal); + if (ShiftSize) + MBI = MBI.addImm( + AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize)); + MBI = MBI.setMIFlag(Flag); + + if (NeedsWinCFI) { + assert(Sign == 1 && "SEH directives should always have a positive sign"); + int Imm = (int)(ThisVal << LocalShiftSize); + if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) || + (SrcReg == AArch64::FP && DestReg == AArch64::SP)) { + if (HasWinCFI) + *HasWinCFI = true; + if (Imm == 0) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag); + else + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)) + .addImm(Imm) + .setMIFlag(Flag); + assert((Offset - Imm) == 0 && "Expected remaining offset to be zero to " + "emit a single SEH directive"); + } else if (DestReg == AArch64::SP) { + if (HasWinCFI) + *HasWinCFI = true; + assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc"); + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)) + .addImm(Imm) + .setMIFlag(Flag); + } 
if (HasWinCFI) *HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)) - .addImm(ThisVal) - .setMIFlag(Flag); } SrcReg = DestReg; - Offset -= ThisVal; - if (Offset == 0) - return; - } - BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) - .addReg(SrcReg) - .addImm(Offset) - .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) - .setMIFlag(Flag); + Offset -= ThisVal << LocalShiftSize; + } while (Offset); +} - if (NeedsWinCFI) { - if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) || - (SrcReg == AArch64::FP && DestReg == AArch64::SP)) { - if (HasWinCFI) - *HasWinCFI = true; - if (Offset == 0) - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)). - setMIFlag(Flag); - else - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)). - addImm(Offset).setMIFlag(Flag); - } else if (DestReg == AArch64::SP) { - if (HasWinCFI) - *HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)). - addImm(Offset).setMIFlag(Flag); +void llvm::emitFrameOffset(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, const DebugLoc &DL, + unsigned DestReg, unsigned SrcReg, + StackOffset Offset, const TargetInstrInfo *TII, + MachineInstr::MIFlag Flag, bool SetNZCV, + bool NeedsWinCFI, bool *HasWinCFI) { + int64_t Bytes, NumPredicateVectors, NumDataVectors; + Offset.getForFrameOffset(Bytes, NumPredicateVectors, NumDataVectors); + + // First emit non-scalable frame offsets, or a simple 'mov'. + if (Bytes || (!Offset && SrcReg != DestReg)) { + assert((DestReg != AArch64::SP || Bytes % 16 == 0) && + "SP increment/decrement not 16-byte aligned"); + unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri; + if (Bytes < 0) { + Bytes = -Bytes; + Opc = SetNZCV ? 
AArch64::SUBSXri : AArch64::SUBXri; } + emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag, + NeedsWinCFI, HasWinCFI); + SrcReg = DestReg; + } + + assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) && + "SetNZCV not supported with SVE vectors"); + assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) && + "WinCFI not supported with SVE vectors"); + + if (NumDataVectors) { + emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors, + AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr); + SrcReg = DestReg; + } + + if (NumPredicateVectors) { + assert(DestReg != AArch64::SP && "Unaligned access to SP"); + emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors, + AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr); } } @@ -3079,15 +3192,13 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( // <rdar://problem/11522048> // if (MI.isFullCopy()) { - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); - if (SrcReg == AArch64::SP && - TargetRegisterInfo::isVirtualRegister(DstReg)) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + if (SrcReg == AArch64::SP && Register::isVirtualRegister(DstReg)) { MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass); return nullptr; } - if (DstReg == AArch64::SP && - TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if (DstReg == AArch64::SP && Register::isVirtualRegister(SrcReg)) { MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); return nullptr; } @@ -3127,14 +3238,13 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( MachineBasicBlock &MBB = *MI.getParent(); const MachineOperand &DstMO = MI.getOperand(0); const MachineOperand &SrcMO = MI.getOperand(1); - unsigned DstReg = DstMO.getReg(); - unsigned SrcReg = SrcMO.getReg(); + Register DstReg = DstMO.getReg(); + Register SrcReg = SrcMO.getReg(); // This is slightly expensive to compute for 
physical regs since // getMinimalPhysRegClass is slow. auto getRegClass = [&](unsigned Reg) { - return TargetRegisterInfo::isVirtualRegister(Reg) - ? MRI.getRegClass(Reg) - : TRI.getMinimalPhysRegClass(Reg); + return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg) + : TRI.getMinimalPhysRegClass(Reg); }; if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) { @@ -3159,8 +3269,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( // // STRXui %xzr, %stack.0 // - if (IsSpill && DstMO.isUndef() && - TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + if (IsSpill && DstMO.isUndef() && Register::isPhysicalRegister(SrcReg)) { assert(SrcMO.getSubReg() == 0 && "Unexpected subreg on physical register"); const TargetRegisterClass *SpillRC; @@ -3243,10 +3352,23 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( return nullptr; } -int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, +static bool isSVEScaledImmInstruction(unsigned Opcode) { + switch (Opcode) { + case AArch64::LDR_ZXI: + case AArch64::STR_ZXI: + case AArch64::LDR_PXI: + case AArch64::STR_PXI: + return true; + default: + return false; + } +} + +int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, + StackOffset &SOffset, bool *OutUseUnscaledOp, unsigned *OutUnscaledOp, - int *EmittableOffset) { + int64_t *EmittableOffset) { // Set output values in case of early exit. if (EmittableOffset) *EmittableOffset = 0; @@ -3285,6 +3407,10 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal"); // Construct the complete offset. + bool IsMulVL = isSVEScaledImmInstruction(MI.getOpcode()); + int64_t Offset = + IsMulVL ? 
(SOffset.getScalableBytes()) : (SOffset.getBytes()); + const MachineOperand &ImmOpnd = MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode())); Offset += ImmOpnd.getImm() * Scale; @@ -3304,7 +3430,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, "Cannot have remainder when using unscaled op"); assert(MinOff < MaxOff && "Unexpected Min/Max offsets"); - int NewOffset = Offset / Scale; + int64_t NewOffset = Offset / Scale; if (MinOff <= NewOffset && NewOffset <= MaxOff) Offset = Remainder; else { @@ -3319,27 +3445,33 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, if (OutUnscaledOp && UnscaledOp) *OutUnscaledOp = *UnscaledOp; + if (IsMulVL) + SOffset = StackOffset(Offset, MVT::nxv1i8) + + StackOffset(SOffset.getBytes(), MVT::i8); + else + SOffset = StackOffset(Offset, MVT::i8) + + StackOffset(SOffset.getScalableBytes(), MVT::nxv1i8); return AArch64FrameOffsetCanUpdate | - (Offset == 0 ? AArch64FrameOffsetIsLegal : 0); + (SOffset ? 
0 : AArch64FrameOffsetIsLegal); } bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned FrameReg, int &Offset, + unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII) { unsigned Opcode = MI.getOpcode(); unsigned ImmIdx = FrameRegIdx + 1; if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { - Offset += MI.getOperand(ImmIdx).getImm(); + Offset += StackOffset(MI.getOperand(ImmIdx).getImm(), MVT::i8); emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), MI.getOperand(0).getReg(), FrameReg, Offset, TII, MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); MI.eraseFromParent(); - Offset = 0; + Offset = StackOffset(); return true; } - int NewOffset; + int64_t NewOffset; unsigned UnscaledOp; bool UseUnscaledOp; int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, @@ -3352,7 +3484,7 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, MI.setDesc(TII->get(UnscaledOp)); MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset); - return Offset == 0; + return !Offset; } return false; @@ -3428,13 +3560,19 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) { switch (Inst.getOpcode()) { default: break; + case AArch64::FADDHrr: case AArch64::FADDSrr: case AArch64::FADDDrr: + case AArch64::FADDv4f16: + case AArch64::FADDv8f16: case AArch64::FADDv2f32: case AArch64::FADDv2f64: case AArch64::FADDv4f32: + case AArch64::FSUBHrr: case AArch64::FSUBSrr: case AArch64::FSUBDrr: + case AArch64::FSUBv4f16: + case AArch64::FSUBv8f16: case AArch64::FSUBv2f32: case AArch64::FSUBv2f64: case AArch64::FSUBv4f32: @@ -3459,7 +3597,7 @@ static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineInstr *MI = nullptr; - if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) MI = MRI.getUniqueVRegDef(MO.getReg()); // And it needs to be in the trace 
(otherwise, it won't have a depth). if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc) @@ -3544,86 +3682,48 @@ static bool getMaddPatterns(MachineInstr &Root, Opc = NewOpc; } + auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg, + MachineCombinerPattern Pattern) { + if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) { + Patterns.push_back(Pattern); + Found = true; + } + }; + + typedef MachineCombinerPattern MCP; + switch (Opc) { default: break; case AArch64::ADDWrr: assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && "ADDWrr does not have register operands"); - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, - AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MULADDW_OP1); - Found = true; - } - if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, - AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MULADDW_OP2); - Found = true; - } + setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1); + setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2); break; case AArch64::ADDXrr: - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, - AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MULADDX_OP1); - Found = true; - } - if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, - AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MULADDX_OP2); - Found = true; - } + setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1); + setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2); break; case AArch64::SUBWrr: - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, - AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1); - Found = true; - } - if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, - AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2); - Found = true; - } + setFound(AArch64::MADDWrrr, 1, 
AArch64::WZR, MCP::MULSUBW_OP1); + setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2); break; case AArch64::SUBXrr: - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, - AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1); - Found = true; - } - if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, - AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2); - Found = true; - } + setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1); + setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2); break; case AArch64::ADDWri: - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, - AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1); - Found = true; - } + setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1); break; case AArch64::ADDXri: - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, - AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1); - Found = true; - } + setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1); break; case AArch64::SUBWri: - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, - AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1); - Found = true; - } + setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1); break; case AArch64::SUBXri: - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, - AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1); - Found = true; - } + setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1); break; } return Found; @@ -3640,204 +3740,135 @@ static bool getFMAPatterns(MachineInstr &Root, MachineBasicBlock &MBB = *Root.getParent(); bool Found = false; + auto Match = [&](int Opcode, int Operand, + MachineCombinerPattern Pattern) -> bool { + if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) { + Patterns.push_back(Pattern); + return 
true; + } + return false; + }; + + typedef MachineCombinerPattern MCP; + switch (Root.getOpcode()) { default: assert(false && "Unsupported FP instruction in combiner\n"); break; + case AArch64::FADDHrr: + assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && + "FADDHrr does not have register operands"); + + Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1); + Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2); + break; case AArch64::FADDSrr: assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && - "FADDWrr does not have register operands"); - if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) { - Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv1i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) { - Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv1i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2); - Found = true; - } + "FADDSrr does not have register operands"); + + Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) || + Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1); + + Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) || + Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2); break; case AArch64::FADDDrr: - if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) { - Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv1i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) { - 
Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv1i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2); - Found = true; - } + Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) || + Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1); + + Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) || + Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2); + break; + case AArch64::FADDv4f16: + Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) || + Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1); + + Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) || + Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2); + break; + case AArch64::FADDv8f16: + Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) || + Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1); + + Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) || + Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2); break; case AArch64::FADDv2f32: - if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2f32)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2f32)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2); - Found = true; - } + Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) || + Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1); + + Found |= Match(AArch64::FMULv2i32_indexed, 2, 
MCP::FMLAv2i32_indexed_OP2) || + Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2); break; case AArch64::FADDv2f64: - if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2f64)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2f64)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2); - Found = true; - } + Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) || + Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1); + + Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) || + Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2); break; case AArch64::FADDv4f32: - if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv4i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv4f32)) { - Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv4i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv4f32)) { - Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2); - Found = true; - } - break; + Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) || + Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1); + Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) || + Match(AArch64::FMULv4f32, 2, 
MCP::FMLAv4f32_OP2); + break; + case AArch64::FSUBHrr: + Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1); + Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2); + Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1); + break; case AArch64::FSUBSrr: - if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) { - Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) { - Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv1i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) { - Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1); - Found = true; - } + Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1); + + Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) || + Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2); + + Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1); break; case AArch64::FSUBDrr: - if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) { - Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) { - Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv1i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) { - Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1); - Found = true; - } + Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1); + + Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) || + Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2); + + Found 
|= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1); + break; + case AArch64::FSUBv4f16: + Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) || + Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2); + + Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) || + Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1); + break; + case AArch64::FSUBv8f16: + Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) || + Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2); + + Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) || + Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1); break; case AArch64::FSUBv2f32: - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2f32)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2f32)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1); - Found = true; - } + Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) || + Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2); + + Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) || + Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1); break; case AArch64::FSUBv2f64: - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2f64)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2); - Found = true; - } - if 
(canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2f64)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1); - Found = true; - } + Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) || + Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2); + + Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) || + Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1); break; case AArch64::FSUBv4f32: - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv4i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv4f32)) { - Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv4i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv4f32)) { - Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1); - Found = true; - } + Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) || + Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2); + + Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) || + Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1); break; } return Found; @@ -3851,6 +3882,10 @@ bool AArch64InstrInfo::isThroughputPattern( switch (Pattern) { default: break; + case MachineCombinerPattern::FMULADDH_OP1: + case MachineCombinerPattern::FMULADDH_OP2: + case MachineCombinerPattern::FMULSUBH_OP1: + case MachineCombinerPattern::FMULSUBH_OP2: case MachineCombinerPattern::FMULADDS_OP1: case MachineCombinerPattern::FMULADDS_OP2: case MachineCombinerPattern::FMULSUBS_OP1: @@ -3859,12 
+3894,21 @@ bool AArch64InstrInfo::isThroughputPattern( case MachineCombinerPattern::FMULADDD_OP2: case MachineCombinerPattern::FMULSUBD_OP1: case MachineCombinerPattern::FMULSUBD_OP2: + case MachineCombinerPattern::FNMULSUBH_OP1: case MachineCombinerPattern::FNMULSUBS_OP1: case MachineCombinerPattern::FNMULSUBD_OP1: + case MachineCombinerPattern::FMLAv4i16_indexed_OP1: + case MachineCombinerPattern::FMLAv4i16_indexed_OP2: + case MachineCombinerPattern::FMLAv8i16_indexed_OP1: + case MachineCombinerPattern::FMLAv8i16_indexed_OP2: case MachineCombinerPattern::FMLAv1i32_indexed_OP1: case MachineCombinerPattern::FMLAv1i32_indexed_OP2: case MachineCombinerPattern::FMLAv1i64_indexed_OP1: case MachineCombinerPattern::FMLAv1i64_indexed_OP2: + case MachineCombinerPattern::FMLAv4f16_OP2: + case MachineCombinerPattern::FMLAv4f16_OP1: + case MachineCombinerPattern::FMLAv8f16_OP1: + case MachineCombinerPattern::FMLAv8f16_OP2: case MachineCombinerPattern::FMLAv2f32_OP2: case MachineCombinerPattern::FMLAv2f32_OP1: case MachineCombinerPattern::FMLAv2f64_OP1: @@ -3877,10 +3921,18 @@ bool AArch64InstrInfo::isThroughputPattern( case MachineCombinerPattern::FMLAv4f32_OP2: case MachineCombinerPattern::FMLAv4i32_indexed_OP1: case MachineCombinerPattern::FMLAv4i32_indexed_OP2: + case MachineCombinerPattern::FMLSv4i16_indexed_OP1: + case MachineCombinerPattern::FMLSv4i16_indexed_OP2: + case MachineCombinerPattern::FMLSv8i16_indexed_OP1: + case MachineCombinerPattern::FMLSv8i16_indexed_OP2: case MachineCombinerPattern::FMLSv1i32_indexed_OP2: case MachineCombinerPattern::FMLSv1i64_indexed_OP2: case MachineCombinerPattern::FMLSv2i32_indexed_OP2: case MachineCombinerPattern::FMLSv2i64_indexed_OP2: + case MachineCombinerPattern::FMLSv4f16_OP1: + case MachineCombinerPattern::FMLSv4f16_OP2: + case MachineCombinerPattern::FMLSv8f16_OP1: + case MachineCombinerPattern::FMLSv8f16_OP2: case MachineCombinerPattern::FMLSv2f32_OP2: case MachineCombinerPattern::FMLSv2f64_OP2: case 
MachineCombinerPattern::FMLSv4i32_indexed_OP2: @@ -3933,15 +3985,15 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind = FMAInstKind::Default, - const unsigned *ReplacedAddend = nullptr) { + const Register *ReplacedAddend = nullptr) { assert(IdxMulOpd == 1 || IdxMulOpd == 2); unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1; MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg()); - unsigned ResultReg = Root.getOperand(0).getReg(); - unsigned SrcReg0 = MUL->getOperand(1).getReg(); + Register ResultReg = Root.getOperand(0).getReg(); + Register SrcReg0 = MUL->getOperand(1).getReg(); bool Src0IsKill = MUL->getOperand(1).isKill(); - unsigned SrcReg1 = MUL->getOperand(2).getReg(); + Register SrcReg1 = MUL->getOperand(2).getReg(); bool Src1IsKill = MUL->getOperand(2).isKill(); unsigned SrcReg2; @@ -3955,13 +4007,13 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, Src2IsKill = Root.getOperand(IdxOtherOpd).isKill(); } - if (TargetRegisterInfo::isVirtualRegister(ResultReg)) + if (Register::isVirtualRegister(ResultReg)) MRI.constrainRegClass(ResultReg, RC); - if (TargetRegisterInfo::isVirtualRegister(SrcReg0)) + if (Register::isVirtualRegister(SrcReg0)) MRI.constrainRegClass(SrcReg0, RC); - if (TargetRegisterInfo::isVirtualRegister(SrcReg1)) + if (Register::isVirtualRegister(SrcReg1)) MRI.constrainRegClass(SrcReg1, RC); - if (TargetRegisterInfo::isVirtualRegister(SrcReg2)) + if (Register::isVirtualRegister(SrcReg2)) MRI.constrainRegClass(SrcReg2, RC); MachineInstrBuilder MIB; @@ -4015,19 +4067,19 @@ static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, assert(IdxMulOpd == 1 || IdxMulOpd == 2); MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg()); - unsigned ResultReg = Root.getOperand(0).getReg(); - unsigned SrcReg0 = MUL->getOperand(1).getReg(); + 
Register ResultReg = Root.getOperand(0).getReg(); + Register SrcReg0 = MUL->getOperand(1).getReg(); bool Src0IsKill = MUL->getOperand(1).isKill(); - unsigned SrcReg1 = MUL->getOperand(2).getReg(); + Register SrcReg1 = MUL->getOperand(2).getReg(); bool Src1IsKill = MUL->getOperand(2).isKill(); - if (TargetRegisterInfo::isVirtualRegister(ResultReg)) + if (Register::isVirtualRegister(ResultReg)) MRI.constrainRegClass(ResultReg, RC); - if (TargetRegisterInfo::isVirtualRegister(SrcReg0)) + if (Register::isVirtualRegister(SrcReg0)) MRI.constrainRegClass(SrcReg0, RC); - if (TargetRegisterInfo::isVirtualRegister(SrcReg1)) + if (Register::isVirtualRegister(SrcReg1)) MRI.constrainRegClass(SrcReg1, RC); - if (TargetRegisterInfo::isVirtualRegister(VR)) + if (Register::isVirtualRegister(VR)) MRI.constrainRegClass(VR, RC); MachineInstrBuilder MIB = @@ -4116,7 +4168,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( Opc = AArch64::MADDXrrr; RC = &AArch64::GPR64RegClass; } - unsigned NewVR = MRI.createVirtualRegister(OrrRC); + Register NewVR = MRI.createVirtualRegister(OrrRC); uint64_t Imm = Root.getOperand(2).getImm(); if (Root.getOperand(3).isImm()) { @@ -4158,7 +4210,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( Opc = AArch64::MADDXrrr; RC = &AArch64::GPR64RegClass; } - unsigned NewVR = MRI.createVirtualRegister(SubRC); + Register NewVR = MRI.createVirtualRegister(SubRC); // SUB NewVR, 0, C MachineInstrBuilder MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR) @@ -4208,7 +4260,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( Opc = AArch64::MADDXrrr; RC = &AArch64::GPR64RegClass; } - unsigned NewVR = MRI.createVirtualRegister(OrrRC); + Register NewVR = MRI.createVirtualRegister(OrrRC); uint64_t Imm = Root.getOperand(2).getImm(); if (Root.getOperand(3).isImm()) { unsigned Val = Root.getOperand(3).getImm(); @@ -4228,34 +4280,35 @@ void AArch64InstrInfo::genAlternativeCodeSequence( break; } // Floating Point Support + case 
MachineCombinerPattern::FMULADDH_OP1: + Opc = AArch64::FMADDHrrr; + RC = &AArch64::FPR16RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; case MachineCombinerPattern::FMULADDS_OP1: + Opc = AArch64::FMADDSrrr; + RC = &AArch64::FPR32RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; case MachineCombinerPattern::FMULADDD_OP1: - // MUL I=A,B,0 - // ADD R,I,C - // ==> MADD R,A,B,C - // --- Create(MADD); - if (Pattern == MachineCombinerPattern::FMULADDS_OP1) { - Opc = AArch64::FMADDSrrr; - RC = &AArch64::FPR32RegClass; - } else { - Opc = AArch64::FMADDDrrr; - RC = &AArch64::FPR64RegClass; - } + Opc = AArch64::FMADDDrrr; + RC = &AArch64::FPR64RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; + + case MachineCombinerPattern::FMULADDH_OP2: + Opc = AArch64::FMADDHrrr; + RC = &AArch64::FPR16RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; case MachineCombinerPattern::FMULADDS_OP2: + Opc = AArch64::FMADDSrrr; + RC = &AArch64::FPR32RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; case MachineCombinerPattern::FMULADDD_OP2: - // FMUL I=A,B,0 - // FADD R,C,I - // ==> FMADD R,A,B,C - // --- Create(FMADD); - if (Pattern == MachineCombinerPattern::FMULADDS_OP2) { - Opc = AArch64::FMADDSrrr; - RC = &AArch64::FPR32RegClass; - } else { - Opc = AArch64::FMADDDrrr; - RC = &AArch64::FPR64RegClass; - } + Opc = AArch64::FMADDDrrr; + RC = &AArch64::FPR64RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; @@ -4285,6 +4338,31 @@ void AArch64InstrInfo::genAlternativeCodeSequence( FMAInstKind::Indexed); break; + case MachineCombinerPattern::FMLAv4i16_indexed_OP1: + RC = &AArch64::FPR64RegClass; + Opc = AArch64::FMLAv4i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Indexed); + break; + case MachineCombinerPattern::FMLAv4f16_OP1: + RC = 
&AArch64::FPR64RegClass; + Opc = AArch64::FMLAv4f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Accumulator); + break; + case MachineCombinerPattern::FMLAv4i16_indexed_OP2: + RC = &AArch64::FPR64RegClass; + Opc = AArch64::FMLAv4i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Indexed); + break; + case MachineCombinerPattern::FMLAv4f16_OP2: + RC = &AArch64::FPR64RegClass; + Opc = AArch64::FMLAv4f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Accumulator); + break; + case MachineCombinerPattern::FMLAv2i32_indexed_OP1: case MachineCombinerPattern::FMLAv2f32_OP1: RC = &AArch64::FPR64RegClass; @@ -4312,6 +4390,31 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; + case MachineCombinerPattern::FMLAv8i16_indexed_OP1: + RC = &AArch64::FPR128RegClass; + Opc = AArch64::FMLAv8i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Indexed); + break; + case MachineCombinerPattern::FMLAv8f16_OP1: + RC = &AArch64::FPR128RegClass; + Opc = AArch64::FMLAv8f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Accumulator); + break; + case MachineCombinerPattern::FMLAv8i16_indexed_OP2: + RC = &AArch64::FPR128RegClass; + Opc = AArch64::FMLAv8i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Indexed); + break; + case MachineCombinerPattern::FMLAv8f16_OP2: + RC = &AArch64::FPR128RegClass; + Opc = AArch64::FMLAv8f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Accumulator); + break; + case MachineCombinerPattern::FMLAv2i64_indexed_OP1: case MachineCombinerPattern::FMLAv2f64_OP1: RC = &AArch64::FPR128RegClass; @@ -4367,56 +4470,53 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; + case MachineCombinerPattern::FMULSUBH_OP1: + Opc = AArch64::FNMSUBHrrr; + RC = 
&AArch64::FPR16RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; case MachineCombinerPattern::FMULSUBS_OP1: - case MachineCombinerPattern::FMULSUBD_OP1: { - // FMUL I=A,B,0 - // FSUB R,I,C - // ==> FNMSUB R,A,B,C // = -C + A*B - // --- Create(FNMSUB); - if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) { - Opc = AArch64::FNMSUBSrrr; - RC = &AArch64::FPR32RegClass; - } else { - Opc = AArch64::FNMSUBDrrr; - RC = &AArch64::FPR64RegClass; - } + Opc = AArch64::FNMSUBSrrr; + RC = &AArch64::FPR32RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::FMULSUBD_OP1: + Opc = AArch64::FNMSUBDrrr; + RC = &AArch64::FPR64RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - } + case MachineCombinerPattern::FNMULSUBH_OP1: + Opc = AArch64::FNMADDHrrr; + RC = &AArch64::FPR16RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; case MachineCombinerPattern::FNMULSUBS_OP1: - case MachineCombinerPattern::FNMULSUBD_OP1: { - // FNMUL I=A,B,0 - // FSUB R,I,C - // ==> FNMADD R,A,B,C // = -A*B - C - // --- Create(FNMADD); - if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) { - Opc = AArch64::FNMADDSrrr; - RC = &AArch64::FPR32RegClass; - } else { - Opc = AArch64::FNMADDDrrr; - RC = &AArch64::FPR64RegClass; - } + Opc = AArch64::FNMADDSrrr; + RC = &AArch64::FPR32RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::FNMULSUBD_OP1: + Opc = AArch64::FNMADDDrrr; + RC = &AArch64::FPR64RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - } + case MachineCombinerPattern::FMULSUBH_OP2: + Opc = AArch64::FMSUBHrrr; + RC = &AArch64::FPR16RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; case MachineCombinerPattern::FMULSUBS_OP2: - case MachineCombinerPattern::FMULSUBD_OP2: { - // FMUL I=A,B,0 - // 
FSUB R,C,I - // ==> FMSUB R,A,B,C (computes C - A*B) - // --- Create(FMSUB); - if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) { - Opc = AArch64::FMSUBSrrr; - RC = &AArch64::FPR32RegClass; - } else { - Opc = AArch64::FMSUBDrrr; - RC = &AArch64::FPR64RegClass; - } + Opc = AArch64::FMSUBSrrr; + RC = &AArch64::FPR32RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + case MachineCombinerPattern::FMULSUBD_OP2: + Opc = AArch64::FMSUBDrrr; + RC = &AArch64::FPR64RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - } case MachineCombinerPattern::FMLSv1i32_indexed_OP2: Opc = AArch64::FMLSv1i32_indexed; @@ -4432,6 +4532,39 @@ void AArch64InstrInfo::genAlternativeCodeSequence( FMAInstKind::Indexed); break; + case MachineCombinerPattern::FMLSv4f16_OP1: + case MachineCombinerPattern::FMLSv4i16_indexed_OP1: { + RC = &AArch64::FPR64RegClass; + Register NewVR = MRI.createVirtualRegister(RC); + MachineInstrBuilder MIB1 = + BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f16), NewVR) + .add(Root.getOperand(2)); + InsInstrs.push_back(MIB1); + InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); + if (Pattern == MachineCombinerPattern::FMLSv4f16_OP1) { + Opc = AArch64::FMLAv4f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Accumulator, &NewVR); + } else { + Opc = AArch64::FMLAv4i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Indexed, &NewVR); + } + break; + } + case MachineCombinerPattern::FMLSv4f16_OP2: + RC = &AArch64::FPR64RegClass; + Opc = AArch64::FMLSv4f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Accumulator); + break; + case MachineCombinerPattern::FMLSv4i16_indexed_OP2: + RC = &AArch64::FPR64RegClass; + Opc = AArch64::FMLSv4i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Indexed); + break; + case 
MachineCombinerPattern::FMLSv2f32_OP2: case MachineCombinerPattern::FMLSv2i32_indexed_OP2: RC = &AArch64::FPR64RegClass; @@ -4446,6 +4579,39 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; + case MachineCombinerPattern::FMLSv8f16_OP1: + case MachineCombinerPattern::FMLSv8i16_indexed_OP1: { + RC = &AArch64::FPR128RegClass; + Register NewVR = MRI.createVirtualRegister(RC); + MachineInstrBuilder MIB1 = + BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv8f16), NewVR) + .add(Root.getOperand(2)); + InsInstrs.push_back(MIB1); + InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); + if (Pattern == MachineCombinerPattern::FMLSv8f16_OP1) { + Opc = AArch64::FMLAv8f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Accumulator, &NewVR); + } else { + Opc = AArch64::FMLAv8i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Indexed, &NewVR); + } + break; + } + case MachineCombinerPattern::FMLSv8f16_OP2: + RC = &AArch64::FPR128RegClass; + Opc = AArch64::FMLSv8f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Accumulator); + break; + case MachineCombinerPattern::FMLSv8i16_indexed_OP2: + RC = &AArch64::FPR128RegClass; + Opc = AArch64::FMLSv8i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Indexed); + break; + case MachineCombinerPattern::FMLSv2f64_OP2: case MachineCombinerPattern::FMLSv2i64_indexed_OP2: RC = &AArch64::FPR128RegClass; @@ -4476,7 +4642,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( case MachineCombinerPattern::FMLSv2f32_OP1: case MachineCombinerPattern::FMLSv2i32_indexed_OP1: { RC = &AArch64::FPR64RegClass; - unsigned NewVR = MRI.createVirtualRegister(RC); + Register NewVR = MRI.createVirtualRegister(RC); MachineInstrBuilder MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR) .add(Root.getOperand(2)); @@ -4496,7 +4662,7 @@ void 
AArch64InstrInfo::genAlternativeCodeSequence( case MachineCombinerPattern::FMLSv4f32_OP1: case MachineCombinerPattern::FMLSv4i32_indexed_OP1: { RC = &AArch64::FPR128RegClass; - unsigned NewVR = MRI.createVirtualRegister(RC); + Register NewVR = MRI.createVirtualRegister(RC); MachineInstrBuilder MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR) .add(Root.getOperand(2)); @@ -4516,7 +4682,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( case MachineCombinerPattern::FMLSv2f64_OP1: case MachineCombinerPattern::FMLSv2i64_indexed_OP1: { RC = &AArch64::FPR128RegClass; - unsigned NewVR = MRI.createVirtualRegister(RC); + Register NewVR = MRI.createVirtualRegister(RC); MachineInstrBuilder MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR) .add(Root.getOperand(2)); @@ -4617,15 +4783,15 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const { MachineBasicBlock *MBB = MI.getParent(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); - unsigned VReg = MI.getOperand(0).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(VReg)) + Register VReg = MI.getOperand(0).getReg(); + if (!Register::isVirtualRegister(VReg)) return false; MachineInstr *DefMI = MRI->getVRegDef(VReg); // Look through COPY instructions to find definition. 
while (DefMI->isCopy()) { - unsigned CopyVReg = DefMI->getOperand(1).getReg(); + Register CopyVReg = DefMI->getOperand(1).getReg(); if (!MRI->hasOneNonDBGUse(CopyVReg)) return false; if (!MRI->hasOneDef(CopyVReg)) @@ -4653,8 +4819,8 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const { return false; MachineOperand &MO = DefMI->getOperand(1); - unsigned NewReg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(NewReg)) + Register NewReg = MO.getReg(); + if (!Register::isVirtualRegister(NewReg)) return false; assert(!MRI->def_empty(NewReg) && "Register must be defined."); @@ -4737,9 +4903,13 @@ AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { static const std::pair<unsigned, const char *> TargetFlags[] = { {MO_COFFSTUB, "aarch64-coffstub"}, - {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, - {MO_S, "aarch64-s"}, {MO_TLS, "aarch64-tls"}, - {MO_DLLIMPORT, "aarch64-dllimport"}}; + {MO_GOT, "aarch64-got"}, + {MO_NC, "aarch64-nc"}, + {MO_S, "aarch64-s"}, + {MO_TLS, "aarch64-tls"}, + {MO_DLLIMPORT, "aarch64-dllimport"}, + {MO_PREL, "aarch64-prel"}, + {MO_TAGGED, "aarch64-tagged"}}; return makeArrayRef(TargetFlags); } |