Diffstat (limited to 'lib/Target/ARM/ARMBaseInstrInfo.cpp')
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.cpp | 412
1 file changed, 327 insertions, 85 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index bbebed59c851..222aa85856a2 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1,9 +1,8 @@
 //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -134,7 +133,7 @@ ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
 ScheduleHazardRecognizer *ARMBaseInstrInfo::
 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                    const ScheduleDAG *DAG) const {
-  if (Subtarget.isThumb2() || Subtarget.hasVFP2())
+  if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
     return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
   return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
 }
@@ -707,15 +706,7 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
   if (MCID.getSize())
     return MCID.getSize();

-  // If this machine instr is an inline asm, measure it.
-  if (MI.getOpcode() == ARM::INLINEASM) {
-    unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
-    if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
-      Size = alignTo(Size, 4);
-    return Size;
-  }
-  unsigned Opc = MI.getOpcode();
-  switch (Opc) {
+  switch (MI.getOpcode()) {
   default:
     // pseudo-instruction sizes are zero.
     return 0;
@@ -752,6 +743,14 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
     return 12;
   case ARM::SPACE:
     return MI.getOperand(1).getImm();
+  case ARM::INLINEASM:
+  case ARM::INLINEASM_BR: {
+    // If this machine instr is an inline asm, measure it.
+    unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
+    if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
+      Size = alignTo(Size, 4);
+    return Size;
+  }
   }
 }

@@ -806,6 +805,28 @@ void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
       .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
 }

+void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
+  MIB.addImm(ARMVCC::None);
+  MIB.addReg(0);
+}
+
+void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
+                                      unsigned DestReg) {
+  addUnpredicatedMveVpredNOp(MIB);
+  MIB.addReg(DestReg, RegState::Undef);
+}
+
+void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
+  MIB.addImm(Cond);
+  MIB.addReg(ARM::VPR, RegState::Implicit);
+}
+
+void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
+                                    unsigned Cond, unsigned Inactive) {
+  addPredicatedMveVpredNOp(MIB, Cond);
+  MIB.addReg(Inactive);
+}
+
 void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DestReg,
@@ -831,17 +852,20 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     Opc = ARM::VMOVRS;
   else if (SPRDest && GPRSrc)
     Opc = ARM::VMOVSR;
-  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
+  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
     Opc = ARM::VMOVD;
   else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
-    Opc = ARM::VORRq;
+    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;

   if (Opc) {
     MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
     MIB.addReg(SrcReg, getKillRegState(KillSrc));
-    if (Opc == ARM::VORRq)
+    if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
       MIB.addReg(SrcReg, getKillRegState(KillSrc));
-    MIB.add(predOps(ARMCC::AL));
+    if (Opc == ARM::MVE_VORR)
+      addUnpredicatedMveVpredROp(MIB, DestReg);
+    else
+      MIB.add(predOps(ARMCC::AL));
     return;
   }

@@ -852,11 +876,11 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,

   // Use VORRq when possible.
   if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
-    Opc = ARM::VORRq;
+    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
     BeginIdx = ARM::qsub_0;
     SubRegs = 2;
   } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
-    Opc = ARM::VORRq;
+    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
     BeginIdx = ARM::qsub_0;
     SubRegs = 4;
   // Fall back to VMOVD.
@@ -891,7 +915,8 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     BeginIdx = ARM::dsub_0;
     SubRegs = 4;
     Spacing = 2;
-  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
+  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
+             !Subtarget.hasFP64()) {
     Opc = ARM::VMOVS;
     BeginIdx = ARM::ssub_0;
     SubRegs = 2;
@@ -901,6 +926,30 @@
   } else if (DestReg == ARM::CPSR) {
     copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
     return;
+  } else if (DestReg == ARM::VPR) {
+    assert(ARM::GPRRegClass.contains(SrcReg));
+    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc))
+        .add(predOps(ARMCC::AL));
+    return;
+  } else if (SrcReg == ARM::VPR) {
+    assert(ARM::GPRRegClass.contains(DestReg));
+    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc))
+        .add(predOps(ARMCC::AL));
+    return;
+  } else if (DestReg == ARM::FPSCR_NZCV) {
+    assert(ARM::GPRRegClass.contains(SrcReg));
+    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc))
+        .add(predOps(ARMCC::AL));
+    return;
+  } else if (SrcReg == ARM::FPSCR_NZCV) {
+    assert(ARM::GPRRegClass.contains(DestReg));
+    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc))
+        .add(predOps(ARMCC::AL));
+    return;
   }

   assert(Opc && "Impossible reg-to-reg copy");
@@ -925,10 +974,15 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     DstRegs.insert(Dst);
 #endif
     Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
-    // VORR takes two source operands.
-    if (Opc == ARM::VORRq)
+    // VORR (NEON or MVE) takes two source operands.
+    if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
       Mov.addReg(Src);
-    Mov = Mov.add(predOps(ARMCC::AL));
+    }
+    // MVE VORR takes predicate operands in place of an ordinary condition.
+    if (Opc == ARM::MVE_VORR)
+      addUnpredicatedMveVpredROp(Mov, Dst);
+    else
+      Mov = Mov.add(predOps(ARMCC::AL));
     // MOVr can set CC.
     if (Opc == ARM::MOVr)
       Mov = Mov.add(condCodeOp());
@@ -1010,6 +1064,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
           .addImm(0)
           .addMemOperand(MMO)
           .add(predOps(ARMCC::AL));
+    } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
+      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
+          .addReg(SrcReg, getKillRegState(isKill))
+          .addFrameIndex(FI)
+          .addImm(0)
+          .addMemOperand(MMO)
+          .add(predOps(ARMCC::AL));
     } else
       llvm_unreachable("Unknown reg class!");
     break;
@@ -1042,7 +1103,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
       llvm_unreachable("Unknown reg class!");
     break;
   case 16:
-    if (ARM::DPairRegClass.hasSubClassEq(RC)) {
+    if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
       // Use aligned spills if the stack can be realigned.
       if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
         BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
@@ -1058,6 +1119,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
             .addMemOperand(MMO)
             .add(predOps(ARMCC::AL));
       }
+    } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
+               Subtarget.hasMVEIntegerOps()) {
+      auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
+      MIB.addReg(SrcReg, getKillRegState(isKill))
+        .addFrameIndex(FI)
+        .addImm(0)
+        .addMemOperand(MMO);
+      addUnpredicatedMveVpredNOp(MIB);
     } else
       llvm_unreachable("Unknown reg class!");
     break;
@@ -1155,6 +1224,13 @@ unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
       return MI.getOperand(0).getReg();
     }
     break;
+  case ARM::VSTR_P0_off:
+    if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() &&
+        MI.getOperand(1).getImm() == 0) {
+      FrameIndex = MI.getOperand(0).getIndex();
+      return ARM::P0;
+    }
+    break;
   case ARM::VST1q64:
   case ARM::VST1d64TPseudo:
   case ARM::VST1d64QPseudo:
@@ -1177,7 +1253,8 @@ unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
 unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
                                                     int &FrameIndex) const {
   SmallVector<const MachineMemOperand *, 1> Accesses;
-  if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses)) {
+  if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
+      Accesses.size() == 1) {
     FrameIndex =
         cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
             ->getFrameIndex();
@@ -1224,6 +1301,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
           .addImm(0)
           .addMemOperand(MMO)
           .add(predOps(ARMCC::AL));
+    } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
+      BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
+          .addFrameIndex(FI)
+          .addImm(0)
+          .addMemOperand(MMO)
+          .add(predOps(ARMCC::AL));
     } else
       llvm_unreachable("Unknown reg class!");
     break;
@@ -1260,7 +1343,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
       llvm_unreachable("Unknown reg class!");
     break;
   case 16:
-    if (ARM::DPairRegClass.hasSubClassEq(RC)) {
+    if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
       if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
         BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
             .addFrameIndex(FI)
@@ -1273,6 +1356,13 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
             .addMemOperand(MMO)
             .add(predOps(ARMCC::AL));
       }
+    } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
+               Subtarget.hasMVEIntegerOps()) {
+      auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
+      MIB.addFrameIndex(FI)
+        .addImm(0)
+        .addMemOperand(MMO);
+      addUnpredicatedMveVpredNOp(MIB);
     } else
       llvm_unreachable("Unknown reg class!");
     break;
@@ -1369,6 +1459,13 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
       return MI.getOperand(0).getReg();
     }
     break;
+  case ARM::VLDR_P0_off:
+    if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() &&
+        MI.getOperand(1).getImm() == 0) {
+      FrameIndex = MI.getOperand(0).getIndex();
+      return ARM::P0;
+    }
+    break;
   case ARM::VLD1q64:
   case ARM::VLD1d8TPseudo:
   case ARM::VLD1d16TPseudo:
@@ -1397,7 +1494,8 @@
 unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
                                                      int &FrameIndex) const {
   SmallVector<const MachineMemOperand *, 1> Accesses;
-  if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses)) {
+  if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
+      Accesses.size() == 1) {
     FrameIndex =
         cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
             ->getFrameIndex();
@@ -1480,7 +1578,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   // copyPhysReg() calls. Look for VMOVS instructions that can legally be
   // widened to VMOVD. We prefer the VMOVD when possible because it may be
   // changed into a VORR that can go down the NEON pipeline.
-  if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP())
+  if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
     return false;

   // Look for a copy between even S-registers. That is where we keep floats
@@ -1898,24 +1996,15 @@ isProfitableToIfCvt(MachineBasicBlock &MBB,
   // If we are optimizing for size, see if the branch in the predecessor can be
   // lowered to cbn?z by the constant island lowering pass, and return false if
   // so. This results in a shorter instruction sequence.
-  if (MBB.getParent()->getFunction().optForSize()) {
+  if (MBB.getParent()->getFunction().hasOptSize()) {
     MachineBasicBlock *Pred = *MBB.pred_begin();
     if (!Pred->empty()) {
       MachineInstr *LastMI = &*Pred->rbegin();
       if (LastMI->getOpcode() == ARM::t2Bcc) {
-        MachineBasicBlock::iterator CmpMI = LastMI;
-        if (CmpMI != Pred->begin()) {
-          --CmpMI;
-          if (CmpMI->getOpcode() == ARM::tCMPi8 ||
-              CmpMI->getOpcode() == ARM::t2CMPri) {
-            unsigned Reg = CmpMI->getOperand(0).getReg();
-            unsigned PredReg = 0;
-            ARMCC::CondCodes P = getInstrPredicate(*CmpMI, PredReg);
-            if (P == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 &&
-                isARMLowRegister(Reg))
-              return false;
-          }
-        }
+        const TargetRegisterInfo *TRI = &getRegisterInfo();
+        MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
+        if (CmpMI)
+          return false;
       }
     }
   }
@@ -1932,6 +2021,15 @@ isProfitableToIfCvt(MachineBasicBlock &TBB,
   if (!TCycles)
     return false;

+  // In thumb code we often end up trading one branch for a IT block, and
+  // if we are cloning the instruction can increase code size. Prevent
+  // blocks with multiple predecesors from being ifcvted to prevent this
+  // cloning.
+  if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
+    if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
+      return false;
+  }
+
   // Attempt to estimate the relative costs of predication versus branching.
   // Here we scale up each component of UnpredCost to avoid precision issue when
   // scaling TCycles/FCycles by Probability.
@@ -2040,9 +2138,9 @@ MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,

 /// Identify instructions that can be folded into a MOVCC instruction, and
 /// return the defining instruction.
-static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
-                                      const MachineRegisterInfo &MRI,
-                                      const TargetInstrInfo *TII) {
+MachineInstr *
+ARMBaseInstrInfo::canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI,
+                                   const TargetInstrInfo *TII) const {
   if (!TargetRegisterInfo::isVirtualRegister(Reg))
     return nullptr;
   if (!MRI.hasOneNonDBGUse(Reg))
@@ -2050,8 +2148,8 @@ static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
   MachineInstr *MI = MRI.getVRegDef(Reg);
   if (!MI)
     return nullptr;
-  // MI is folded into the MOVCC by predicating it.
-  if (!MI->isPredicable())
+  // Check if MI can be predicated and folded into the MOVCC.
+  if (!isPredicable(*MI))
     return nullptr;
   // Check if MI has any non-dead defs or physreg uses. This also detects
   // predicated instructions which will be reading CPSR.
@@ -2266,7 +2364,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
                                       unsigned NumBytes) {
   // This optimisation potentially adds lots of load and store
   // micro-operations, it's only really a great benefit to code-size.
-  if (!MF.getFunction().optForMinSize())
+  if (!Subtarget.hasMinSize())
     return false;

   // If only one register is pushed/popped, LLVM can use an LDR/STR
@@ -2332,6 +2430,8 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
   for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
        --CurRegEnc) {
     unsigned CurReg = RegClass->getRegister(CurRegEnc);
+    if (IsT1PushPop && CurReg > ARM::R7)
+      continue;
     if (!IsPop) {
       // Pushing any register is completely harmless, mark the register involved
       // as undef since we don't care about its value and must not restore it
@@ -2389,7 +2489,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
   bool isSub = false;

   // Memory operands in inline assembly always use AddrMode2.
-  if (Opcode == ARM::INLINEASM)
+  if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
     AddrMode = ARMII::AddrMode2;

   if (Opcode == ARM::ADDri) {
@@ -2473,6 +2573,15 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
       NumBits = 8;
       Scale = 2;
       break;
+    case ARMII::AddrModeT2_i7:
+    case ARMII::AddrModeT2_i7s2:
+    case ARMII::AddrModeT2_i7s4:
+      ImmIdx = FrameRegIdx+1;
+      InstrOffs = MI.getOperand(ImmIdx).getImm();
+      NumBits = 7;
+      Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
+               AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
+      break;
     default:
       llvm_unreachable("Unsupported addressing mode!");
     }
@@ -2543,6 +2652,7 @@ bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
     return true;
   case ARM::CMPrr:
   case ARM::t2CMPrr:
+  case ARM::tCMPr:
     SrcReg = MI.getOperand(0).getReg();
     SrcReg2 = MI.getOperand(1).getReg();
     CmpMask = ~0;
@@ -2619,32 +2729,62 @@ inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
 /// This function can be extended later on.
 inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
                                         unsigned SrcReg, unsigned SrcReg2,
-                                        int ImmValue, const MachineInstr *OI) {
-  if ((CmpI->getOpcode() == ARM::CMPrr ||
-       CmpI->getOpcode() == ARM::t2CMPrr) &&
-      (OI->getOpcode() == ARM::SUBrr ||
-       OI->getOpcode() == ARM::t2SUBrr) &&
+                                        int ImmValue, const MachineInstr *OI,
+                                        bool &IsThumb1) {
+  if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
+      (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
       ((OI->getOperand(1).getReg() == SrcReg &&
         OI->getOperand(2).getReg() == SrcReg2) ||
        (OI->getOperand(1).getReg() == SrcReg2 &&
-        OI->getOperand(2).getReg() == SrcReg)))
+        OI->getOperand(2).getReg() == SrcReg))) {
+    IsThumb1 = false;
     return true;
+  }

-  if ((CmpI->getOpcode() == ARM::CMPri ||
-       CmpI->getOpcode() == ARM::t2CMPri) &&
-      (OI->getOpcode() == ARM::SUBri ||
-       OI->getOpcode() == ARM::t2SUBri) &&
+  if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
+      ((OI->getOperand(2).getReg() == SrcReg &&
+        OI->getOperand(3).getReg() == SrcReg2) ||
+       (OI->getOperand(2).getReg() == SrcReg2 &&
+        OI->getOperand(3).getReg() == SrcReg))) {
+    IsThumb1 = true;
+    return true;
+  }
+
+  if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
+      (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
       OI->getOperand(1).getReg() == SrcReg &&
-      OI->getOperand(2).getImm() == ImmValue)
+      OI->getOperand(2).getImm() == ImmValue) {
+    IsThumb1 = false;
+    return true;
+  }
+
+  if (CmpI->getOpcode() == ARM::tCMPi8 &&
+      (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
+      OI->getOperand(2).getReg() == SrcReg &&
+      OI->getOperand(3).getImm() == ImmValue) {
+    IsThumb1 = true;
     return true;
+  }

   if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
       (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
        OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
       OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
       OI->getOperand(0).getReg() == SrcReg &&
-      OI->getOperand(1).getReg() == SrcReg2)
+      OI->getOperand(1).getReg() == SrcReg2) {
+    IsThumb1 = false;
+    return true;
+  }
+
+  if (CmpI->getOpcode() == ARM::tCMPr &&
+      (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
+       OI->getOpcode() == ARM::tADDrr) &&
+      OI->getOperand(0).getReg() == SrcReg &&
+      OI->getOperand(2).getReg() == SrcReg2) {
+    IsThumb1 = true;
     return true;
+  }
+
   return false;
 }
@@ -2662,6 +2802,17 @@ static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
   case ARM::tSUBi3:
   case ARM::tSUBi8:
   case ARM::tMUL:
+  case ARM::tADC:
+  case ARM::tSBC:
+  case ARM::tRSB:
+  case ARM::tAND:
+  case ARM::tORR:
+  case ARM::tEOR:
+  case ARM::tBIC:
+  case ARM::tMVN:
+  case ARM::tASRri:
+  case ARM::tASRrr:
+  case ARM::tROR:
     IsThumb1 = true;
     LLVM_FALLTHROUGH;
   case ARM::RSBrr:
@@ -2761,7 +2912,8 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
     // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
     // Thus we cannot return here.
     if (CmpInstr.getOpcode() == ARM::CMPri ||
-        CmpInstr.getOpcode() == ARM::t2CMPri)
+        CmpInstr.getOpcode() == ARM::t2CMPri ||
+        CmpInstr.getOpcode() == ARM::tCMPi8)
       MI = nullptr;
     else
       return false;
@@ -2783,20 +2935,22 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
   // CMP. This peephole works on the vregs, so is still in SSA form. As a
   // consequence, the movs won't redefine/kill the MUL operands which would
   // make this reordering illegal.
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
   if (MI && IsThumb1) {
     --I;
-    bool CanReorder = true;
-    const bool HasStmts = I != E;
-    for (; I != E; --I) {
-      if (I->getOpcode() != ARM::tMOVi8) {
-        CanReorder = false;
-        break;
+    if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
+      bool CanReorder = true;
+      for (; I != E; --I) {
+        if (I->getOpcode() != ARM::tMOVi8) {
+          CanReorder = false;
+          break;
+        }
+      }
+      if (CanReorder) {
+        MI = MI->removeFromParent();
+        E = CmpInstr;
+        CmpInstr.getParent()->insert(E, MI);
       }
-    }
-    if (HasStmts && CanReorder) {
-      MI = MI->removeFromParent();
-      E = CmpInstr;
-      CmpInstr.getParent()->insert(E, MI);
     }
     I = CmpInstr;
     E = MI;
@@ -2804,12 +2958,13 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(

   // Check that CPSR isn't set between the comparison instruction and the one we
   // want to change. At the same time, search for SubAdd.
-  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  bool SubAddIsThumb1 = false;
   do {
     const MachineInstr &Instr = *--I;

     // Check whether CmpInstr can be made redundant by the current instruction.
-    if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr)) {
+    if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
+                             SubAddIsThumb1)) {
       SubAdd = &*I;
       break;
     }
@@ -2824,14 +2979,25 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
       // change. We can't do this transformation.
       return false;

-  } while (I != B);
+    if (I == B) {
+      // In some cases, we scan the use-list of an instruction for an AND;
+      // that AND is in the same BB, but may not be scheduled before the
+      // corresponding TST. In that case, bail out.
+      //
+      // FIXME: We could try to reschedule the AND.
+      return false;
+    }
+  } while (true);

   // Return false if no candidates exist.
   if (!MI && !SubAdd)
     return false;

-  // The single candidate is called MI.
-  if (!MI) MI = SubAdd;
+  // If we found a SubAdd, use it as it will be closer to the CMP
+  if (SubAdd) {
+    MI = SubAdd;
+    IsThumb1 = SubAddIsThumb1;
+  }

   // We can't use a predicated instruction - it doesn't always write the flags.
   if (isPredicated(*MI))
@@ -2899,9 +3065,13 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
       // operands will be modified.
       unsigned Opc = SubAdd->getOpcode();
       bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
-                   Opc == ARM::SUBri || Opc == ARM::t2SUBri;
-      if (!IsSub || (SrcReg2 != 0 && SubAdd->getOperand(1).getReg() == SrcReg2 &&
-                     SubAdd->getOperand(2).getReg() == SrcReg)) {
+                   Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
+                   Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
+                   Opc == ARM::tSUBi8;
+      unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
+      if (!IsSub ||
+          (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
+           SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
         // VSel doesn't support condition code update.
         if (IsInstrVSel)
           return false;
@@ -2979,9 +3149,10 @@ bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
   ++Next;
   unsigned SrcReg, SrcReg2;
   int CmpMask, CmpValue;
+  bool IsThumb1;
   if (Next != MI.getParent()->end() &&
       analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
-      isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI))
+      isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
     return false;
   return true;
 }
@@ -3372,7 +3543,12 @@ unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
        I != E; ++I) {
     Size += (*I)->getSize();
   }
-  return Size / 4;
+  // FIXME: The scheduler currently can't handle values larger than 16. But
But + // the values can actually go up to 32 for floating-point load/store + // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory + // operations isn't right; we could end up with "extra" memory operands for + // various reasons, like tail merge merging two memory operations. + return std::min(Size / 4, 16U); } static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc, @@ -4093,7 +4269,7 @@ int ARMBaseInstrInfo::getOperandLatencyImpl( // instructions). if (Latency > 0 && Subtarget.isThumb2()) { const MachineFunction *MF = DefMI.getParent()->getParent(); - // FIXME: Use Function::optForSize(). + // FIXME: Use Function::hasOptSize(). if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize)) --Latency; } @@ -4517,6 +4693,31 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI, ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG"; return false; } + if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) { + // Make sure we don't generate a lo-lo mov that isn't supported. + if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) && + !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) { + ErrInfo = "Non-flag-setting Thumb1 mov is v6-only"; + return false; + } + } + if (MI.getOpcode() == ARM::tPUSH || + MI.getOpcode() == ARM::tPOP || + MI.getOpcode() == ARM::tPOP_RET) { + for (int i = 2, e = MI.getNumOperands(); i < e; ++i) { + if (MI.getOperand(i).isImplicit() || + !MI.getOperand(i).isReg()) + continue; + unsigned Reg = MI.getOperand(i).getReg(); + if (Reg < ARM::R0 || Reg > ARM::R7) { + if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) && + !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) { + ErrInfo = "Unsupported register in Thumb1 push/pop"; + return false; + } + } + } + } return true; } @@ -5107,3 +5308,44 @@ ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { {MO_NONLAZY, "arm-nonlazy"}}; return makeArrayRef(TargetFlags); } + +bool llvm::registerDefinedBetween(unsigned Reg, + MachineBasicBlock::iterator From, + MachineBasicBlock::iterator To, + const TargetRegisterInfo *TRI) { + for (auto I = From; I != To; ++I) + if (I->modifiesRegister(Reg, TRI)) + return true; + return false; +} + +MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br, + const TargetRegisterInfo *TRI) { + // Search backwards to the instruction that defines CSPR. This may or not + // be a CMP, we check that after this loop. If we find another instruction + // that reads cpsr, we return nullptr. + MachineBasicBlock::iterator CmpMI = Br; + while (CmpMI != Br->getParent()->begin()) { + --CmpMI; + if (CmpMI->modifiesRegister(ARM::CPSR, TRI)) + break; + if (CmpMI->readsRegister(ARM::CPSR, TRI)) + break; + } + + // Check that this inst is a CMP r[0-7], #0 and that the register + // is not redefined between the cmp and the br. + if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri) + return nullptr; + unsigned Reg = CmpMI->getOperand(0).getReg(); + unsigned PredReg = 0; + ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg); + if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0) + return nullptr; + if (!isARMLowRegister(Reg)) + return nullptr; + if (registerDefinedBetween(Reg, CmpMI->getNextNode(), Br, TRI)) + return nullptr; + + return &*CmpMI; +} |