Diffstat (limited to 'lib/Target/ARM/ARMBaseInstrInfo.cpp')
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp  412
1 file changed, 327 insertions(+), 85 deletions(-)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index bbebed59c851..222aa85856a2 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1,9 +1,8 @@
//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -134,7 +133,7 @@ ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
- if (Subtarget.isThumb2() || Subtarget.hasVFP2())
+ if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}
@@ -707,15 +706,7 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
if (MCID.getSize())
return MCID.getSize();
- // If this machine instr is an inline asm, measure it.
- if (MI.getOpcode() == ARM::INLINEASM) {
- unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
- if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
- Size = alignTo(Size, 4);
- return Size;
- }
- unsigned Opc = MI.getOpcode();
- switch (Opc) {
+ switch (MI.getOpcode()) {
default:
// pseudo-instruction sizes are zero.
return 0;
@@ -752,6 +743,14 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
return 12;
case ARM::SPACE:
return MI.getOperand(1).getImm();
+ case ARM::INLINEASM:
+ case ARM::INLINEASM_BR: {
+ // If this machine instr is an inline asm, measure it.
+ unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
+ if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
+ Size = alignTo(Size, 4);
+ return Size;
+ }
}
}
@@ -806,6 +805,28 @@ void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
.addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
}
+void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
+ MIB.addImm(ARMVCC::None);
+ MIB.addReg(0);
+}
+
+void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
+ unsigned DestReg) {
+ addUnpredicatedMveVpredNOp(MIB);
+ MIB.addReg(DestReg, RegState::Undef);
+}
+
+void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
+ MIB.addImm(Cond);
+ MIB.addReg(ARM::VPR, RegState::Implicit);
+}
+
+void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
+ unsigned Cond, unsigned Inactive) {
+ addPredicatedMveVpredNOp(MIB, Cond);
+ MIB.addReg(Inactive);
+}
+
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, unsigned DestReg,
@@ -831,17 +852,20 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = ARM::VMOVRS;
else if (SPRDest && GPRSrc)
Opc = ARM::VMOVSR;
- else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
+ else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
Opc = ARM::VMOVD;
else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VORRq;
+ Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
if (Opc) {
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
MIB.addReg(SrcReg, getKillRegState(KillSrc));
- if (Opc == ARM::VORRq)
+ if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
MIB.addReg(SrcReg, getKillRegState(KillSrc));
- MIB.add(predOps(ARMCC::AL));
+ if (Opc == ARM::MVE_VORR)
+ addUnpredicatedMveVpredROp(MIB, DestReg);
+ else
+ MIB.add(predOps(ARMCC::AL));
return;
}
@@ -852,11 +876,11 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Use VORRq when possible.
if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
- Opc = ARM::VORRq;
+ Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
BeginIdx = ARM::qsub_0;
SubRegs = 2;
} else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
- Opc = ARM::VORRq;
+ Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
BeginIdx = ARM::qsub_0;
SubRegs = 4;
// Fall back to VMOVD.
@@ -891,7 +915,8 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BeginIdx = ARM::dsub_0;
SubRegs = 4;
Spacing = 2;
- } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
+ } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
+ !Subtarget.hasFP64()) {
Opc = ARM::VMOVS;
BeginIdx = ARM::ssub_0;
SubRegs = 2;
@@ -901,6 +926,30 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
} else if (DestReg == ARM::CPSR) {
copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
return;
+ } else if (DestReg == ARM::VPR) {
+ assert(ARM::GPRRegClass.contains(SrcReg));
+ BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .add(predOps(ARMCC::AL));
+ return;
+ } else if (SrcReg == ARM::VPR) {
+ assert(ARM::GPRRegClass.contains(DestReg));
+ BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .add(predOps(ARMCC::AL));
+ return;
+ } else if (DestReg == ARM::FPSCR_NZCV) {
+ assert(ARM::GPRRegClass.contains(SrcReg));
+ BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .add(predOps(ARMCC::AL));
+ return;
+ } else if (SrcReg == ARM::FPSCR_NZCV) {
+ assert(ARM::GPRRegClass.contains(DestReg));
+ BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .add(predOps(ARMCC::AL));
+ return;
}
assert(Opc && "Impossible reg-to-reg copy");
@@ -925,10 +974,15 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
DstRegs.insert(Dst);
#endif
Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
- // VORR takes two source operands.
- if (Opc == ARM::VORRq)
+ // VORR (NEON or MVE) takes two source operands.
+ if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
Mov.addReg(Src);
- Mov = Mov.add(predOps(ARMCC::AL));
+ }
+ // MVE VORR takes predicate operands in place of an ordinary condition.
+ if (Opc == ARM::MVE_VORR)
+ addUnpredicatedMveVpredROp(Mov, Dst);
+ else
+ Mov = Mov.add(predOps(ARMCC::AL));
// MOVr can set CC.
if (Opc == ARM::MOVr)
Mov = Mov.add(condCodeOp());
@@ -1010,6 +1064,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addImm(0)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
+ } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else
llvm_unreachable("Unknown reg class!");
break;
@@ -1042,7 +1103,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
llvm_unreachable("Unknown reg class!");
break;
case 16:
- if (ARM::DPairRegClass.hasSubClassEq(RC)) {
+ if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
// Use aligned spills if the stack can be realigned.
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
@@ -1058,6 +1119,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
}
+ } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
+ Subtarget.hasMVEIntegerOps()) {
+ auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
+ MIB.addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO);
+ addUnpredicatedMveVpredNOp(MIB);
} else
llvm_unreachable("Unknown reg class!");
break;
@@ -1155,6 +1224,13 @@ unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
return MI.getOperand(0).getReg();
}
break;
+ case ARM::VSTR_P0_off:
+ if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() &&
+ MI.getOperand(1).getImm() == 0) {
+ FrameIndex = MI.getOperand(0).getIndex();
+ return ARM::P0;
+ }
+ break;
case ARM::VST1q64:
case ARM::VST1d64TPseudo:
case ARM::VST1d64QPseudo:
@@ -1177,7 +1253,8 @@ unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
int &FrameIndex) const {
SmallVector<const MachineMemOperand *, 1> Accesses;
- if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses)) {
+ if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
+ Accesses.size() == 1) {
FrameIndex =
cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
->getFrameIndex();
@@ -1224,6 +1301,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addImm(0)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
+ } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else
llvm_unreachable("Unknown reg class!");
break;
@@ -1260,7 +1343,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
llvm_unreachable("Unknown reg class!");
break;
case 16:
- if (ARM::DPairRegClass.hasSubClassEq(RC)) {
+ if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
.addFrameIndex(FI)
@@ -1273,6 +1356,13 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
}
+ } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
+ Subtarget.hasMVEIntegerOps()) {
+ auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
+ MIB.addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO);
+ addUnpredicatedMveVpredNOp(MIB);
} else
llvm_unreachable("Unknown reg class!");
break;
@@ -1369,6 +1459,13 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
return MI.getOperand(0).getReg();
}
break;
+ case ARM::VLDR_P0_off:
+ if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() &&
+ MI.getOperand(1).getImm() == 0) {
+ FrameIndex = MI.getOperand(0).getIndex();
+ return ARM::P0;
+ }
+ break;
case ARM::VLD1q64:
case ARM::VLD1d8TPseudo:
case ARM::VLD1d16TPseudo:
@@ -1397,7 +1494,8 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
int &FrameIndex) const {
SmallVector<const MachineMemOperand *, 1> Accesses;
- if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses)) {
+ if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
+ Accesses.size() == 1) {
FrameIndex =
cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
->getFrameIndex();
@@ -1480,7 +1578,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
// copyPhysReg() calls. Look for VMOVS instructions that can legally be
// widened to VMOVD. We prefer the VMOVD when possible because it may be
// changed into a VORR that can go down the NEON pipeline.
- if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP())
+ if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
return false;
// Look for a copy between even S-registers. That is where we keep floats
@@ -1898,24 +1996,15 @@ isProfitableToIfCvt(MachineBasicBlock &MBB,
// If we are optimizing for size, see if the branch in the predecessor can be
// lowered to cbn?z by the constant island lowering pass, and return false if
// so. This results in a shorter instruction sequence.
- if (MBB.getParent()->getFunction().optForSize()) {
+ if (MBB.getParent()->getFunction().hasOptSize()) {
MachineBasicBlock *Pred = *MBB.pred_begin();
if (!Pred->empty()) {
MachineInstr *LastMI = &*Pred->rbegin();
if (LastMI->getOpcode() == ARM::t2Bcc) {
- MachineBasicBlock::iterator CmpMI = LastMI;
- if (CmpMI != Pred->begin()) {
- --CmpMI;
- if (CmpMI->getOpcode() == ARM::tCMPi8 ||
- CmpMI->getOpcode() == ARM::t2CMPri) {
- unsigned Reg = CmpMI->getOperand(0).getReg();
- unsigned PredReg = 0;
- ARMCC::CondCodes P = getInstrPredicate(*CmpMI, PredReg);
- if (P == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 &&
- isARMLowRegister(Reg))
- return false;
- }
- }
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
+ if (CmpMI)
+ return false;
}
}
}
@@ -1932,6 +2021,15 @@ isProfitableToIfCvt(MachineBasicBlock &TBB,
if (!TCycles)
return false;
+ // In Thumb code we often end up trading one branch for an IT block, and
+ // if the block has to be cloned, that can increase code size. Prevent
+ // blocks with multiple predecessors from being ifcvted to prevent this
+ // cloning.
+ if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
+ if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
+ return false;
+ }
+
// Attempt to estimate the relative costs of predication versus branching.
// Here we scale up each component of UnpredCost to avoid precision issue when
// scaling TCycles/FCycles by Probability.
@@ -2040,9 +2138,9 @@ MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
/// Identify instructions that can be folded into a MOVCC instruction, and
/// return the defining instruction.
-static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
- const MachineRegisterInfo &MRI,
- const TargetInstrInfo *TII) {
+MachineInstr *
+ARMBaseInstrInfo::canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII) const {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
return nullptr;
if (!MRI.hasOneNonDBGUse(Reg))
@@ -2050,8 +2148,8 @@ static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
MachineInstr *MI = MRI.getVRegDef(Reg);
if (!MI)
return nullptr;
- // MI is folded into the MOVCC by predicating it.
- if (!MI->isPredicable())
+ // Check if MI can be predicated and folded into the MOVCC.
+ if (!isPredicable(*MI))
return nullptr;
// Check if MI has any non-dead defs or physreg uses. This also detects
// predicated instructions which will be reading CPSR.
@@ -2266,7 +2364,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
unsigned NumBytes) {
// This optimisation potentially adds lots of load and store
// micro-operations, so it's only really a great benefit to code-size.
- if (!MF.getFunction().optForMinSize())
+ if (!Subtarget.hasMinSize())
return false;
// If only one register is pushed/popped, LLVM can use an LDR/STR
@@ -2332,6 +2430,8 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
--CurRegEnc) {
unsigned CurReg = RegClass->getRegister(CurRegEnc);
+ if (IsT1PushPop && CurReg > ARM::R7)
+ continue;
if (!IsPop) {
// Pushing any register is completely harmless, mark the register involved
// as undef since we don't care about its value and must not restore it
@@ -2389,7 +2489,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
bool isSub = false;
// Memory operands in inline assembly always use AddrMode2.
- if (Opcode == ARM::INLINEASM)
+ if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
AddrMode = ARMII::AddrMode2;
if (Opcode == ARM::ADDri) {
@@ -2473,6 +2573,15 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
NumBits = 8;
Scale = 2;
break;
+ case ARMII::AddrModeT2_i7:
+ case ARMII::AddrModeT2_i7s2:
+ case ARMII::AddrModeT2_i7s4:
+ ImmIdx = FrameRegIdx+1;
+ InstrOffs = MI.getOperand(ImmIdx).getImm();
+ NumBits = 7;
+ Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
+ AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
+ break;
default:
llvm_unreachable("Unsupported addressing mode!");
}
@@ -2543,6 +2652,7 @@ bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
return true;
case ARM::CMPrr:
case ARM::t2CMPrr:
+ case ARM::tCMPr:
SrcReg = MI.getOperand(0).getReg();
SrcReg2 = MI.getOperand(1).getReg();
CmpMask = ~0;
@@ -2619,32 +2729,62 @@ inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
/// This function can be extended later on.
inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
unsigned SrcReg, unsigned SrcReg2,
- int ImmValue, const MachineInstr *OI) {
- if ((CmpI->getOpcode() == ARM::CMPrr ||
- CmpI->getOpcode() == ARM::t2CMPrr) &&
- (OI->getOpcode() == ARM::SUBrr ||
- OI->getOpcode() == ARM::t2SUBrr) &&
+ int ImmValue, const MachineInstr *OI,
+ bool &IsThumb1) {
+ if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
+ (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
((OI->getOperand(1).getReg() == SrcReg &&
OI->getOperand(2).getReg() == SrcReg2) ||
(OI->getOperand(1).getReg() == SrcReg2 &&
- OI->getOperand(2).getReg() == SrcReg)))
+ OI->getOperand(2).getReg() == SrcReg))) {
+ IsThumb1 = false;
return true;
+ }
- if ((CmpI->getOpcode() == ARM::CMPri ||
- CmpI->getOpcode() == ARM::t2CMPri) &&
- (OI->getOpcode() == ARM::SUBri ||
- OI->getOpcode() == ARM::t2SUBri) &&
+ if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
+ ((OI->getOperand(2).getReg() == SrcReg &&
+ OI->getOperand(3).getReg() == SrcReg2) ||
+ (OI->getOperand(2).getReg() == SrcReg2 &&
+ OI->getOperand(3).getReg() == SrcReg))) {
+ IsThumb1 = true;
+ return true;
+ }
+
+ if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
+ (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
OI->getOperand(1).getReg() == SrcReg &&
- OI->getOperand(2).getImm() == ImmValue)
+ OI->getOperand(2).getImm() == ImmValue) {
+ IsThumb1 = false;
+ return true;
+ }
+
+ if (CmpI->getOpcode() == ARM::tCMPi8 &&
+ (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
+ OI->getOperand(2).getReg() == SrcReg &&
+ OI->getOperand(3).getImm() == ImmValue) {
+ IsThumb1 = true;
return true;
+ }
if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
(OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
OI->getOperand(0).getReg() == SrcReg &&
- OI->getOperand(1).getReg() == SrcReg2)
+ OI->getOperand(1).getReg() == SrcReg2) {
+ IsThumb1 = false;
+ return true;
+ }
+
+ if (CmpI->getOpcode() == ARM::tCMPr &&
+ (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
+ OI->getOpcode() == ARM::tADDrr) &&
+ OI->getOperand(0).getReg() == SrcReg &&
+ OI->getOperand(2).getReg() == SrcReg2) {
+ IsThumb1 = true;
return true;
+ }
+
return false;
}
@@ -2662,6 +2802,17 @@ static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
case ARM::tSUBi3:
case ARM::tSUBi8:
case ARM::tMUL:
+ case ARM::tADC:
+ case ARM::tSBC:
+ case ARM::tRSB:
+ case ARM::tAND:
+ case ARM::tORR:
+ case ARM::tEOR:
+ case ARM::tBIC:
+ case ARM::tMVN:
+ case ARM::tASRri:
+ case ARM::tASRrr:
+ case ARM::tROR:
IsThumb1 = true;
LLVM_FALLTHROUGH;
case ARM::RSBrr:
@@ -2761,7 +2912,8 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
// For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
// Thus we cannot return here.
if (CmpInstr.getOpcode() == ARM::CMPri ||
- CmpInstr.getOpcode() == ARM::t2CMPri)
+ CmpInstr.getOpcode() == ARM::t2CMPri ||
+ CmpInstr.getOpcode() == ARM::tCMPi8)
MI = nullptr;
else
return false;
@@ -2783,20 +2935,22 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
// CMP. This peephole works on the vregs, so is still in SSA form. As a
// consequence, the movs won't redefine/kill the MUL operands which would
// make this reordering illegal.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
if (MI && IsThumb1) {
--I;
- bool CanReorder = true;
- const bool HasStmts = I != E;
- for (; I != E; --I) {
- if (I->getOpcode() != ARM::tMOVi8) {
- CanReorder = false;
- break;
+ if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
+ bool CanReorder = true;
+ for (; I != E; --I) {
+ if (I->getOpcode() != ARM::tMOVi8) {
+ CanReorder = false;
+ break;
+ }
+ }
+ if (CanReorder) {
+ MI = MI->removeFromParent();
+ E = CmpInstr;
+ CmpInstr.getParent()->insert(E, MI);
}
- }
- if (HasStmts && CanReorder) {
- MI = MI->removeFromParent();
- E = CmpInstr;
- CmpInstr.getParent()->insert(E, MI);
}
I = CmpInstr;
E = MI;
@@ -2804,12 +2958,13 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
// Check that CPSR isn't set between the comparison instruction and the one we
// want to change. At the same time, search for SubAdd.
- const TargetRegisterInfo *TRI = &getRegisterInfo();
+ bool SubAddIsThumb1 = false;
do {
const MachineInstr &Instr = *--I;
// Check whether CmpInstr can be made redundant by the current instruction.
- if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr)) {
+ if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
+ SubAddIsThumb1)) {
SubAdd = &*I;
break;
}
@@ -2824,14 +2979,25 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
// change. We can't do this transformation.
return false;
- } while (I != B);
+ if (I == B) {
+ // In some cases, we scan the use-list of an instruction for an AND;
+ // that AND is in the same BB, but may not be scheduled before the
+ // corresponding TST. In that case, bail out.
+ //
+ // FIXME: We could try to reschedule the AND.
+ return false;
+ }
+ } while (true);
// Return false if no candidates exist.
if (!MI && !SubAdd)
return false;
- // The single candidate is called MI.
- if (!MI) MI = SubAdd;
+ // If we found a SubAdd, use it as it will be closer to the CMP
+ if (SubAdd) {
+ MI = SubAdd;
+ IsThumb1 = SubAddIsThumb1;
+ }
// We can't use a predicated instruction - it doesn't always write the flags.
if (isPredicated(*MI))
@@ -2899,9 +3065,13 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
// operands will be modified.
unsigned Opc = SubAdd->getOpcode();
bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
- Opc == ARM::SUBri || Opc == ARM::t2SUBri;
- if (!IsSub || (SrcReg2 != 0 && SubAdd->getOperand(1).getReg() == SrcReg2 &&
- SubAdd->getOperand(2).getReg() == SrcReg)) {
+ Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
+ Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
+ Opc == ARM::tSUBi8;
+ unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
+ if (!IsSub ||
+ (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
+ SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
// VSel doesn't support condition code update.
if (IsInstrVSel)
return false;
@@ -2979,9 +3149,10 @@ bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
++Next;
unsigned SrcReg, SrcReg2;
int CmpMask, CmpValue;
+ bool IsThumb1;
if (Next != MI.getParent()->end() &&
analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
- isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI))
+ isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
return false;
return true;
}
@@ -3372,7 +3543,12 @@ unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
I != E; ++I) {
Size += (*I)->getSize();
}
- return Size / 4;
+ // FIXME: The scheduler currently can't handle values larger than 16. But
+ // the values can actually go up to 32 for floating-point load/store
+ // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory
+ // operations isn't right; we could end up with "extra" memory operands for
+ // various reasons, like tail merge merging two memory operations.
+ return std::min(Size / 4, 16U);
}
static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
@@ -4093,7 +4269,7 @@ int ARMBaseInstrInfo::getOperandLatencyImpl(
// instructions).
if (Latency > 0 && Subtarget.isThumb2()) {
const MachineFunction *MF = DefMI.getParent()->getParent();
- // FIXME: Use Function::optForSize().
+ // FIXME: Use Function::hasOptSize().
if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
--Latency;
}
@@ -4517,6 +4693,31 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
return false;
}
+ if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {
+ // Make sure we don't generate a lo-lo mov that isn't supported.
+ if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&
+ !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
+ ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
+ return false;
+ }
+ }
+ if (MI.getOpcode() == ARM::tPUSH ||
+ MI.getOpcode() == ARM::tPOP ||
+ MI.getOpcode() == ARM::tPOP_RET) {
+ for (int i = 2, e = MI.getNumOperands(); i < e; ++i) {
+ if (MI.getOperand(i).isImplicit() ||
+ !MI.getOperand(i).isReg())
+ continue;
+ unsigned Reg = MI.getOperand(i).getReg();
+ if (Reg < ARM::R0 || Reg > ARM::R7) {
+ if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
+ !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
+ ErrInfo = "Unsupported register in Thumb1 push/pop";
+ return false;
+ }
+ }
+ }
+ }
return true;
}
@@ -5107,3 +5308,44 @@ ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
{MO_NONLAZY, "arm-nonlazy"}};
return makeArrayRef(TargetFlags);
}
+
+bool llvm::registerDefinedBetween(unsigned Reg,
+ MachineBasicBlock::iterator From,
+ MachineBasicBlock::iterator To,
+ const TargetRegisterInfo *TRI) {
+ for (auto I = From; I != To; ++I)
+ if (I->modifiesRegister(Reg, TRI))
+ return true;
+ return false;
+}
+
+MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br,
+ const TargetRegisterInfo *TRI) {
+ // Search backwards to the instruction that defines CPSR. This may or may
+ // not be a CMP; we check that after this loop. If we find another
+ // instruction that reads CPSR, we return nullptr.
+ MachineBasicBlock::iterator CmpMI = Br;
+ while (CmpMI != Br->getParent()->begin()) {
+ --CmpMI;
+ if (CmpMI->modifiesRegister(ARM::CPSR, TRI))
+ break;
+ if (CmpMI->readsRegister(ARM::CPSR, TRI))
+ break;
+ }
+
+ // Check that this inst is a CMP r[0-7], #0 and that the register
+ // is not redefined between the cmp and the br.
+ if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri)
+ return nullptr;
+ unsigned Reg = CmpMI->getOperand(0).getReg();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg);
+ if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0)
+ return nullptr;
+ if (!isARMLowRegister(Reg))
+ return nullptr;
+ if (registerDefinedBetween(Reg, CmpMI->getNextNode(), Br, TRI))
+ return nullptr;
+
+ return &*CmpMI;
+}
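
For context, a minimal sketch (not part of the patch) of how the new MVE vector-predication helpers are meant to be used when emitting an MVE instruction with BuildMI. It mirrors the MVE_VORR copy added to copyPhysReg(); the registers and the surrounding variables (MBB, InsertPt, DL, TII) are illustrative placeholders, not code from the patch.

  // Illustrative only: build an unpredicated MVE VORR acting as a Q-register
  // copy, as copyPhysReg() now does when NEON is unavailable.
  MachineInstrBuilder MIB =
      BuildMI(MBB, InsertPt, DL, TII->get(ARM::MVE_VORR), ARM::Q0)
          .addReg(ARM::Q1, getKillRegState(true))
          .addReg(ARM::Q1, getKillRegState(true)); // MVE_VORR takes two sources.
  // Instead of the usual MIB.add(predOps(ARMCC::AL)), MVE instructions carry
  // vector-predication operands: a VPT predication code (none here) plus, for
  // the "R" form, a register supplying the value of inactive lanes.
  addUnpredicatedMveVpredROp(MIB, ARM::Q0);

The predicated variants (addPredicatedMveVpredNOp/addPredicatedMveVpredROp) follow the same pattern, taking the VPT condition code and, for the R form, the register whose value is kept in inactive lanes.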