aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp')
-rw-r--r--llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp840
1 files changed, 825 insertions, 15 deletions
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 2c3ac816219f..48622aae3cb4 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -71,6 +71,38 @@ namespace {
unsigned Opc, bool IsExt);
void ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
+ void CMSEClearGPRegs(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+ const SmallVectorImpl<unsigned> &ClearRegs,
+ unsigned ClobberReg);
+ MachineBasicBlock &CMSEClearFPRegs(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+ MachineBasicBlock &CMSEClearFPRegsV8(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const BitVector &ClearRegs);
+ MachineBasicBlock &CMSEClearFPRegsV81(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const BitVector &ClearRegs);
+ void CMSESaveClearFPRegs(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ const LivePhysRegs &LiveRegs,
+ SmallVectorImpl<unsigned> &AvailableRegs);
+ void CMSESaveClearFPRegsV8(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ const LivePhysRegs &LiveRegs,
+ SmallVectorImpl<unsigned> &ScratchRegs);
+ void CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ const LivePhysRegs &LiveRegs);
+ void CMSERestoreFPRegs(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ SmallVectorImpl<unsigned> &AvailableRegs);
+ void CMSERestoreFPRegsV8(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ SmallVectorImpl<unsigned> &AvailableRegs);
+ void CMSERestoreFPRegsV81(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ SmallVectorImpl<unsigned> &AvailableRegs);
bool ExpandCMP_SWAP(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, unsigned LdrexOp,
unsigned StrexOp, unsigned UxtOp,
@@ -417,8 +449,7 @@ static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
// Make sure the table is sorted.
static std::atomic<bool> TableChecked(false);
if (!TableChecked.load(std::memory_order_relaxed)) {
- assert(std::is_sorted(std::begin(NEONLdStTable), std::end(NEONLdStTable)) &&
- "NEONLdStTable is not sorted!");
+ assert(llvm::is_sorted(NEONLdStTable) && "NEONLdStTable is not sorted!");
TableChecked.store(true, std::memory_order_relaxed);
}
#endif
@@ -827,7 +858,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
- unsigned PredReg = 0;
+ Register PredReg;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
@@ -852,10 +883,13 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
unsigned ImmVal = (unsigned)MO.getImm();
unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ unsigned MIFlags = MI.getFlags();
LO16 = LO16.addImm(SOImmValV1);
HI16 = HI16.addImm(SOImmValV2);
LO16.cloneMemRefs(MI);
HI16.cloneMemRefs(MI);
+ LO16.setMIFlags(MIFlags);
+ HI16.setMIFlags(MIFlags);
LO16.addImm(Pred).addReg(PredReg).add(condCodeOp());
HI16.addImm(Pred).addReg(PredReg).add(condCodeOp());
if (isCC)
@@ -867,6 +901,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
unsigned LO16Opc = 0;
unsigned HI16Opc = 0;
+ unsigned MIFlags = MI.getFlags();
if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
LO16Opc = ARM::t2MOVi16;
HI16Opc = ARM::t2MOVTi16;
@@ -880,6 +915,9 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg);
+ LO16.setMIFlags(MIFlags);
+ HI16.setMIFlags(MIFlags);
+
switch (MO.getType()) {
case MachineOperand::MO_Immediate: {
unsigned Imm = MO.getImm();
@@ -921,6 +959,582 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
LLVM_DEBUG(dbgs() << "And: "; HI16.getInstr()->dump(););
}
+// The size of the area, accessed by that VLSTM/VLLDM
+// S0-S31 + FPSCR + 8 more bytes (VPR + pad, or just pad)
+static const int CMSE_FP_SAVE_SIZE = 136;
+
+static void determineGPRegsToClear(const MachineInstr &MI,
+ const std::initializer_list<unsigned> &Regs,
+ SmallVectorImpl<unsigned> &ClearRegs) {
+ SmallVector<unsigned, 4> OpRegs;
+ for (const MachineOperand &Op : MI.operands()) {
+ if (!Op.isReg() || !Op.isUse())
+ continue;
+ OpRegs.push_back(Op.getReg());
+ }
+ llvm::sort(OpRegs);
+
+ std::set_difference(Regs.begin(), Regs.end(), OpRegs.begin(), OpRegs.end(),
+ std::back_inserter(ClearRegs));
+}
+
+void ARMExpandPseudo::CMSEClearGPRegs(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, const SmallVectorImpl<unsigned> &ClearRegs,
+ unsigned ClobberReg) {
+
+ if (STI->hasV8_1MMainlineOps()) {
+ // Clear the registers using the CLRM instruction.
+ MachineInstrBuilder CLRM =
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::t2CLRM)).add(predOps(ARMCC::AL));
+ for (unsigned R : ClearRegs)
+ CLRM.addReg(R, RegState::Define);
+ CLRM.addReg(ARM::APSR, RegState::Define);
+ CLRM.addReg(ARM::CPSR, RegState::Define | RegState::Implicit);
+ } else {
+ // Clear the registers and flags by copying ClobberReg into them.
+ // (Baseline can't do a high register clear in one instruction).
+ for (unsigned Reg : ClearRegs) {
+ if (Reg == ClobberReg)
+ continue;
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVr), Reg)
+ .addReg(ClobberReg)
+ .add(predOps(ARMCC::AL));
+ }
+
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::t2MSR_M))
+ .addImm(STI->hasDSP() ? 0xc00 : 0x800)
+ .addReg(ClobberReg)
+ .add(predOps(ARMCC::AL));
+ }
+}
+
+// Find which FP registers need to be cleared. The parameter `ClearRegs` is
+// initialised with all elements set to true, and this function resets all the
+// bits, which correspond to register uses. Returns true if any floating point
+// register is defined, false otherwise.
+static bool determineFPRegsToClear(const MachineInstr &MI,
+ BitVector &ClearRegs) {
+ bool DefFP = false;
+ for (const MachineOperand &Op : MI.operands()) {
+ if (!Op.isReg())
+ continue;
+
+ unsigned Reg = Op.getReg();
+ if (Op.isDef()) {
+ if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) ||
+ (Reg >= ARM::D0 && Reg <= ARM::D15) ||
+ (Reg >= ARM::S0 && Reg <= ARM::S31))
+ DefFP = true;
+ continue;
+ }
+
+ if (Reg >= ARM::Q0 && Reg <= ARM::Q7) {
+ int R = Reg - ARM::Q0;
+ ClearRegs.reset(R * 4, (R + 1) * 4);
+ } else if (Reg >= ARM::D0 && Reg <= ARM::D15) {
+ int R = Reg - ARM::D0;
+ ClearRegs.reset(R * 2, (R + 1) * 2);
+ } else if (Reg >= ARM::S0 && Reg <= ARM::S31) {
+ ClearRegs[Reg - ARM::S0] = false;
+ }
+ }
+ return DefFP;
+}
+
+MachineBasicBlock &
+ARMExpandPseudo::CMSEClearFPRegs(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ BitVector ClearRegs(16, true);
+ (void)determineFPRegsToClear(*MBBI, ClearRegs);
+
+ if (STI->hasV8_1MMainlineOps())
+ return CMSEClearFPRegsV81(MBB, MBBI, ClearRegs);
+ else
+ return CMSEClearFPRegsV8(MBB, MBBI, ClearRegs);
+}
+
+// Clear the FP registers for v8.0-M, by copying over the content
+// of LR. Uses R12 as a scratch register.
+MachineBasicBlock &
+ARMExpandPseudo::CMSEClearFPRegsV8(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const BitVector &ClearRegs) {
+ if (!STI->hasFPRegs())
+ return MBB;
+
+ auto &RetI = *MBBI;
+ const DebugLoc &DL = RetI.getDebugLoc();
+
+ // If optimising for minimum size, clear FP registers unconditionally.
+ // Otherwise, check the CONTROL.SFPA (Secure Floating-Point Active) bit and
+ // don't clear them if they belong to the non-secure state.
+ MachineBasicBlock *ClearBB, *DoneBB;
+ if (STI->hasMinSize()) {
+ ClearBB = DoneBB = &MBB;
+ } else {
+ MachineFunction *MF = MBB.getParent();
+ ClearBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ MF->insert(++MBB.getIterator(), ClearBB);
+ MF->insert(++ClearBB->getIterator(), DoneBB);
+
+ DoneBB->splice(DoneBB->end(), &MBB, MBBI, MBB.end());
+ DoneBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(ClearBB);
+ MBB.addSuccessor(DoneBB);
+ ClearBB->addSuccessor(DoneBB);
+
+ // At the new basic blocks we need to have live-in the registers, used
+ // for the return value as well as LR, used to clear registers.
+ for (const MachineOperand &Op : RetI.operands()) {
+ if (!Op.isReg())
+ continue;
+ Register Reg = Op.getReg();
+ if (Reg == ARM::NoRegister || Reg == ARM::LR)
+ continue;
+ assert(Register::isPhysicalRegister(Reg) && "Unallocated register");
+ ClearBB->addLiveIn(Reg);
+ DoneBB->addLiveIn(Reg);
+ }
+ ClearBB->addLiveIn(ARM::LR);
+ DoneBB->addLiveIn(ARM::LR);
+
+ // Read the CONTROL register.
+ BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2MRS_M), ARM::R12)
+ .addImm(20)
+ .add(predOps(ARMCC::AL));
+ // Check bit 3 (SFPA).
+ BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2TSTri))
+ .addReg(ARM::R12)
+ .addImm(8)
+ .add(predOps(ARMCC::AL));
+ // If SFPA is clear, jump over ClearBB to DoneBB.
+ BuildMI(MBB, MBB.end(), DL, TII->get(ARM::tBcc))
+ .addMBB(DoneBB)
+ .addImm(ARMCC::EQ)
+ .addReg(ARM::CPSR, RegState::Kill);
+ }
+
+ // Emit the clearing sequence
+ for (unsigned D = 0; D < 8; D++) {
+ // Attempt to clear as double
+ if (ClearRegs[D * 2 + 0] && ClearRegs[D * 2 + 1]) {
+ unsigned Reg = ARM::D0 + D;
+ BuildMI(ClearBB, DL, TII->get(ARM::VMOVDRR), Reg)
+ .addReg(ARM::LR)
+ .addReg(ARM::LR)
+ .add(predOps(ARMCC::AL));
+ } else {
+ // Clear first part as single
+ if (ClearRegs[D * 2 + 0]) {
+ unsigned Reg = ARM::S0 + D * 2;
+ BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg)
+ .addReg(ARM::LR)
+ .add(predOps(ARMCC::AL));
+ }
+ // Clear second part as single
+ if (ClearRegs[D * 2 + 1]) {
+ unsigned Reg = ARM::S0 + D * 2 + 1;
+ BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg)
+ .addReg(ARM::LR)
+ .add(predOps(ARMCC::AL));
+ }
+ }
+ }
+
+ // Clear FPSCR bits 0-4, 7, 28-31
+ // The other bits are program global according to the AAPCS
+ BuildMI(ClearBB, DL, TII->get(ARM::VMRS), ARM::R12)
+ .add(predOps(ARMCC::AL));
+ BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12)
+ .addReg(ARM::R12)
+ .addImm(0x0000009F)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12)
+ .addReg(ARM::R12)
+ .addImm(0xF0000000)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ BuildMI(ClearBB, DL, TII->get(ARM::VMSR))
+ .addReg(ARM::R12)
+ .add(predOps(ARMCC::AL));
+
+ return *DoneBB;
+}
+
+MachineBasicBlock &
+ARMExpandPseudo::CMSEClearFPRegsV81(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const BitVector &ClearRegs) {
+ auto &RetI = *MBBI;
+
+ // Emit a sequence of VSCCLRM <sreglist> instructions, one instruction for
+ // each contiguous sequence of S-registers.
+ int Start = -1, End = -1;
+ for (int S = 0, E = ClearRegs.size(); S != E; ++S) {
+ if (ClearRegs[S] && S == End + 1) {
+ End = S; // extend range
+ continue;
+ }
+ // Emit current range.
+ if (Start < End) {
+ MachineInstrBuilder VSCCLRM =
+ BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS))
+ .add(predOps(ARMCC::AL));
+ while (++Start <= End)
+ VSCCLRM.addReg(ARM::S0 + Start, RegState::Define);
+ VSCCLRM.addReg(ARM::VPR, RegState::Define);
+ }
+ Start = End = S;
+ }
+ // Emit last range.
+ if (Start < End) {
+ MachineInstrBuilder VSCCLRM =
+ BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS))
+ .add(predOps(ARMCC::AL));
+ while (++Start <= End)
+ VSCCLRM.addReg(ARM::S0 + Start, RegState::Define);
+ VSCCLRM.addReg(ARM::VPR, RegState::Define);
+ }
+
+ return MBB;
+}
+
+void ARMExpandPseudo::CMSESaveClearFPRegs(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
+ if (STI->hasV8_1MMainlineOps())
+ CMSESaveClearFPRegsV81(MBB, MBBI, DL, LiveRegs);
+ else
+ CMSESaveClearFPRegsV8(MBB, MBBI, DL, LiveRegs, ScratchRegs);
+}
+
+// Save and clear FP registers if present
+void ARMExpandPseudo::CMSESaveClearFPRegsV8(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
+ if (!STI->hasFPRegs())
+ return;
+
+ // Store an available register for FPSCR clearing
+ assert(!ScratchRegs.empty());
+ unsigned SpareReg = ScratchRegs.front();
+
+ // save space on stack for VLSTM
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(CMSE_FP_SAVE_SIZE >> 2)
+ .add(predOps(ARMCC::AL));
+
+ // Use ScratchRegs to store the fp regs
+ std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs;
+ std::vector<unsigned> NonclearedFPRegs;
+ for (const MachineOperand &Op : MBBI->operands()) {
+ if (Op.isReg() && Op.isUse()) {
+ unsigned Reg = Op.getReg();
+ assert(!ARM::DPRRegClass.contains(Reg) ||
+ ARM::DPR_VFP2RegClass.contains(Reg));
+ assert(!ARM::QPRRegClass.contains(Reg));
+ if (ARM::DPR_VFP2RegClass.contains(Reg)) {
+ if (ScratchRegs.size() >= 2) {
+ unsigned SaveReg2 = ScratchRegs.pop_back_val();
+ unsigned SaveReg1 = ScratchRegs.pop_back_val();
+ ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2);
+
+ // Save the fp register to the normal registers
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD))
+ .addReg(SaveReg1, RegState::Define)
+ .addReg(SaveReg2, RegState::Define)
+ .addReg(Reg)
+ .add(predOps(ARMCC::AL));
+ } else {
+ NonclearedFPRegs.push_back(Reg);
+ }
+ } else if (ARM::SPRRegClass.contains(Reg)) {
+ if (ScratchRegs.size() >= 1) {
+ unsigned SaveReg = ScratchRegs.pop_back_val();
+ ClearedFPRegs.emplace_back(Reg, SaveReg, 0);
+
+ // Save the fp register to the normal registers
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg)
+ .addReg(Reg)
+ .add(predOps(ARMCC::AL));
+ } else {
+ NonclearedFPRegs.push_back(Reg);
+ }
+ }
+ }
+ }
+
+ bool passesFPReg = (!NonclearedFPRegs.empty() || !ClearedFPRegs.empty());
+
+ // Lazy store all fp registers to the stack
+ MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+ for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
+ ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
+ VLSTM.addReg(R, RegState::Implicit |
+ (LiveRegs.contains(R) ? 0 : RegState::Undef));
+
+ // Restore all arguments
+ for (const auto &Regs : ClearedFPRegs) {
+ unsigned Reg, SaveReg1, SaveReg2;
+ std::tie(Reg, SaveReg1, SaveReg2) = Regs;
+ if (ARM::DPR_VFP2RegClass.contains(Reg))
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg)
+ .addReg(SaveReg1)
+ .addReg(SaveReg2)
+ .add(predOps(ARMCC::AL));
+ else if (ARM::SPRRegClass.contains(Reg))
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg)
+ .addReg(SaveReg1)
+ .add(predOps(ARMCC::AL));
+ }
+
+ for (unsigned Reg : NonclearedFPRegs) {
+ if (ARM::DPR_VFP2RegClass.contains(Reg)) {
+ if (STI->isLittle()) {
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRD), Reg)
+ .addReg(ARM::SP)
+ .addImm((Reg - ARM::D0) * 2)
+ .add(predOps(ARMCC::AL));
+ } else {
+ // For big-endian targets we need to load the two subregisters of Reg
+ // manually because VLDRD would load them in wrong order
+ unsigned SReg0 = TRI->getSubReg(Reg, ARM::ssub_0);
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0)
+ .addReg(ARM::SP)
+ .addImm((Reg - ARM::D0) * 2)
+ .add(predOps(ARMCC::AL));
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0 + 1)
+ .addReg(ARM::SP)
+ .addImm((Reg - ARM::D0) * 2 + 1)
+ .add(predOps(ARMCC::AL));
+ }
+ } else if (ARM::SPRRegClass.contains(Reg)) {
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), Reg)
+ .addReg(ARM::SP)
+ .addImm(Reg - ARM::S0)
+ .add(predOps(ARMCC::AL));
+ }
+ }
+ // restore FPSCR from stack and clear bits 0-4, 7, 28-31
+ // The other bits are program global according to the AAPCS
+ if (passesFPReg) {
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::t2LDRi8), SpareReg)
+ .addReg(ARM::SP)
+ .addImm(0x40)
+ .add(predOps(ARMCC::AL));
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg)
+ .addReg(SpareReg)
+ .addImm(0x0000009F)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg)
+ .addReg(SpareReg)
+ .addImm(0xF0000000)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMSR))
+ .addReg(SpareReg)
+ .add(predOps(ARMCC::AL));
+ // The ldr must happen after a floating point instruction. To prevent the
+ // post-ra scheduler to mess with the order, we create a bundle.
+ finalizeBundle(MBB, VLSTM->getIterator(), MBBI->getIterator());
+ }
+}
+
+void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc &DL,
+ const LivePhysRegs &LiveRegs) {
+ BitVector ClearRegs(32, true);
+ bool DefFP = determineFPRegsToClear(*MBBI, ClearRegs);
+
+ // If the instruction does not write to a FP register and no elements were
+ // removed from the set, then no FP registers were used to pass
+ // arguments/returns.
+ if (!DefFP && ClearRegs.count() == ClearRegs.size()) {
+ // save space on stack for VLSTM
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(CMSE_FP_SAVE_SIZE >> 2)
+ .add(predOps(ARMCC::AL));
+
+ // Lazy store all FP registers to the stack
+ MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+ for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
+ ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
+ VLSTM.addReg(R, RegState::Implicit |
+ (LiveRegs.contains(R) ? 0 : RegState::Undef));
+ } else {
+ // Push all the callee-saved registers (s16-s31).
+ MachineInstrBuilder VPUSH =
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTMSDB_UPD), ARM::SP)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+ for (int Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
+ VPUSH.addReg(Reg);
+
+ // Clear FP registers with a VSCCLRM.
+ (void)CMSEClearFPRegsV81(MBB, MBBI, ClearRegs);
+
+ // Save floating-point context.
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTR_FPCXTS_pre), ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(-8)
+ .add(predOps(ARMCC::AL));
+ }
+}
+
+// Restore FP registers if present
+void ARMExpandPseudo::CMSERestoreFPRegs(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ SmallVectorImpl<unsigned> &AvailableRegs) {
+ if (STI->hasV8_1MMainlineOps())
+ CMSERestoreFPRegsV81(MBB, MBBI, DL, AvailableRegs);
+ else
+ CMSERestoreFPRegsV8(MBB, MBBI, DL, AvailableRegs);
+}
+
+void ARMExpandPseudo::CMSERestoreFPRegsV8(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ SmallVectorImpl<unsigned> &AvailableRegs) {
+ if (!STI->hasFPRegs())
+ return;
+
+ // Use AvailableRegs to store the fp regs
+ std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs;
+ std::vector<unsigned> NonclearedFPRegs;
+ for (const MachineOperand &Op : MBBI->operands()) {
+ if (Op.isReg() && Op.isDef()) {
+ unsigned Reg = Op.getReg();
+ assert(!ARM::DPRRegClass.contains(Reg) ||
+ ARM::DPR_VFP2RegClass.contains(Reg));
+ assert(!ARM::QPRRegClass.contains(Reg));
+ if (ARM::DPR_VFP2RegClass.contains(Reg)) {
+ if (AvailableRegs.size() >= 2) {
+ unsigned SaveReg2 = AvailableRegs.pop_back_val();
+ unsigned SaveReg1 = AvailableRegs.pop_back_val();
+ ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2);
+
+ // Save the fp register to the normal registers
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD))
+ .addReg(SaveReg1, RegState::Define)
+ .addReg(SaveReg2, RegState::Define)
+ .addReg(Reg)
+ .add(predOps(ARMCC::AL));
+ } else {
+ NonclearedFPRegs.push_back(Reg);
+ }
+ } else if (ARM::SPRRegClass.contains(Reg)) {
+ if (AvailableRegs.size() >= 1) {
+ unsigned SaveReg = AvailableRegs.pop_back_val();
+ ClearedFPRegs.emplace_back(Reg, SaveReg, 0);
+
+ // Save the fp register to the normal registers
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg)
+ .addReg(Reg)
+ .add(predOps(ARMCC::AL));
+ } else {
+ NonclearedFPRegs.push_back(Reg);
+ }
+ }
+ }
+ }
+
+ // Push FP regs that cannot be restored via normal registers on the stack
+ for (unsigned Reg : NonclearedFPRegs) {
+ if (ARM::DPR_VFP2RegClass.contains(Reg))
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRD), Reg)
+ .addReg(ARM::SP)
+ .addImm((Reg - ARM::D0) * 2)
+ .add(predOps(ARMCC::AL));
+ else if (ARM::SPRRegClass.contains(Reg))
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRS), Reg)
+ .addReg(ARM::SP)
+ .addImm(Reg - ARM::S0)
+ .add(predOps(ARMCC::AL));
+ }
+
+ // Lazy load fp regs from stack
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+
+ // Restore all FP registers via normal registers
+ for (const auto &Regs : ClearedFPRegs) {
+ unsigned Reg, SaveReg1, SaveReg2;
+ std::tie(Reg, SaveReg1, SaveReg2) = Regs;
+ if (ARM::DPR_VFP2RegClass.contains(Reg))
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg)
+ .addReg(SaveReg1)
+ .addReg(SaveReg2)
+ .add(predOps(ARMCC::AL));
+ else if (ARM::SPRRegClass.contains(Reg))
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg)
+ .addReg(SaveReg1)
+ .add(predOps(ARMCC::AL));
+ }
+
+ // Pop the stack space
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(CMSE_FP_SAVE_SIZE >> 2)
+ .add(predOps(ARMCC::AL));
+}
+
+static bool definesOrUsesFPReg(const MachineInstr &MI) {
+ for (const MachineOperand &Op : MI.operands()) {
+ if (!Op.isReg())
+ continue;
+ unsigned Reg = Op.getReg();
+ if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) ||
+ (Reg >= ARM::D0 && Reg <= ARM::D15) ||
+ (Reg >= ARM::S0 && Reg <= ARM::S31))
+ return true;
+ }
+ return false;
+}
+
+void ARMExpandPseudo::CMSERestoreFPRegsV81(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ SmallVectorImpl<unsigned> &AvailableRegs) {
+ if (!definesOrUsesFPReg(*MBBI)) {
+ // Load FP registers from stack.
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+
+ // Pop the stack space
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(CMSE_FP_SAVE_SIZE >> 2)
+ .add(predOps(ARMCC::AL));
+ } else {
+ // Restore the floating point context.
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::VLDR_FPCXTS_post),
+ ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(8)
+ .add(predOps(ARMCC::AL));
+
+ // Pop all the callee-saved registers (s16-s31).
+ MachineInstrBuilder VPOP =
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDMSIA_UPD), ARM::SP)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+ for (int Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
+ VPOP.addReg(Reg, RegState::Define);
+ }
+}
+
/// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as
/// possible. This only gets used at -O0 so we don't care about efficiency of
/// the generated code.
@@ -1149,6 +1763,93 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
return true;
}
+static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, int JumpReg,
+ const LivePhysRegs &LiveRegs, bool Thumb1Only) {
+ const DebugLoc &DL = MBBI->getDebugLoc();
+ if (Thumb1Only) { // push Lo and Hi regs separately
+ MachineInstrBuilder PushMIB =
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
+ for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
+ PushMIB.addReg(
+ Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
+ }
+
+ // Thumb1 can only tPUSH low regs, so we copy the high regs to the low
+ // regs that we just saved and push the low regs again, taking care to
+ // not clobber JumpReg. If JumpReg is one of the low registers, push first
+ // the values of r9-r11, and then r8. That would leave them ordered in
+ // memory, and allow us to later pop them with a single instructions.
+ // FIXME: Could also use any of r0-r3 that are free (including in the
+ // first PUSH above).
+ for (int LoReg = ARM::R7, HiReg = ARM::R11; LoReg >= ARM::R4; --LoReg) {
+ if (JumpReg == LoReg)
+ continue;
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
+ .addReg(HiReg, LiveRegs.contains(HiReg) ? 0 : RegState::Undef)
+ .add(predOps(ARMCC::AL));
+ --HiReg;
+ }
+ MachineInstrBuilder PushMIB2 =
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
+ for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
+ if (Reg == JumpReg)
+ continue;
+ PushMIB2.addReg(Reg, RegState::Kill);
+ }
+
+ // If we couldn't use a low register for temporary storage (because it was
+ // the JumpReg), use r4 or r5, whichever is not JumpReg. It has already been
+ // saved.
+ if (JumpReg >= ARM::R4 && JumpReg <= ARM::R7) {
+ int LoReg = JumpReg == ARM::R4 ? ARM::R5 : ARM::R4;
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
+ .addReg(ARM::R8, LiveRegs.contains(ARM::R8) ? 0 : RegState::Undef)
+ .add(predOps(ARMCC::AL));
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH))
+ .add(predOps(ARMCC::AL))
+ .addReg(LoReg, RegState::Kill);
+ }
+ } else { // push Lo and Hi registers with a single instruction
+ MachineInstrBuilder PushMIB =
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::t2STMDB_UPD), ARM::SP)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+ for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg) {
+ PushMIB.addReg(
+ Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
+ }
+ }
+}
+
+static void CMSEPopCalleeSaves(const TargetInstrInfo &TII,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, int JumpReg,
+ bool Thumb1Only) {
+ const DebugLoc &DL = MBBI->getDebugLoc();
+ if (Thumb1Only) {
+ MachineInstrBuilder PopMIB =
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
+ for (int R = 0; R < 4; ++R) {
+ PopMIB.addReg(ARM::R4 + R, RegState::Define);
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), ARM::R8 + R)
+ .addReg(ARM::R4 + R, RegState::Kill)
+ .add(predOps(ARMCC::AL));
+ }
+ MachineInstrBuilder PopMIB2 =
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
+ for (int R = 0; R < 4; ++R)
+ PopMIB2.addReg(ARM::R4 + R, RegState::Define);
+ } else { // pop Lo and Hi registers with a single instruction
+ MachineInstrBuilder PopMIB =
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::t2LDMIA_UPD), ARM::SP)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+ for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg)
+ PopMIB.addReg(Reg, RegState::Define);
+ }
+}
bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -1207,12 +1908,117 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Update call site info and delete the pseudo instruction TCRETURN.
- MBB.getParent()->moveCallSiteInfo(&MI, &*NewMI);
+ if (MI.isCandidateForCallSiteEntry())
+ MI.getMF()->moveCallSiteInfo(&MI, &*NewMI);
MBB.erase(MBBI);
MBBI = NewMI;
return true;
}
+ case ARM::tBXNS_RET: {
+ MachineBasicBlock &AfterBB = CMSEClearFPRegs(MBB, MBBI);
+
+ if (STI->hasV8_1MMainlineOps()) {
+ // Restore the non-secure floating point context.
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII->get(ARM::VLDR_FPCXTNS_post), ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(4)
+ .add(predOps(ARMCC::AL));
+ }
+
+ // Clear all GPR that are not a use of the return instruction.
+ assert(llvm::all_of(MBBI->operands(), [](const MachineOperand &Op) {
+ return !Op.isReg() || Op.getReg() != ARM::R12;
+ }));
+ SmallVector<unsigned, 5> ClearRegs;
+ determineGPRegsToClear(
+ *MBBI, {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R12}, ClearRegs);
+ CMSEClearGPRegs(AfterBB, AfterBB.end(), MBBI->getDebugLoc(), ClearRegs,
+ ARM::LR);
+
+ MachineInstrBuilder NewMI =
+ BuildMI(AfterBB, AfterBB.end(), MBBI->getDebugLoc(),
+ TII->get(ARM::tBXNS))
+ .addReg(ARM::LR)
+ .add(predOps(ARMCC::AL));
+ for (const MachineOperand &Op : MI.operands())
+ NewMI->addOperand(Op);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::tBLXNS_CALL: {
+ DebugLoc DL = MBBI->getDebugLoc();
+ unsigned JumpReg = MBBI->getOperand(0).getReg();
+
+ // Figure out which registers are live at the point immediately before the
+ // call. When we indiscriminately push a set of registers, the live
+ // registers are added as ordinary use operands, whereas dead registers
+ // are "undef".
+ LivePhysRegs LiveRegs(*TRI);
+ LiveRegs.addLiveOuts(MBB);
+ for (const MachineInstr &MI : make_range(MBB.rbegin(), MBBI.getReverse()))
+ LiveRegs.stepBackward(MI);
+ LiveRegs.stepBackward(*MBBI);
+
+ CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg, LiveRegs,
+ AFI->isThumb1OnlyFunction());
+
+ SmallVector<unsigned, 16> ClearRegs;
+ determineGPRegsToClear(*MBBI,
+ {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4,
+ ARM::R5, ARM::R6, ARM::R7, ARM::R8, ARM::R9,
+ ARM::R10, ARM::R11, ARM::R12},
+ ClearRegs);
+ auto OriginalClearRegs = ClearRegs;
+
+ // Get the first cleared register as a scratch (to use later with tBIC).
+ // We need to use the first so we can ensure it is a low register.
+ unsigned ScratchReg = ClearRegs.front();
+
+ // Clear LSB of JumpReg
+ if (AFI->isThumb2Function()) {
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), JumpReg)
+ .addReg(JumpReg)
+ .addImm(1)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ } else {
+ // We need to use an extra register to cope with 8M Baseline,
+ // since we have saved all of the registers we are ok to trash a non
+ // argument register here.
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVi8), ScratchReg)
+ .add(condCodeOp())
+ .addImm(1)
+ .add(predOps(ARMCC::AL));
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tBIC), JumpReg)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(JumpReg)
+ .addReg(ScratchReg)
+ .add(predOps(ARMCC::AL));
+ }
+
+ CMSESaveClearFPRegs(MBB, MBBI, DL, LiveRegs,
+ ClearRegs); // save+clear FP regs with ClearRegs
+ CMSEClearGPRegs(MBB, MBBI, DL, ClearRegs, JumpReg);
+
+ const MachineInstrBuilder NewCall =
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tBLXNSr))
+ .add(predOps(ARMCC::AL))
+ .addReg(JumpReg, RegState::Kill);
+
+ for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
+ NewCall->addOperand(MI.getOperand(I));
+ if (MI.isCandidateForCallSiteEntry())
+ MI.getMF()->moveCallSiteInfo(&MI, NewCall.getInstr());
+
+ CMSERestoreFPRegs(MBB, MBBI, DL, OriginalClearRegs); // restore FP registers
+
+ CMSEPopCalleeSaves(*TII, MBB, MBBI, JumpReg, AFI->isThumb1OnlyFunction());
+
+ MI.eraseFromParent();
+ return true;
+ }
case ARM::VMOVHcc:
case ARM::VMOVScc:
case ARM::VMOVDcc: {
@@ -1359,17 +2165,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// If there's dynamic realignment, adjust for it.
if (RI.needsStackRealignment(MF)) {
MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned MaxAlign = MFI.getMaxAlignment();
+ Align MaxAlign = MFI.getMaxAlign();
assert (!AFI->isThumb1OnlyFunction());
// Emit bic r6, r6, MaxAlign
- assert(MaxAlign <= 256 && "The BIC instruction cannot encode "
- "immediates larger than 256 with all lower "
- "bits set.");
+ assert(MaxAlign <= Align(256) &&
+ "The BIC instruction cannot encode "
+ "immediates larger than 256 with all lower "
+ "bits set.");
unsigned bicOpc = AFI->isThumbFunction() ?
ARM::t2BICri : ARM::BICri;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(bicOpc), ARM::R6)
.addReg(ARM::R6, RegState::Kill)
- .addImm(MaxAlign - 1)
+ .addImm(MaxAlign.value() - 1)
.add(predOps(ARMCC::AL))
.add(condCodeOp());
}
@@ -1410,17 +2217,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
const bool Thumb = Opcode == ARM::tTPsoft;
MachineInstrBuilder MIB;
+ MachineFunction *MF = MBB.getParent();
if (STI->genLongCalls()) {
- MachineFunction *MF = MBB.getParent();
MachineConstantPool *MCP = MF->getConstantPool();
unsigned PCLabelID = AFI->createPICLabelUId();
MachineConstantPoolValue *CPV =
ARMConstantPoolSymbol::Create(MF->getFunction().getContext(),
"__aeabi_read_tp", PCLabelID, 0);
Register Reg = MI.getOperand(0).getReg();
- MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(Thumb ? ARM::tLDRpci : ARM::LDRi12), Reg)
- .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4));
+ MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Thumb ? ARM::tLDRpci : ARM::LDRi12), Reg)
+ .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, Align(4)));
if (!Thumb)
MIB.addImm(0);
MIB.add(predOps(ARMCC::AL));
@@ -1440,7 +2248,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB.cloneMemRefs(MI);
TransferImpOps(MI, MIB, MIB);
- MI.getMF()->moveCallSiteInfo(&MI, &*MIB);
+ // Update the call site info.
+ if (MI.isCandidateForCallSiteEntry())
+ MF->moveCallSiteInfo(&MI, &*MIB);
MI.eraseFromParent();
return true;
}
@@ -1504,7 +2314,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LDRLITOpc), DstReg)
- .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4));
+ .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, Align(4)));
if (IsARM)
MIB.addImm(0);
MIB.add(predOps(ARMCC::AL));