Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 227
1 file changed, 185 insertions, 42 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 5b0bae4d9274..80ba7b5f0d2e 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -343,6 +343,13 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
 }
 
 // Branch analysis.
+// Cond vector output format:
+//   0 elements indicates an unconditional branch
+//   2 elements indicates a conditional branch; the elements are
+//     the condition to check and the CPSR.
+//   3 elements indicates a hardware loop end; the elements
+//     are the opcode, the operand value to test, and a dummy
+//     operand used to pad out to 3 operands.
 bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                      MachineBasicBlock *&TBB,
                                      MachineBasicBlock *&FBB,
@@ -394,6 +401,17 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
     } else if (I->isReturn()) {
       // Returns can't be analyzed, but we should run cleanup.
       CantAnalyze = true;
+    } else if (I->getOpcode() == ARM::t2LoopEnd &&
+               MBB.getParent()
+                   ->getSubtarget<ARMSubtarget>()
+                   .enableMachinePipeliner()) {
+      if (!Cond.empty())
+        return true;
+      FBB = TBB;
+      TBB = I->getOperand(1).getMBB();
+      Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
+      Cond.push_back(I->getOperand(0));
+      Cond.push_back(MachineOperand::CreateImm(0));
     } else {
       // We encountered other unrecognized terminator. Bail out immediately.
       return true;
@@ -457,7 +475,7 @@ unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
     return 0;
 
   if (!isUncondBranchOpcode(I->getOpcode()) &&
-      !isCondBranchOpcode(I->getOpcode()))
+      !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
     return 0;
 
   // Remove the branch.
@@ -467,7 +485,7 @@ unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
   if (I == MBB.begin())
     return 1;
   --I;
-  if (!isCondBranchOpcode(I->getOpcode()))
+  if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
     return 1;
 
   // Remove the branch.
@@ -491,8 +509,8 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
 
   // Shouldn't be a fall through.
   assert(TBB && "insertBranch must not be told to insert a fallthrough");
-  assert((Cond.size() == 2 || Cond.size() == 0) &&
-         "ARM branch conditions have two components!");
+  assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
+         "ARM branch conditions have two or three components!");
 
   // For conditional branches, we use addOperand to preserve CPSR flags.
 
@@ -502,19 +520,24 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
       BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
     else
       BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
-  } else
+  } else if (Cond.size() == 2) {
     BuildMI(&MBB, DL, get(BccOpc))
         .addMBB(TBB)
         .addImm(Cond[0].getImm())
         .add(Cond[1]);
+  } else
+    BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
     return 1;
   }
 
   // Two-way conditional branch.
-  BuildMI(&MBB, DL, get(BccOpc))
-      .addMBB(TBB)
-      .addImm(Cond[0].getImm())
-      .add(Cond[1]);
+  if (Cond.size() == 2)
+    BuildMI(&MBB, DL, get(BccOpc))
+        .addMBB(TBB)
+        .addImm(Cond[0].getImm())
+        .add(Cond[1]);
+  else if (Cond.size() == 3)
+    BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
   if (isThumb)
     BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
   else
@@ -524,9 +547,12 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
 
 bool ARMBaseInstrInfo::
 reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
-  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
-  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
-  return false;
+  if (Cond.size() == 2) {
+    ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
+    Cond[0].setImm(ARMCC::getOppositeCondition(CC));
+    return false;
+  }
+  return true;
 }
 
 bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
@@ -556,7 +582,7 @@ std::string ARMBaseInstrInfo::createMIROperandComment(
     return GenericComment;
 
   // If not, check if we have an immediate operand.
-  if (Op.getType() != MachineOperand::MO_Immediate)
+  if (!Op.isImm())
     return std::string();
 
   // And print its corresponding condition code if the immediate is a
@@ -1703,7 +1729,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     // or some other super-register.
     int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
     if (ImpDefIdx != -1)
-      MI.RemoveOperand(ImpDefIdx);
+      MI.removeOperand(ImpDefIdx);
 
     // Change the opcode and operands.
     MI.setDesc(get(ARM::VMOVD));
@@ -2045,6 +2071,9 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
   if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
     return true;
 
+  if (isSEHInstruction(MI))
+    return true;
+
   // Treat the start of the IT block as a scheduling boundary, but schedule
   // t2IT along with all instructions following it.
   // FIXME: This is a big hammer. But the alternative is to add all potential
@@ -2598,7 +2627,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
   // ahead: strip all existing registers off and add them back again
   // in the right order.
   for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
-    MI->RemoveOperand(i);
+    MI->removeOperand(i);
 
   // Add the complete list back in.
   MachineInstrBuilder MIB(MF, &*MI);
@@ -2626,7 +2655,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
       // Turn it into a move.
       MI.setDesc(TII.get(ARM::MOVr));
       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
-      MI.RemoveOperand(FrameRegIdx+1);
+      MI.removeOperand(FrameRegIdx+1);
       Offset = 0;
       return true;
     } else if (Offset < 0) {
@@ -5103,7 +5132,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
     SrcReg = MI.getOperand(1).getReg();
 
     for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
-      MI.RemoveOperand(i - 1);
+      MI.removeOperand(i - 1);
 
     // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
     MI.setDesc(get(ARM::VORRd));
@@ -5122,7 +5151,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
     SrcReg = MI.getOperand(1).getReg();
 
     for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
-      MI.RemoveOperand(i - 1);
+      MI.removeOperand(i - 1);
 
     DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
 
@@ -5155,7 +5184,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
       break;
 
     for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
-      MI.RemoveOperand(i - 1);
+      MI.removeOperand(i - 1);
 
     // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
     // Again DDst may be undefined at the beginning of this instruction.
@@ -5190,7 +5219,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
      break;
 
    for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
-      MI.RemoveOperand(i - 1);
+      MI.removeOperand(i - 1);
 
    if (DSrc == DDst) {
      // Destination can be:
@@ -5766,26 +5795,25 @@ struct OutlinerCosts {
         SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {}
 };
 
-unsigned
-ARMBaseInstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
-  assert(C.LRUWasSet && "LRU wasn't set?");
+Register
+ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
   MachineFunction *MF = C.getMF();
-  const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo *>(
-      MF->getSubtarget().getRegisterInfo());
+  const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+  const ARMBaseRegisterInfo *ARI =
+      static_cast<const ARMBaseRegisterInfo *>(&TRI);
 
   BitVector regsReserved = ARI->getReservedRegs(*MF);
   // Check if there is an available register across the sequence that we can
   // use.
-  for (unsigned Reg : ARM::rGPRRegClass) {
+  for (Register Reg : ARM::rGPRRegClass) {
     if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) &&
         Reg != ARM::LR &&  // LR is not reserved, but don't use it.
         Reg != ARM::R12 && // R12 is not guaranteed to be preserved.
-        C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
+        C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
+        C.isAvailableInsideSeq(Reg, TRI))
       return Reg;
   }
-
-  // No suitable register. Return 0.
-  return 0u;
+  return Register();
 }
 
 // Compute liveness of LR at the point after the interval [I, E), which
@@ -5833,9 +5861,8 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
 
   // Compute liveness information for each candidate, and set FlagsSetInAll.
   const TargetRegisterInfo &TRI = getRegisterInfo();
-  std::for_each(
-      RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
-      [&FlagsSetInAll](outliner::Candidate &C) { FlagsSetInAll &= C.Flags; });
+  for (outliner::Candidate &C : RepeatedSequenceLocs)
+    FlagsSetInAll &= C.Flags;
 
   // According to the ARM Procedure Call Standard, the following are
   // undefined on entry/exit from a function call:
@@ -5854,9 +5881,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
     // to compute liveness here.
     if (C.Flags & UnsafeRegsDead)
       return false;
-    C.initLRU(TRI);
-    LiveRegUnits LRU = C.LRU;
-    return (!LRU.available(ARM::R12) || !LRU.available(ARM::CPSR));
+    return C.isAnyUnavailableAcrossOrOutOfSeq({ARM::R12, ARM::CPSR}, TRI);
   };
 
   // Are there any candidates where those registers are live?
@@ -5969,7 +5994,6 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
     std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
 
     for (outliner::Candidate &C : RepeatedSequenceLocs) {
-      C.initLRU(TRI);
       // LR liveness is overestimated in return blocks, unless they end with a
       // tail call.
       const auto Last = C.getMBB()->rbegin();
@@ -5977,7 +6001,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
           C.getMBB()->isReturnBlock() && !Last->isCall()
              ? isLRAvailable(TRI, Last,
                              (MachineBasicBlock::reverse_iterator)C.front())
-              : C.LRU.available(ARM::LR);
+              : C.isAvailableAcrossAndOutOfSeq(ARM::LR, TRI);
       if (LRIsAvailable) {
         FrameID = MachineOutlinerNoLRSave;
         NumBytesNoStackCalls += Costs.CallNoLRSave;
@@ -5996,7 +6020,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
 
       // Is SP used in the sequence at all? If not, we don't have to modify
      // the stack, so we are guaranteed to get the same frame.
-      else if (C.UsedInSequence.available(ARM::SP)) {
+      else if (C.isAvailableInsideSeq(ARM::SP, TRI)) {
        NumBytesNoStackCalls += Costs.CallDefault;
        C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault);
        CandidatesWithoutStackFixups.push_back(C);
@@ -6189,8 +6213,8 @@ bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
 
   LiveRegUnits LRU(getRegisterInfo());
 
-  std::for_each(MBB.rbegin(), MBB.rend(),
-                [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
+  for (MachineInstr &MI : llvm::reverse(MBB))
+    LRU.accumulate(MI);
 
   // Check if each of the unsafe registers are available...
   bool R12AvailableInBlock = LRU.available(ARM::R12);
@@ -6635,7 +6659,7 @@ void ARMBaseInstrInfo::buildOutlinedFrame(
 
 MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
     Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
-    MachineFunction &MF, const outliner::Candidate &C) const {
+    MachineFunction &MF, outliner::Candidate &C) const {
   MachineInstrBuilder MIB;
   MachineBasicBlock::iterator CallPt;
   unsigned Opc;
@@ -6726,3 +6750,122 @@ unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) {
                                              : ARM::BLX_pred;
 }
 
+namespace {
+class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
+  MachineInstr *EndLoop, *LoopCount;
+  MachineFunction *MF;
+  const TargetInstrInfo *TII;
+
+  // Meanings of the various stuff with loop types:
+  // t2Bcc:
+  //   EndLoop = branch at end of original BB that will become a kernel
+  //   LoopCount = CC setter live into branch
+  // t2LoopEnd:
+  //   EndLoop = branch at end of original BB
+  //   LoopCount = t2LoopDec
+public:
+  ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount)
+      : EndLoop(EndLoop), LoopCount(LoopCount),
+        MF(EndLoop->getParent()->getParent()),
+        TII(MF->getSubtarget().getInstrInfo()) {}
+
+  bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
+    // Only ignore the terminator.
+    return MI == EndLoop || MI == LoopCount;
+  }
+
+  Optional<bool> createTripCountGreaterCondition(
+      int TC, MachineBasicBlock &MBB,
+      SmallVectorImpl<MachineOperand> &Cond) override {
+
+    if (isCondBranchOpcode(EndLoop->getOpcode())) {
+      Cond.push_back(EndLoop->getOperand(1));
+      Cond.push_back(EndLoop->getOperand(2));
+      if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) {
+        TII->reverseBranchCondition(Cond);
+      }
+      return {};
+    } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) {
+      // General case just lets the unrolled t2LoopDec do the subtraction and
+      // therefore just needs to check if zero has been reached.
+      MachineInstr *LoopDec = nullptr;
+      for (auto &I : MBB.instrs())
+        if (I.getOpcode() == ARM::t2LoopDec)
+          LoopDec = &I;
+      assert(LoopDec && "Unable to find copied LoopDec");
+      // Check if we're done with the loop.
+      BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri))
+          .addReg(LoopDec->getOperand(0).getReg())
+          .addImm(0)
+          .addImm(ARMCC::AL)
+          .addReg(ARM::NoRegister);
+      Cond.push_back(MachineOperand::CreateImm(ARMCC::EQ));
+      Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, false));
+      return {};
+    } else
+      llvm_unreachable("Unknown EndLoop");
+  }
+
+  void setPreheader(MachineBasicBlock *NewPreheader) override {}
+
+  void adjustTripCount(int TripCountAdjust) override {}
+
+  void disposed() override {}
+};
+} // namespace
+
+std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
+ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
+  MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
+  MachineBasicBlock *Preheader = *LoopBB->pred_begin();
+  if (Preheader == LoopBB)
+    Preheader = *std::next(LoopBB->pred_begin());
+
+  if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) {
+    // If the branch is a Bcc, then the CPSR should be set somewhere within the
+    // block.  We need to determine the reaching definition of CPSR so that
+    // it can be marked as non-pipelineable, allowing the pipeliner to force
+    // it into stage 0 or give up if it cannot or will not do so.
+    MachineInstr *CCSetter = nullptr;
+    for (auto &L : LoopBB->instrs()) {
+      if (L.isCall())
+        return nullptr;
+      if (isCPSRDefined(L))
+        CCSetter = &L;
+    }
+    if (CCSetter)
+      return std::make_unique<ARMPipelinerLoopInfo>(&*I, CCSetter);
+    else
+      return nullptr; // Unable to find the CC setter, so unable to guarantee
+                      // that pipeline will work
+  }
+
+  // Recognize:
+  //   preheader:
+  //     %1 = t2DoopLoopStart %0
+  //   loop:
+  //     %2 = phi %1, <not loop>, %..., %loop
+  //     %3 = t2LoopDec %2, <imm>
+  //     t2LoopEnd %3, %loop
+
+  if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) {
+    for (auto &L : LoopBB->instrs())
+      if (L.isCall())
+        return nullptr;
+      else if (isVCTP(&L))
+        return nullptr;
+    Register LoopDecResult = I->getOperand(0).getReg();
+    MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
+    MachineInstr *LoopDec = MRI.getUniqueVRegDef(LoopDecResult);
+    if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec)
+      return nullptr;
+    MachineInstr *LoopStart = nullptr;
+    for (auto &J : Preheader->instrs())
+      if (J.getOpcode() == ARM::t2DoLoopStart)
+        LoopStart = &J;
+    if (!LoopStart)
+      return nullptr;
+    return std::make_unique<ARMPipelinerLoopInfo>(&*I, LoopDec);
+  }
+  return nullptr;
+}
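
Note on the Cond encoding introduced by this patch: the comment added above analyzeBranch documents three shapes for the Cond vector (0, 2, or 3 operands). As an illustration only, and not part of the patch or of LLVM itself, a consumer of ARMBaseInstrInfo::analyzeBranch could distinguish the encodings by the vector's size. The ARMBranchKind enum and classifyARMBranchCond helper below are invented names for this sketch.

// Sketch only: ARMBranchKind and classifyARMBranchCond are hypothetical names,
// not LLVM APIs. The cases mirror the encodings documented in the patch:
//   0 operands -> unconditional branch
//   2 operands -> conditional branch {condition-code imm, CPSR register}
//   3 operands -> hardware loop end {t2LoopEnd opcode imm, counter, dummy imm}
#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;

enum class ARMBranchKind { Unconditional, Conditional, HardwareLoopEnd };

static ARMBranchKind classifyARMBranchCond(ArrayRef<MachineOperand> Cond) {
  switch (Cond.size()) {
  case 0:
    return ARMBranchKind::Unconditional;
  case 2:
    return ARMBranchKind::Conditional;
  case 3:
    return ARMBranchKind::HardwareLoopEnd;
  default:
    llvm_unreachable("unexpected Cond size from ARM analyzeBranch");
  }
}

The three-operand form is what the new t2LoopEnd case in analyzeBranch pushes (the opcode, the loop-count operand, and a dummy immediate), and insertBranch rebuilds the t2LoopEnd from it with BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB).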