diff options
Diffstat (limited to 'lib/Target/ARM/ARMConstantIslandPass.cpp')
-rw-r--r-- | lib/Target/ARM/ARMConstantIslandPass.cpp | 289 |
1 files changed, 203 insertions, 86 deletions
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 60e5d7bf6098..24ca25f73e96 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -26,8 +26,10 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -69,6 +71,7 @@ STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk"); STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed"); STATISTIC(NumJTMoved, "Number of jump table destination blocks moved"); STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted"); +STATISTIC(NumLEInserted, "Number of LE backwards branches inserted"); static cl::opt<bool> AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true), @@ -212,6 +215,7 @@ namespace { const ARMBaseInstrInfo *TII; const ARMSubtarget *STI; ARMFunctionInfo *AFI; + MachineDominatorTree *DT = nullptr; bool isThumb; bool isThumb1; bool isThumb2; @@ -224,6 +228,11 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs); @@ -238,7 +247,7 @@ namespace { void doInitialJumpTablePlacement(std::vector<MachineInstr *> &CPEMIs); bool BBHasFallthrough(MachineBasicBlock *MBB); CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); - unsigned getCPELogAlign(const MachineInstr *CPEMI); + Align getCPEAlign(const MachineInstr *CPEMI); void scanFunctionJumpTables(); void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs); MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI); @@ -327,8 +336,7 @@ LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() { const BasicBlockInfo &BBI = BBInfo[J]; dbgs() << format("%08x %bb.%u\t", BBI.Offset, J) << " kb=" << unsigned(BBI.KnownBits) - << " ua=" << unsigned(BBI.Unalign) - << " pa=" << unsigned(BBI.PostAlign) + << " ua=" << unsigned(BBI.Unalign) << " pa=" << Log2(BBI.PostAlign) << format(" size=%#x\n", BBInfo[J].Size); } }); @@ -349,6 +357,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { isPositionIndependentOrROPI = STI->getTargetLowering()->isPositionIndependent() || STI->isROPI(); AFI = MF->getInfo<ARMFunctionInfo>(); + DT = &getAnalysis<MachineDominatorTree>(); isThumb = AFI->isThumbFunction(); isThumb1 = AFI->isThumb1OnlyFunction(); @@ -357,9 +366,6 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { HasFarJump = false; bool GenerateTBB = isThumb2 || (isThumb1 && SynthesizeThumb1TBB); - // This pass invalidates liveness information when it splits basic blocks. - MF->getRegInfo().invalidateLiveness(); - // Renumber all of the machine basic blocks in the function, guaranteeing that // the numbers agree with the position of the block in the function. MF->RenumberBlocks(); @@ -398,7 +404,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // Functions with jump tables need an alignment of 4 because they use the ADR // instruction, which aligns the PC to 4 bytes before adding an offset. if (!T2JumpTables.empty()) - MF->ensureAlignment(2); + MF->ensureAlignment(Align(4)); /// Remove dead constant pool entries. MadeChange |= removeUnusedCPEntries(); @@ -487,8 +493,9 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs) MachineBasicBlock *BB = MF->CreateMachineBasicBlock(); MF->push_back(BB); - // MachineConstantPool measures alignment in bytes. We measure in log2(bytes). - unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment()); + // MachineConstantPool measures alignment in bytes. + const Align MaxAlign(MCP->getConstantPoolAlignment()); + const unsigned MaxLogAlign = Log2(MaxAlign); // Mark the basic block as required by the const-pool. BB->setAlignment(MaxAlign); @@ -501,7 +508,8 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs) // alignment of all entries as long as BB is sufficiently aligned. Keep // track of the insertion point for each alignment. We are going to bucket // sort the entries as they are created. - SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end()); + SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxLogAlign + 1, + BB->end()); // Add all of the constants from the constant pool to the end block, use an // identity mapping of CPI's to CPE's. @@ -526,7 +534,7 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs) // Ensure that future entries with higher alignment get inserted before // CPEMI. This is bucket sort with iterators. - for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a) + for (unsigned a = LogAlign + 1; a <= MaxLogAlign; ++a) if (InsPoint[a] == InsAt) InsPoint[a] = CPEMI; @@ -640,29 +648,27 @@ ARMConstantIslands::findConstPoolEntry(unsigned CPI, return nullptr; } -/// getCPELogAlign - Returns the required alignment of the constant pool entry -/// represented by CPEMI. Alignment is measured in log2(bytes) units. -unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) { +/// getCPEAlign - Returns the required alignment of the constant pool entry +/// represented by CPEMI. +Align ARMConstantIslands::getCPEAlign(const MachineInstr *CPEMI) { switch (CPEMI->getOpcode()) { case ARM::CONSTPOOL_ENTRY: break; case ARM::JUMPTABLE_TBB: - return isThumb1 ? 2 : 0; + return isThumb1 ? Align(4) : Align(1); case ARM::JUMPTABLE_TBH: - return isThumb1 ? 2 : 1; + return isThumb1 ? Align(4) : Align(2); case ARM::JUMPTABLE_INSTS: - return 1; + return Align(2); case ARM::JUMPTABLE_ADDRS: - return 2; + return Align(4); default: llvm_unreachable("unknown constpool entry kind"); } unsigned CPI = getCombinedIndex(CPEMI); assert(CPI < MCP->getConstants().size() && "Invalid constant pool index."); - unsigned Align = MCP->getConstants()[CPI].getAlignment(); - assert(isPowerOf2_32(Align) && "Invalid CPE alignment"); - return Log2_32(Align); + return Align(MCP->getConstants()[CPI].getAlignment()); } /// scanFunctionJumpTables - Do a scan of the function, building up @@ -687,7 +693,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { BBInfoVector &BBInfo = BBUtils->getBBInfo(); // The known bits of the entry block offset are determined by the function // alignment. - BBInfo.front().KnownBits = MF->getAlignment(); + BBInfo.front().KnownBits = Log2(MF->getAlignment()); // Compute block offsets and known bits. BBUtils->adjustBBOffsetsAfter(&MF->front()); @@ -824,11 +830,6 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { Scale = 2; // +-(offset_8*2) NegOk = true; break; - - case ARM::tLDRHi: - Bits = 5; - Scale = 2; // +(offset_5*2) - break; } // Remember that this is a user of a CP entry. @@ -885,6 +886,13 @@ void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) { MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) { MachineBasicBlock *OrigBB = MI->getParent(); + // Collect liveness information at MI. + LivePhysRegs LRs(*MF->getSubtarget().getRegisterInfo()); + LRs.addLiveOuts(*OrigBB); + auto LivenessEnd = ++MachineBasicBlock::iterator(MI).getReverse(); + for (MachineInstr &LiveMI : make_range(OrigBB->rbegin(), LivenessEnd)) + LRs.stepBackward(LiveMI); + // Create a new MBB for the code after the OrigBB. MachineBasicBlock *NewBB = MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); @@ -913,6 +921,12 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) { // OrigBB branches to NewBB. OrigBB->addSuccessor(NewBB); + // Update live-in information in the new block. + MachineRegisterInfo &MRI = MF->getRegInfo(); + for (MCPhysReg L : LRs) + if (!MRI.isReserved(L)) + NewBB->addLiveIn(L); + // Update internal data structures to account for the newly inserted MBB. // This is almost the same as updateForInsertedWaterBlock, except that // the Water goes after OrigBB, not NewBB. @@ -1007,13 +1021,13 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset, MachineBasicBlock* Water, CPUser &U, unsigned &Growth) { BBInfoVector &BBInfo = BBUtils->getBBInfo(); - unsigned CPELogAlign = getCPELogAlign(U.CPEMI); - unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign); - unsigned NextBlockOffset, NextBlockAlignment; + const Align CPEAlign = getCPEAlign(U.CPEMI); + const unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPEAlign); + unsigned NextBlockOffset; + Align NextBlockAlignment; MachineFunction::const_iterator NextBlock = Water->getIterator(); if (++NextBlock == MF->end()) { NextBlockOffset = BBInfo[Water->getNumber()].postOffset(); - NextBlockAlignment = 0; } else { NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset; NextBlockAlignment = NextBlock->getAlignment(); @@ -1028,13 +1042,13 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset, Growth = CPEEnd - NextBlockOffset; // Compute the padding that would go at the end of the CPE to align the next // block. - Growth += OffsetToAlignment(CPEEnd, 1ULL << NextBlockAlignment); + Growth += offsetToAlignment(CPEEnd, NextBlockAlignment); // If the CPE is to be inserted before the instruction, that will raise // the offset of the instruction. Also account for unknown alignment padding // in blocks between CPE and the user. if (CPEOffset < UserOffset) - UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign); + UserOffset += Growth + UnknownPadding(MF->getAlignment(), Log2(CPEAlign)); } else // CPE fits in existing padding. Growth = 0; @@ -1200,8 +1214,8 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, // inserting islands between BB0 and BB1 makes other accesses out of range. MachineBasicBlock *UserBB = U.MI->getParent(); BBInfoVector &BBInfo = BBUtils->getBBInfo(); - unsigned MinNoSplitDisp = - BBInfo[UserBB->getNumber()].postOffset(getCPELogAlign(U.CPEMI)); + const Align CPEAlign = getCPEAlign(U.CPEMI); + unsigned MinNoSplitDisp = BBInfo[UserBB->getNumber()].postOffset(CPEAlign); if (CloserWater && MinNoSplitDisp > U.getMaxDisp() / 2) return false; for (water_iterator IP = std::prev(WaterList.end()), B = WaterList.begin();; @@ -1254,7 +1268,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; - unsigned CPELogAlign = getCPELogAlign(CPEMI); + const Align CPEAlign = getCPEAlign(CPEMI); MachineBasicBlock *UserMBB = UserMI->getParent(); BBInfoVector &BBInfo = BBUtils->getBBInfo(); const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()]; @@ -1267,7 +1281,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, // Size of branch to insert. unsigned Delta = isThumb1 ? 2 : 4; // Compute the offset where the CPE will begin. - unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta; + unsigned CPEOffset = UserBBI.postOffset(CPEAlign) + Delta; if (isOffsetInRange(UserOffset, CPEOffset, U)) { LLVM_DEBUG(dbgs() << "Split at end of " << printMBBReference(*UserMBB) @@ -1308,11 +1322,11 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, // Try to split the block so it's fully aligned. Compute the latest split // point where we can add a 4-byte branch instruction, and then align to - // LogAlign which is the largest possible alignment in the function. - unsigned LogAlign = MF->getAlignment(); - assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry"); + // Align which is the largest possible alignment in the function. + const Align Align = MF->getAlignment(); + assert(Align >= CPEAlign && "Over-aligned constant pool entry"); unsigned KnownBits = UserBBI.internalKnownBits(); - unsigned UPad = UnknownPadding(LogAlign, KnownBits); + unsigned UPad = UnknownPadding(Align, KnownBits); unsigned BaseInsertOffset = UserOffset + U.getMaxDisp() - UPad; LLVM_DEBUG(dbgs() << format("Split in middle of big block before %#x", BaseInsertOffset)); @@ -1323,7 +1337,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, BaseInsertOffset -= 4; LLVM_DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset) - << " la=" << LogAlign << " kb=" << KnownBits + << " la=" << Log2(Align) << " kb=" << KnownBits << " up=" << UPad << '\n'); // This could point off the end of the block if we've already got constant @@ -1337,6 +1351,28 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, BaseInsertOffset = std::max(UserBBI.postOffset() - UPad - 8, UserOffset + TII->getInstSizeInBytes(*UserMI) + 1); + // If the CP is referenced(ie, UserOffset) is in first four instructions + // after IT, this recalculated BaseInsertOffset could be in the middle of + // an IT block. If it is, change the BaseInsertOffset to just after the + // IT block. This still make the CP Entry is in range becuase of the + // following reasons. + // 1. The initial BaseseInsertOffset calculated is (UserOffset + + // U.getMaxDisp() - UPad). + // 2. An IT block is only at most 4 instructions plus the "it" itself (18 + // bytes). + // 3. All the relevant instructions support much larger Maximum + // displacement. + MachineBasicBlock::iterator I = UserMI; + ++I; + for (unsigned Offset = UserOffset + TII->getInstSizeInBytes(*UserMI), + PredReg = 0; + I->getOpcode() != ARM::t2IT && + getITInstrPredicate(*I, PredReg) != ARMCC::AL; + Offset += TII->getInstSizeInBytes(*I), I = std::next(I)) { + BaseInsertOffset = + std::max(BaseInsertOffset, Offset + TII->getInstSizeInBytes(*I) + 1); + assert(I != UserMBB->end() && "Fell off end of block"); + } LLVM_DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset)); } unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad + @@ -1354,8 +1390,8 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, CPUser &U = CPUsers[CPUIndex]; if (!isOffsetInRange(Offset, EndInsertOffset, U)) { // Shift intertion point by one unit of alignment so it is within reach. - BaseInsertOffset -= 1u << LogAlign; - EndInsertOffset -= 1u << LogAlign; + BaseInsertOffset -= Align.value(); + EndInsertOffset -= Align.value(); } // This is overly conservative, as we don't account for CPEMIs being // reused within the block, but it doesn't matter much. Also assume CPEs @@ -1397,9 +1433,10 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, } // We really must not split an IT block. - LLVM_DEBUG(unsigned PredReg; assert( - !isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL)); - +#ifndef NDEBUG + unsigned PredReg; + assert(!isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL); +#endif NewMBB = splitBlockBeforeInstr(&*MI); } @@ -1464,9 +1501,9 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex, // Always align the new block because CP entries can be smaller than 4 // bytes. Be careful not to decrease the existing alignment, e.g. NewMBB may // be an already aligned constant pool block. - const unsigned Align = isThumb ? 1 : 2; - if (NewMBB->getAlignment() < Align) - NewMBB->setAlignment(Align); + const Align Alignment = isThumb ? Align(2) : Align(4); + if (NewMBB->getAlignment() < Alignment) + NewMBB->setAlignment(Alignment); // Remove the original WaterList entry; we want subsequent insertions in // this vicinity to go after the one we're about to insert. This @@ -1495,7 +1532,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex, decrementCPEReferenceCount(CPI, CPEMI); // Mark the basic block as aligned as required by the const-pool entry. - NewIsland->setAlignment(getCPELogAlign(U.CPEMI)); + NewIsland->setAlignment(getCPEAlign(U.CPEMI)); // Increase the size of the island block to account for the new entry. BBUtils->adjustBBSize(NewIsland, Size); @@ -1529,10 +1566,11 @@ void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) { BBInfo[CPEBB->getNumber()].Size = 0; // This block no longer needs to be aligned. - CPEBB->setAlignment(0); - } else + CPEBB->setAlignment(Align::None()); + } else { // Entries are sorted by descending alignment, so realign from the front. - CPEBB->setAlignment(getCPELogAlign(&*CPEBB->begin())); + CPEBB->setAlignment(getCPEAlign(&*CPEBB->begin())); + } BBUtils->adjustBBOffsetsAfter(CPEBB); // An island has only one predecessor BB and one successor BB. Check if @@ -1620,7 +1658,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) { // L2: ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm(); CC = ARMCC::getOppositeCondition(CC); - unsigned CCReg = MI->getOperand(2).getReg(); + Register CCReg = MI->getOperand(2).getReg(); // If the branch is at the end of its MBB and that has a fall-through block, // direct the updated conditional branch to the fall-through block. Otherwise, @@ -1778,16 +1816,10 @@ bool ARMConstantIslands::optimizeThumb2Instructions() { return MadeChange; } + bool ARMConstantIslands::optimizeThumb2Branches() { - bool MadeChange = false; - // The order in which branches appear in ImmBranches is approximately their - // order within the function body. By visiting later branches first, we reduce - // the distance between earlier forward branches and their targets, making it - // more likely that the cbn?z optimization, which can only apply to forward - // branches, will succeed. - for (unsigned i = ImmBranches.size(); i != 0; --i) { - ImmBranch &Br = ImmBranches[i-1]; + auto TryShrinkBranch = [this](ImmBranch &Br) { unsigned Opcode = Br.MI->getOpcode(); unsigned NewOpc = 0; unsigned Scale = 1; @@ -1815,47 +1847,115 @@ bool ARMConstantIslands::optimizeThumb2Branches() { BBUtils->adjustBBSize(MBB, -2); BBUtils->adjustBBOffsetsAfter(MBB); ++NumT2BrShrunk; - MadeChange = true; + return true; } } + return false; + }; - Opcode = Br.MI->getOpcode(); - if (Opcode != ARM::tBcc) - continue; + struct ImmCompare { + MachineInstr* MI = nullptr; + unsigned NewOpc = 0; + }; + + auto FindCmpForCBZ = [this](ImmBranch &Br, ImmCompare &ImmCmp, + MachineBasicBlock *DestBB) { + ImmCmp.MI = nullptr; + ImmCmp.NewOpc = 0; // If the conditional branch doesn't kill CPSR, then CPSR can be liveout // so this transformation is not safe. if (!Br.MI->killsRegister(ARM::CPSR)) - continue; + return false; - NewOpc = 0; unsigned PredReg = 0; + unsigned NewOpc = 0; ARMCC::CondCodes Pred = getInstrPredicate(*Br.MI, PredReg); if (Pred == ARMCC::EQ) NewOpc = ARM::tCBZ; else if (Pred == ARMCC::NE) NewOpc = ARM::tCBNZ; - if (!NewOpc) - continue; - MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); + else + return false; + // Check if the distance is within 126. Subtract starting offset by 2 // because the cmp will be eliminated. unsigned BrOffset = BBUtils->getOffsetOf(Br.MI) + 4 - 2; BBInfoVector &BBInfo = BBUtils->getBBInfo(); unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset; if (BrOffset >= DestOffset || (DestOffset - BrOffset) > 126) - continue; + return false; // Search backwards to find a tCMPi8 auto *TRI = STI->getRegisterInfo(); MachineInstr *CmpMI = findCMPToFoldIntoCBZ(Br.MI, TRI); if (!CmpMI || CmpMI->getOpcode() != ARM::tCMPi8) + return false; + + ImmCmp.MI = CmpMI; + ImmCmp.NewOpc = NewOpc; + return true; + }; + + auto TryConvertToLE = [this](ImmBranch &Br, ImmCompare &Cmp) { + if (Br.MI->getOpcode() != ARM::t2Bcc || !STI->hasLOB() || + STI->hasMinSize()) + return false; + + MachineBasicBlock *MBB = Br.MI->getParent(); + MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); + if (BBUtils->getOffsetOf(MBB) < BBUtils->getOffsetOf(DestBB) || + !BBUtils->isBBInRange(Br.MI, DestBB, 4094)) + return false; + + if (!DT->dominates(DestBB, MBB)) + return false; + + // We queried for the CBN?Z opcode based upon the 'ExitBB', the opposite + // target of Br. So now we need to reverse the condition. + Cmp.NewOpc = Cmp.NewOpc == ARM::tCBZ ? ARM::tCBNZ : ARM::tCBZ; + + MachineInstrBuilder MIB = BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), + TII->get(ARM::t2LE)); + MIB.add(Br.MI->getOperand(0)); + Br.MI->eraseFromParent(); + Br.MI = MIB; + ++NumLEInserted; + return true; + }; + + bool MadeChange = false; + + // The order in which branches appear in ImmBranches is approximately their + // order within the function body. By visiting later branches first, we reduce + // the distance between earlier forward branches and their targets, making it + // more likely that the cbn?z optimization, which can only apply to forward + // branches, will succeed. + for (ImmBranch &Br : reverse(ImmBranches)) { + MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); + MachineBasicBlock *MBB = Br.MI->getParent(); + MachineBasicBlock *ExitBB = &MBB->back() == Br.MI ? + MBB->getFallThrough() : + MBB->back().getOperand(0).getMBB(); + + ImmCompare Cmp; + if (FindCmpForCBZ(Br, Cmp, ExitBB) && TryConvertToLE(Br, Cmp)) { + DestBB = ExitBB; + MadeChange = true; + } else { + FindCmpForCBZ(Br, Cmp, DestBB); + MadeChange |= TryShrinkBranch(Br); + } + + unsigned Opcode = Br.MI->getOpcode(); + if ((Opcode != ARM::tBcc && Opcode != ARM::t2LE) || !Cmp.NewOpc) continue; - unsigned Reg = CmpMI->getOperand(0).getReg(); + Register Reg = Cmp.MI->getOperand(0).getReg(); // Check for Kill flags on Reg. If they are present remove them and set kill // on the new CBZ. + auto *TRI = STI->getRegisterInfo(); MachineBasicBlock::iterator KillMI = Br.MI; bool RegKilled = false; do { @@ -1865,19 +1965,32 @@ bool ARMConstantIslands::optimizeThumb2Branches() { RegKilled = true; break; } - } while (KillMI != CmpMI); + } while (KillMI != Cmp.MI); // Create the new CBZ/CBNZ - MachineBasicBlock *MBB = Br.MI->getParent(); - LLVM_DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI); + LLVM_DEBUG(dbgs() << "Fold: " << *Cmp.MI << " and: " << *Br.MI); MachineInstr *NewBR = - BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(NewOpc)) + BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(Cmp.NewOpc)) .addReg(Reg, getKillRegState(RegKilled)) .addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags()); - CmpMI->eraseFromParent(); - Br.MI->eraseFromParent(); - Br.MI = NewBR; + + Cmp.MI->eraseFromParent(); + BBInfoVector &BBInfo = BBUtils->getBBInfo(); BBInfo[MBB->getNumber()].Size -= 2; + + if (Br.MI->getOpcode() == ARM::tBcc) { + Br.MI->eraseFromParent(); + Br.MI = NewBR; + } else if (&MBB->back() != Br.MI) { + // We've generated an LE and already erased the original conditional + // branch. The CBN?Z is now used to branch to the other successor, so an + // unconditional branch terminator is now redundant. + MachineInstr *LastMI = &MBB->back(); + if (LastMI != Br.MI) { + BBInfo[MBB->getNumber()].Size -= LastMI->getDesc().getSize(); + LastMI->eraseFromParent(); + } + } BBUtils->adjustBBOffsetsAfter(MBB); ++NumCBZ; MadeChange = true; @@ -1931,8 +2044,8 @@ bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI, // of BaseReg, but only if the t2ADDrs can be removed. // + Some instruction other than t2ADDrs computing the entry. Not seen in // the wild, but we should be careful. - unsigned EntryReg = JumpMI->getOperand(0).getReg(); - unsigned BaseReg = LEAMI->getOperand(0).getReg(); + Register EntryReg = JumpMI->getOperand(0).getReg(); + Register BaseReg = LEAMI->getOperand(0).getReg(); CanDeleteLEA = true; BaseRegKill = false; @@ -2009,7 +2122,7 @@ static void RemoveDeadAddBetweenLEAAndJT(MachineInstr *LEAMI, // but the JT now uses PC. Finds the last ADD (if any) that def's EntryReg // and is not clobbered / used. MachineInstr *RemovableAdd = nullptr; - unsigned EntryReg = JumpMI->getOperand(0).getReg(); + Register EntryReg = JumpMI->getOperand(0).getReg(); // Find the last ADD to set EntryReg MachineBasicBlock::iterator I(LEAMI); @@ -2106,7 +2219,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { // %idx = tLSLri %idx, 2 // %base = tLEApcrelJT // %t = tLDRr %base, %idx - unsigned BaseReg = User.MI->getOperand(0).getReg(); + Register BaseReg = User.MI->getOperand(0).getReg(); if (User.MI->getIterator() == User.MI->getParent()->begin()) continue; @@ -2116,7 +2229,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { !Shift->getOperand(2).isKill()) continue; IdxReg = Shift->getOperand(2).getReg(); - unsigned ShiftedIdxReg = Shift->getOperand(0).getReg(); + Register ShiftedIdxReg = Shift->getOperand(0).getReg(); // It's important that IdxReg is live until the actual TBB/TBH. Most of // the range is checked later, but the LEA might still clobber it and not @@ -2313,6 +2426,10 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { MachineFunction::iterator MBBI = ++JTBB->getIterator(); MF->insert(MBBI, NewBB); + // Copy live-in information to new block. + for (const MachineBasicBlock::RegisterMaskPair &RegMaskPair : BB->liveins()) + NewBB->addLiveIn(RegMaskPair); + // Add an unconditional branch from NewBB to BB. // There doesn't seem to be meaningful DebugInfo available; this doesn't // correspond directly to anything in the source. |