diff options
Diffstat (limited to 'lib/Target/ARM/ARMConstantIslandPass.cpp')
-rw-r--r-- | lib/Target/ARM/ARMConstantIslandPass.cpp | 160 |
1 files changed, 96 insertions, 64 deletions
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 55c1684028c2a..8511f67dccd5f 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -53,6 +53,11 @@ static cl::opt<bool> AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true), cl::desc("Adjust basic block layout to better use TB[BH]")); +static cl::opt<unsigned> +CPMaxIteration("arm-constant-island-max-iteration", cl::Hidden, cl::init(30), + cl::desc("The max number of iteration for converge")); + + /// UnknownPadding - Return the worst case padding that could result from /// unknown offset bits. This does not include alignment padding caused by /// known offset bits. @@ -274,6 +279,11 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } + const char *getPassName() const override { return "ARM constant island placement and branch shortening pass"; } @@ -293,10 +303,10 @@ namespace { unsigned getCombinedIndex(const MachineInstr *CPEMI); int findInRangeCPEntry(CPUser& U, unsigned UserOffset); bool findAvailableWater(CPUser&U, unsigned UserOffset, - water_iterator &WaterIter); + water_iterator &WaterIter, bool CloserWater); void createNewWater(unsigned CPUserIndex, unsigned UserOffset, MachineBasicBlock *&NewMBB); - bool handleConstantPoolUser(unsigned CPUserIndex); + bool handleConstantPoolUser(unsigned CPUserIndex, bool CloserWater); void removeDeadCPEMI(MachineInstr *CPEMI); bool removeUnusedCPEntries(); bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset, @@ -456,8 +466,11 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n'); bool CPChange = false; for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) - CPChange |= handleConstantPoolUser(i); - if (CPChange && ++NoCPIters > 30) + // For most inputs, it converges in no more than 5 iterations. + // If it doesn't end in 10, the input may have huge BB or many CPEs. + // In this case, we will try different heuristics. + CPChange |= handleConstantPoolUser(i, NoCPIters >= CPMaxIteration / 2); + if (CPChange && ++NoCPIters > CPMaxIteration) report_fatal_error("Constant Island pass failed to converge!"); DEBUG(dumpBBs()); @@ -478,10 +491,18 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { MadeChange = true; } - // Shrink 32-bit Thumb2 branch, load, and store instructions. + // Shrink 32-bit Thumb2 load and store instructions. if (isThumb2 && !STI->prefers32BitThumb()) MadeChange |= optimizeThumb2Instructions(); + // Shrink 32-bit branch instructions. + if (isThumb && STI->hasV8MBaselineOps()) + MadeChange |= optimizeThumb2Branches(); + + // Optimize jump tables using TBB / TBH. + if (isThumb2) + MadeChange |= optimizeThumb2JumpTables(); + // After a while, this might be made debug-only, but it is not expensive. verify(); @@ -654,7 +675,7 @@ bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) { // have an unconditional branch for whatever reason. MachineBasicBlock *TBB, *FBB; SmallVector<MachineOperand, 4> Cond; - bool TooDifficult = TII->AnalyzeBranch(*MBB, TBB, FBB, Cond); + bool TooDifficult = TII->analyzeBranch(*MBB, TBB, FBB, Cond); return TooDifficult || FBB == nullptr; } @@ -701,14 +722,10 @@ unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) { /// information about the sizes of each block and the locations of all /// the jump tables. void ARMConstantIslands::scanFunctionJumpTables() { - for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); - MBBI != E; ++MBBI) { - MachineBasicBlock &MBB = *MBBI; - - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) - if (I->isBranch() && I->getOpcode() == ARM::t2BR_JT) - T2JumpTables.push_back(I); + for (MachineBasicBlock &MBB : *MF) { + for (MachineInstr &I : MBB) + if (I.isBranch() && I.getOpcode() == ARM::t2BR_JT) + T2JumpTables.push_back(&I); } } @@ -735,22 +752,18 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { adjustBBOffsetsAfter(&MF->front()); // Now go back through the instructions and build up our data structures. - for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); - MBBI != E; ++MBBI) { - MachineBasicBlock &MBB = *MBBI; - + for (MachineBasicBlock &MBB : *MF) { // If this block doesn't fall through into the next MBB, then this is // 'water' that a constant pool island could be placed. if (!BBHasFallthrough(&MBB)) WaterList.push_back(&MBB); - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) { - if (I->isDebugValue()) + for (MachineInstr &I : MBB) { + if (I.isDebugValue()) continue; - unsigned Opc = I->getOpcode(); - if (I->isBranch()) { + unsigned Opc = I.getOpcode(); + if (I.isBranch()) { bool isCond = false; unsigned Bits = 0; unsigned Scale = 1; @@ -759,7 +772,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { default: continue; // Ignore other JT branches case ARM::t2BR_JT: - T2JumpTables.push_back(I); + T2JumpTables.push_back(&I); continue; // Does not get an entry in ImmBranches case ARM::Bcc: isCond = true; @@ -793,11 +806,11 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { // Record this immediate branch. unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale; - ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc)); + ImmBranches.push_back(ImmBranch(&I, MaxOffs, isCond, UOpc)); } if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET) - PushPopMIs.push_back(I); + PushPopMIs.push_back(&I); if (Opc == ARM::CONSTPOOL_ENTRY || Opc == ARM::JUMPTABLE_ADDRS || Opc == ARM::JUMPTABLE_INSTS || Opc == ARM::JUMPTABLE_TBB || @@ -805,8 +818,8 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { continue; // Scan the instructions for constant pool operands. - for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) - if (I->getOperand(op).isCPI() || I->getOperand(op).isJTI()) { + for (unsigned op = 0, e = I.getNumOperands(); op != e; ++op) + if (I.getOperand(op).isCPI() || I.getOperand(op).isJTI()) { // We found one. The addressing mode tells us the max displacement // from the PC that this instruction permits. @@ -865,15 +878,15 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { } // Remember that this is a user of a CP entry. - unsigned CPI = I->getOperand(op).getIndex(); - if (I->getOperand(op).isJTI()) { + unsigned CPI = I.getOperand(op).getIndex(); + if (I.getOperand(op).isJTI()) { JumpTableUserIndices.insert(std::make_pair(CPI, CPUsers.size())); CPI = JumpTableEntryIndices[CPI]; } MachineInstr *CPEMI = CPEMIs[CPI]; unsigned MaxOffs = ((1 << Bits)-1) * Scale; - CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm)); + CPUsers.push_back(CPUser(&I, CPEMI, MaxOffs, NegOk, IsSoImm)); // Increment corresponding CPEntry reference count. CPEntry *CPE = findConstPoolEntry(CPI, CPEMI); @@ -896,15 +909,14 @@ void ARMConstantIslands::computeBlockSize(MachineBasicBlock *MBB) { BBI.Unalign = 0; BBI.PostAlign = 0; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) { + for (MachineInstr &I : *MBB) { BBI.Size += TII->GetInstSizeInBytes(I); // For inline asm, GetInstSizeInBytes returns a conservative estimate. // The actual size may be smaller, but still a multiple of the instr size. - if (I->isInlineAsm()) + if (I.isInlineAsm()) BBI.Unalign = isThumb ? 1 : 2; // Also consider instructions that may be shrunk later. - else if (isThumb && mayOptimizeThumb2Instruction(I)) + else if (isThumb && mayOptimizeThumb2Instruction(&I)) BBI.Unalign = 1; } @@ -929,7 +941,7 @@ unsigned ARMConstantIslands::getOffsetOf(MachineInstr *MI) const { // Sum instructions before MI in MBB. for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) { assert(I != MBB->end() && "Didn't find MI in its own basic block?"); - Offset += TII->GetInstSizeInBytes(I); + Offset += TII->GetInstSizeInBytes(*I); } return Offset; } @@ -1108,7 +1120,7 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset, Growth = CPEEnd - NextBlockOffset; // Compute the padding that would go at the end of the CPE to align the next // block. - Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment); + Growth += OffsetToAlignment(CPEEnd, 1ULL << NextBlockAlignment); // If the CPE is to be inserted before the instruction, that will raise // the offset of the instruction. Also account for unknown alignment padding @@ -1285,11 +1297,27 @@ static inline unsigned getUnconditionalBrDisp(int Opc) { /// move to a lower address, so search backward from the end of the list and /// prefer the first water that is in range. bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, - water_iterator &WaterIter) { + water_iterator &WaterIter, + bool CloserWater) { if (WaterList.empty()) return false; unsigned BestGrowth = ~0u; + // The nearest water without splitting the UserBB is right after it. + // If the distance is still large (we have a big BB), then we need to split it + // if we don't converge after certain iterations. This helps the following + // situation to converge: + // BB0: + // Big BB + // BB1: + // Constant Pool + // When a CP access is out of range, BB0 may be used as water. However, + // inserting islands between BB0 and BB1 makes other accesses out of range. + MachineBasicBlock *UserBB = U.MI->getParent(); + unsigned MinNoSplitDisp = + BBInfo[UserBB->getNumber()].postOffset(getCPELogAlign(U.CPEMI)); + if (CloserWater && MinNoSplitDisp > U.getMaxDisp() / 2) + return false; for (water_iterator IP = std::prev(WaterList.end()), B = WaterList.begin();; --IP) { MachineBasicBlock* WaterBB = *IP; @@ -1301,6 +1329,8 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, // should be relatively uncommon and when it does happen, we want to be // sure to take advantage of it for all the CPEs near that block, so that // we don't insert more branches than necessary. + // When CloserWater is true, we try to find the lowest address after (or + // equal to) user MI's BB no matter of padding growth. unsigned Growth; if (isWaterInRange(UserOffset, WaterBB, U, Growth) && (WaterBB->getNumber() < U.HighWaterMark->getNumber() || @@ -1312,8 +1342,11 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber() << " Growth=" << Growth << '\n'); - // Keep looking unless it is perfect. - if (BestGrowth == 0) + if (CloserWater && WaterBB == U.MI->getParent()) + return true; + // Keep looking unless it is perfect and we're not looking for the lowest + // possible address. + if (!CloserWater && BestGrowth == 0) return true; } if (IP == B) @@ -1416,7 +1449,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, // iterates at least once. BaseInsertOffset = std::max(UserBBI.postOffset() - UPad - 8, - UserOffset + TII->GetInstSizeInBytes(UserMI) + 1); + UserOffset + TII->GetInstSizeInBytes(*UserMI) + 1); DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset)); } unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad + @@ -1426,11 +1459,11 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, unsigned CPUIndex = CPUserIndex+1; unsigned NumCPUsers = CPUsers.size(); MachineInstr *LastIT = nullptr; - for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI); + for (unsigned Offset = UserOffset + TII->GetInstSizeInBytes(*UserMI); Offset < BaseInsertOffset; - Offset += TII->GetInstSizeInBytes(MI), MI = std::next(MI)) { + Offset += TII->GetInstSizeInBytes(*MI), MI = std::next(MI)) { assert(MI != UserMBB->end() && "Fell off end of block"); - if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) { + if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == &*MI) { CPUser &U = CPUsers[CPUIndex]; if (!isOffsetInRange(Offset, EndInsertOffset, U)) { // Shift intertion point by one unit of alignment so it is within reach. @@ -1447,7 +1480,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, // Remember the last IT instruction. if (MI->getOpcode() == ARM::t2IT) - LastIT = MI; + LastIT = &*MI; } --MI; @@ -1455,23 +1488,24 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, // Avoid splitting an IT block. if (LastIT) { unsigned PredReg = 0; - ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg); + ARMCC::CondCodes CC = getITInstrPredicate(*MI, PredReg); if (CC != ARMCC::AL) MI = LastIT; } // We really must not split an IT block. DEBUG(unsigned PredReg; - assert(!isThumb || getITInstrPredicate(MI, PredReg) == ARMCC::AL)); + assert(!isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL)); - NewMBB = splitBlockBeforeInstr(MI); + NewMBB = splitBlockBeforeInstr(&*MI); } /// handleConstantPoolUser - Analyze the specified user, checking to see if it /// is out-of-range. If so, pick up the constant pool value and move it some /// place in-range. Return true if we changed any addresses (thus must run /// another pass of branch lengthening), false otherwise. -bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { +bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex, + bool CloserWater) { CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; @@ -1494,7 +1528,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock(); MachineBasicBlock *NewMBB; water_iterator IP; - if (findAvailableWater(U, UserOffset, IP)) { + if (findAvailableWater(U, UserOffset, IP, CloserWater)) { DEBUG(dbgs() << "Found water in range\n"); MachineBasicBlock *WaterBB = *IP; @@ -1584,7 +1618,7 @@ void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) { CPEBB->setAlignment(0); } else // Entries are sorted by descending alignment, so realign from the front. - CPEBB->setAlignment(getCPELogAlign(CPEBB->begin())); + CPEBB->setAlignment(getCPELogAlign(&*CPEBB->begin())); adjustBBOffsetsAfter(CPEBB); // An island has only one predecessor BB and one successor BB. Check if @@ -1728,7 +1762,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) { splitBlockBeforeInstr(MI); // No need for the branch to the next block. We're adding an unconditional // branch to the destination. - int delta = TII->GetInstSizeInBytes(&MBB->back()); + int delta = TII->GetInstSizeInBytes(MBB->back()); BBInfo[MBB->getNumber()].Size -= delta; MBB->back().eraseFromParent(); // BBInfo[SplitBB].Offset is wrong temporarily, fixed below @@ -1744,18 +1778,18 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) { BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode())) .addMBB(NextBB).addImm(CC).addReg(CCReg); Br.MI = &MBB->back(); - BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); + BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(MBB->back()); if (isThumb) BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB) .addImm(ARMCC::AL).addReg(0); else BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB); - BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); + BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(MBB->back()); unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr); ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr)); // Remove the old conditional branch. It may or may not still be in MBB. - BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI); + BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(*MI); MI->eraseFromParent(); adjustBBOffsetsAfter(MBB); return true; @@ -1852,8 +1886,6 @@ bool ARMConstantIslands::optimizeThumb2Instructions() { } } - MadeChange |= optimizeThumb2Branches(); - MadeChange |= optimizeThumb2JumpTables(); return MadeChange; } @@ -1910,7 +1942,7 @@ bool ARMConstantIslands::optimizeThumb2Branches() { NewOpc = 0; unsigned PredReg = 0; - ARMCC::CondCodes Pred = getInstrPredicate(Br.MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(*Br.MI, PredReg); if (Pred == ARMCC::EQ) NewOpc = ARM::tCBZ; else if (Pred == ARMCC::NE) @@ -1928,7 +1960,7 @@ bool ARMConstantIslands::optimizeThumb2Branches() { --CmpMI; if (CmpMI->getOpcode() == ARM::tCMPi8) { unsigned Reg = CmpMI->getOperand(0).getReg(); - Pred = getInstrPredicate(CmpMI, PredReg); + Pred = getInstrPredicate(*CmpMI, PredReg); if (Pred == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 && isARMLowRegister(Reg)) { @@ -2170,8 +2202,8 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { } } - unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI); - unsigned OrigSize = TII->GetInstSizeInBytes(MI); + unsigned NewSize = TII->GetInstSizeInBytes(*NewJTMI); + unsigned OrigSize = TII->GetInstSizeInBytes(*MI); MI->eraseFromParent(); int Delta = OrigSize - NewSize + DeadSize; @@ -2240,13 +2272,13 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { MachineFunction::iterator OldPrior = std::prev(BBi); // If the block terminator isn't analyzable, don't try to move the block - bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond); + bool B = TII->analyzeBranch(*BB, TBB, FBB, Cond); // If the block ends in an unconditional branch, move it. The prior block // has to have an analyzable terminator for us to move this one. Be paranoid // and make sure we're not trying to move the entry block of the function. - if (!B && Cond.empty() && BB != MF->begin() && - !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) { + if (!B && Cond.empty() && BB != &MF->front() && + !TII->analyzeBranch(*OldPrior, TBB, FBB, CondPrior)) { BB->moveAfter(JTBB); OldPrior->updateTerminator(); BB->updateTerminator(); |