summaryrefslogtreecommitdiff
path: root/lib/Target/ARM/ARMConstantIslandPass.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/ARM/ARMConstantIslandPass.cpp')
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp160
1 files changed, 96 insertions, 64 deletions
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 55c1684028c2a..8511f67dccd5f 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -53,6 +53,11 @@ static cl::opt<bool>
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
cl::desc("Adjust basic block layout to better use TB[BH]"));
+static cl::opt<unsigned>
+CPMaxIteration("arm-constant-island-max-iteration", cl::Hidden, cl::init(30),
+ cl::desc("The max number of iteration for converge"));
+
+
/// UnknownPadding - Return the worst case padding that could result from
/// unknown offset bits. This does not include alignment padding caused by
/// known offset bits.
@@ -274,6 +279,11 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+
const char *getPassName() const override {
return "ARM constant island placement and branch shortening pass";
}
@@ -293,10 +303,10 @@ namespace {
unsigned getCombinedIndex(const MachineInstr *CPEMI);
int findInRangeCPEntry(CPUser& U, unsigned UserOffset);
bool findAvailableWater(CPUser&U, unsigned UserOffset,
- water_iterator &WaterIter);
+ water_iterator &WaterIter, bool CloserWater);
void createNewWater(unsigned CPUserIndex, unsigned UserOffset,
MachineBasicBlock *&NewMBB);
- bool handleConstantPoolUser(unsigned CPUserIndex);
+ bool handleConstantPoolUser(unsigned CPUserIndex, bool CloserWater);
void removeDeadCPEMI(MachineInstr *CPEMI);
bool removeUnusedCPEntries();
bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
@@ -456,8 +466,11 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
bool CPChange = false;
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
- CPChange |= handleConstantPoolUser(i);
- if (CPChange && ++NoCPIters > 30)
+ // For most inputs, it converges in no more than 5 iterations.
+ // If it doesn't end in 10, the input may have huge BB or many CPEs.
+ // In this case, we will try different heuristics.
+ CPChange |= handleConstantPoolUser(i, NoCPIters >= CPMaxIteration / 2);
+ if (CPChange && ++NoCPIters > CPMaxIteration)
report_fatal_error("Constant Island pass failed to converge!");
DEBUG(dumpBBs());
@@ -478,10 +491,18 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
MadeChange = true;
}
- // Shrink 32-bit Thumb2 branch, load, and store instructions.
+ // Shrink 32-bit Thumb2 load and store instructions.
if (isThumb2 && !STI->prefers32BitThumb())
MadeChange |= optimizeThumb2Instructions();
+ // Shrink 32-bit branch instructions.
+ if (isThumb && STI->hasV8MBaselineOps())
+ MadeChange |= optimizeThumb2Branches();
+
+ // Optimize jump tables using TBB / TBH.
+ if (isThumb2)
+ MadeChange |= optimizeThumb2JumpTables();
+
// After a while, this might be made debug-only, but it is not expensive.
verify();
@@ -654,7 +675,7 @@ bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) {
// have an unconditional branch for whatever reason.
MachineBasicBlock *TBB, *FBB;
SmallVector<MachineOperand, 4> Cond;
- bool TooDifficult = TII->AnalyzeBranch(*MBB, TBB, FBB, Cond);
+ bool TooDifficult = TII->analyzeBranch(*MBB, TBB, FBB, Cond);
return TooDifficult || FBB == nullptr;
}
@@ -701,14 +722,10 @@ unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
/// information about the sizes of each block and the locations of all
/// the jump tables.
void ARMConstantIslands::scanFunctionJumpTables() {
- for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
- MBBI != E; ++MBBI) {
- MachineBasicBlock &MBB = *MBBI;
-
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I)
- if (I->isBranch() && I->getOpcode() == ARM::t2BR_JT)
- T2JumpTables.push_back(I);
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &I : MBB)
+ if (I.isBranch() && I.getOpcode() == ARM::t2BR_JT)
+ T2JumpTables.push_back(&I);
}
}
@@ -735,22 +752,18 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
adjustBBOffsetsAfter(&MF->front());
// Now go back through the instructions and build up our data structures.
- for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
- MBBI != E; ++MBBI) {
- MachineBasicBlock &MBB = *MBBI;
-
+ for (MachineBasicBlock &MBB : *MF) {
// If this block doesn't fall through into the next MBB, then this is
// 'water' that a constant pool island could be placed.
if (!BBHasFallthrough(&MBB))
WaterList.push_back(&MBB);
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- if (I->isDebugValue())
+ for (MachineInstr &I : MBB) {
+ if (I.isDebugValue())
continue;
- unsigned Opc = I->getOpcode();
- if (I->isBranch()) {
+ unsigned Opc = I.getOpcode();
+ if (I.isBranch()) {
bool isCond = false;
unsigned Bits = 0;
unsigned Scale = 1;
@@ -759,7 +772,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
default:
continue; // Ignore other JT branches
case ARM::t2BR_JT:
- T2JumpTables.push_back(I);
+ T2JumpTables.push_back(&I);
continue; // Does not get an entry in ImmBranches
case ARM::Bcc:
isCond = true;
@@ -793,11 +806,11 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
// Record this immediate branch.
unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
- ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc));
+ ImmBranches.push_back(ImmBranch(&I, MaxOffs, isCond, UOpc));
}
if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
- PushPopMIs.push_back(I);
+ PushPopMIs.push_back(&I);
if (Opc == ARM::CONSTPOOL_ENTRY || Opc == ARM::JUMPTABLE_ADDRS ||
Opc == ARM::JUMPTABLE_INSTS || Opc == ARM::JUMPTABLE_TBB ||
@@ -805,8 +818,8 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
continue;
// Scan the instructions for constant pool operands.
- for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
- if (I->getOperand(op).isCPI() || I->getOperand(op).isJTI()) {
+ for (unsigned op = 0, e = I.getNumOperands(); op != e; ++op)
+ if (I.getOperand(op).isCPI() || I.getOperand(op).isJTI()) {
// We found one. The addressing mode tells us the max displacement
// from the PC that this instruction permits.
@@ -865,15 +878,15 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
}
// Remember that this is a user of a CP entry.
- unsigned CPI = I->getOperand(op).getIndex();
- if (I->getOperand(op).isJTI()) {
+ unsigned CPI = I.getOperand(op).getIndex();
+ if (I.getOperand(op).isJTI()) {
JumpTableUserIndices.insert(std::make_pair(CPI, CPUsers.size()));
CPI = JumpTableEntryIndices[CPI];
}
MachineInstr *CPEMI = CPEMIs[CPI];
unsigned MaxOffs = ((1 << Bits)-1) * Scale;
- CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm));
+ CPUsers.push_back(CPUser(&I, CPEMI, MaxOffs, NegOk, IsSoImm));
// Increment corresponding CPEntry reference count.
CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
@@ -896,15 +909,14 @@ void ARMConstantIslands::computeBlockSize(MachineBasicBlock *MBB) {
BBI.Unalign = 0;
BBI.PostAlign = 0;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
- ++I) {
+ for (MachineInstr &I : *MBB) {
BBI.Size += TII->GetInstSizeInBytes(I);
// For inline asm, GetInstSizeInBytes returns a conservative estimate.
// The actual size may be smaller, but still a multiple of the instr size.
- if (I->isInlineAsm())
+ if (I.isInlineAsm())
BBI.Unalign = isThumb ? 1 : 2;
// Also consider instructions that may be shrunk later.
- else if (isThumb && mayOptimizeThumb2Instruction(I))
+ else if (isThumb && mayOptimizeThumb2Instruction(&I))
BBI.Unalign = 1;
}
@@ -929,7 +941,7 @@ unsigned ARMConstantIslands::getOffsetOf(MachineInstr *MI) const {
// Sum instructions before MI in MBB.
for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
assert(I != MBB->end() && "Didn't find MI in its own basic block?");
- Offset += TII->GetInstSizeInBytes(I);
+ Offset += TII->GetInstSizeInBytes(*I);
}
return Offset;
}
@@ -1108,7 +1120,7 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
Growth = CPEEnd - NextBlockOffset;
// Compute the padding that would go at the end of the CPE to align the next
// block.
- Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment);
+ Growth += OffsetToAlignment(CPEEnd, 1ULL << NextBlockAlignment);
// If the CPE is to be inserted before the instruction, that will raise
// the offset of the instruction. Also account for unknown alignment padding
@@ -1285,11 +1297,27 @@ static inline unsigned getUnconditionalBrDisp(int Opc) {
/// move to a lower address, so search backward from the end of the list and
/// prefer the first water that is in range.
bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
- water_iterator &WaterIter) {
+ water_iterator &WaterIter,
+ bool CloserWater) {
if (WaterList.empty())
return false;
unsigned BestGrowth = ~0u;
+ // The nearest water without splitting the UserBB is right after it.
+ // If the distance is still large (we have a big BB), then we need to split it
+ // if we don't converge after certain iterations. This helps the following
+ // situation to converge:
+ // BB0:
+ // Big BB
+ // BB1:
+ // Constant Pool
+ // When a CP access is out of range, BB0 may be used as water. However,
+ // inserting islands between BB0 and BB1 makes other accesses out of range.
+ MachineBasicBlock *UserBB = U.MI->getParent();
+ unsigned MinNoSplitDisp =
+ BBInfo[UserBB->getNumber()].postOffset(getCPELogAlign(U.CPEMI));
+ if (CloserWater && MinNoSplitDisp > U.getMaxDisp() / 2)
+ return false;
for (water_iterator IP = std::prev(WaterList.end()), B = WaterList.begin();;
--IP) {
MachineBasicBlock* WaterBB = *IP;
@@ -1301,6 +1329,8 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
// should be relatively uncommon and when it does happen, we want to be
// sure to take advantage of it for all the CPEs near that block, so that
// we don't insert more branches than necessary.
+ // When CloserWater is true, we try to find the lowest address after (or
+ // equal to) user MI's BB no matter of padding growth.
unsigned Growth;
if (isWaterInRange(UserOffset, WaterBB, U, Growth) &&
(WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
@@ -1312,8 +1342,11 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber()
<< " Growth=" << Growth << '\n');
- // Keep looking unless it is perfect.
- if (BestGrowth == 0)
+ if (CloserWater && WaterBB == U.MI->getParent())
+ return true;
+ // Keep looking unless it is perfect and we're not looking for the lowest
+ // possible address.
+ if (!CloserWater && BestGrowth == 0)
return true;
}
if (IP == B)
@@ -1416,7 +1449,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// iterates at least once.
BaseInsertOffset =
std::max(UserBBI.postOffset() - UPad - 8,
- UserOffset + TII->GetInstSizeInBytes(UserMI) + 1);
+ UserOffset + TII->GetInstSizeInBytes(*UserMI) + 1);
DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
}
unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad +
@@ -1426,11 +1459,11 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
unsigned CPUIndex = CPUserIndex+1;
unsigned NumCPUsers = CPUsers.size();
MachineInstr *LastIT = nullptr;
- for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
+ for (unsigned Offset = UserOffset + TII->GetInstSizeInBytes(*UserMI);
Offset < BaseInsertOffset;
- Offset += TII->GetInstSizeInBytes(MI), MI = std::next(MI)) {
+ Offset += TII->GetInstSizeInBytes(*MI), MI = std::next(MI)) {
assert(MI != UserMBB->end() && "Fell off end of block");
- if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == &*MI) {
CPUser &U = CPUsers[CPUIndex];
if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
// Shift intertion point by one unit of alignment so it is within reach.
@@ -1447,7 +1480,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// Remember the last IT instruction.
if (MI->getOpcode() == ARM::t2IT)
- LastIT = MI;
+ LastIT = &*MI;
}
--MI;
@@ -1455,23 +1488,24 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// Avoid splitting an IT block.
if (LastIT) {
unsigned PredReg = 0;
- ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes CC = getITInstrPredicate(*MI, PredReg);
if (CC != ARMCC::AL)
MI = LastIT;
}
// We really must not split an IT block.
DEBUG(unsigned PredReg;
- assert(!isThumb || getITInstrPredicate(MI, PredReg) == ARMCC::AL));
+ assert(!isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL));
- NewMBB = splitBlockBeforeInstr(MI);
+ NewMBB = splitBlockBeforeInstr(&*MI);
}
/// handleConstantPoolUser - Analyze the specified user, checking to see if it
/// is out-of-range. If so, pick up the constant pool value and move it some
/// place in-range. Return true if we changed any addresses (thus must run
/// another pass of branch lengthening), false otherwise.
-bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
+bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,
+ bool CloserWater) {
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
@@ -1494,7 +1528,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
MachineBasicBlock *NewMBB;
water_iterator IP;
- if (findAvailableWater(U, UserOffset, IP)) {
+ if (findAvailableWater(U, UserOffset, IP, CloserWater)) {
DEBUG(dbgs() << "Found water in range\n");
MachineBasicBlock *WaterBB = *IP;
@@ -1584,7 +1618,7 @@ void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
CPEBB->setAlignment(0);
} else
// Entries are sorted by descending alignment, so realign from the front.
- CPEBB->setAlignment(getCPELogAlign(CPEBB->begin()));
+ CPEBB->setAlignment(getCPELogAlign(&*CPEBB->begin()));
adjustBBOffsetsAfter(CPEBB);
// An island has only one predecessor BB and one successor BB. Check if
@@ -1728,7 +1762,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
splitBlockBeforeInstr(MI);
// No need for the branch to the next block. We're adding an unconditional
// branch to the destination.
- int delta = TII->GetInstSizeInBytes(&MBB->back());
+ int delta = TII->GetInstSizeInBytes(MBB->back());
BBInfo[MBB->getNumber()].Size -= delta;
MBB->back().eraseFromParent();
// BBInfo[SplitBB].Offset is wrong temporarily, fixed below
@@ -1744,18 +1778,18 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
.addMBB(NextBB).addImm(CC).addReg(CCReg);
Br.MI = &MBB->back();
- BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(MBB->back());
if (isThumb)
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB)
.addImm(ARMCC::AL).addReg(0);
else
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
- BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(MBB->back());
unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
// Remove the old conditional branch. It may or may not still be in MBB.
- BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI);
+ BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(*MI);
MI->eraseFromParent();
adjustBBOffsetsAfter(MBB);
return true;
@@ -1852,8 +1886,6 @@ bool ARMConstantIslands::optimizeThumb2Instructions() {
}
}
- MadeChange |= optimizeThumb2Branches();
- MadeChange |= optimizeThumb2JumpTables();
return MadeChange;
}
@@ -1910,7 +1942,7 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
NewOpc = 0;
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = getInstrPredicate(Br.MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(*Br.MI, PredReg);
if (Pred == ARMCC::EQ)
NewOpc = ARM::tCBZ;
else if (Pred == ARMCC::NE)
@@ -1928,7 +1960,7 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
--CmpMI;
if (CmpMI->getOpcode() == ARM::tCMPi8) {
unsigned Reg = CmpMI->getOperand(0).getReg();
- Pred = getInstrPredicate(CmpMI, PredReg);
+ Pred = getInstrPredicate(*CmpMI, PredReg);
if (Pred == ARMCC::AL &&
CmpMI->getOperand(1).getImm() == 0 &&
isARMLowRegister(Reg)) {
@@ -2170,8 +2202,8 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
}
}
- unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
- unsigned OrigSize = TII->GetInstSizeInBytes(MI);
+ unsigned NewSize = TII->GetInstSizeInBytes(*NewJTMI);
+ unsigned OrigSize = TII->GetInstSizeInBytes(*MI);
MI->eraseFromParent();
int Delta = OrigSize - NewSize + DeadSize;
@@ -2240,13 +2272,13 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
MachineFunction::iterator OldPrior = std::prev(BBi);
// If the block terminator isn't analyzable, don't try to move the block
- bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond);
+ bool B = TII->analyzeBranch(*BB, TBB, FBB, Cond);
// If the block ends in an unconditional branch, move it. The prior block
// has to have an analyzable terminator for us to move this one. Be paranoid
// and make sure we're not trying to move the entry block of the function.
- if (!B && Cond.empty() && BB != MF->begin() &&
- !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
+ if (!B && Cond.empty() && BB != &MF->front() &&
+ !TII->analyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
BB->moveAfter(JTBB);
OldPrior->updateTerminator();
BB->updateTerminator();