aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/ARM/ARMConstantIslandPass.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/ARM/ARMConstantIslandPass.cpp')
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp289
1 files changed, 203 insertions, 86 deletions
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 60e5d7bf6098..24ca25f73e96 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -26,8 +26,10 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -69,6 +71,7 @@ STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed");
STATISTIC(NumJTMoved, "Number of jump table destination blocks moved");
STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");
+STATISTIC(NumLEInserted, "Number of LE backwards branches inserted");
static cl::opt<bool>
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
@@ -212,6 +215,7 @@ namespace {
const ARMBaseInstrInfo *TII;
const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
+ MachineDominatorTree *DT = nullptr;
bool isThumb;
bool isThumb1;
bool isThumb2;
@@ -224,6 +228,11 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
@@ -238,7 +247,7 @@ namespace {
void doInitialJumpTablePlacement(std::vector<MachineInstr *> &CPEMIs);
bool BBHasFallthrough(MachineBasicBlock *MBB);
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
- unsigned getCPELogAlign(const MachineInstr *CPEMI);
+ Align getCPEAlign(const MachineInstr *CPEMI);
void scanFunctionJumpTables();
void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs);
MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
@@ -327,8 +336,7 @@ LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {
const BasicBlockInfo &BBI = BBInfo[J];
dbgs() << format("%08x %bb.%u\t", BBI.Offset, J)
<< " kb=" << unsigned(BBI.KnownBits)
- << " ua=" << unsigned(BBI.Unalign)
- << " pa=" << unsigned(BBI.PostAlign)
+ << " ua=" << unsigned(BBI.Unalign) << " pa=" << Log2(BBI.PostAlign)
<< format(" size=%#x\n", BBInfo[J].Size);
}
});
@@ -349,6 +357,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
isPositionIndependentOrROPI =
STI->getTargetLowering()->isPositionIndependent() || STI->isROPI();
AFI = MF->getInfo<ARMFunctionInfo>();
+ DT = &getAnalysis<MachineDominatorTree>();
isThumb = AFI->isThumbFunction();
isThumb1 = AFI->isThumb1OnlyFunction();
@@ -357,9 +366,6 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
HasFarJump = false;
bool GenerateTBB = isThumb2 || (isThumb1 && SynthesizeThumb1TBB);
- // This pass invalidates liveness information when it splits basic blocks.
- MF->getRegInfo().invalidateLiveness();
-
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
MF->RenumberBlocks();
@@ -398,7 +404,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
// Functions with jump tables need an alignment of 4 because they use the ADR
// instruction, which aligns the PC to 4 bytes before adding an offset.
if (!T2JumpTables.empty())
- MF->ensureAlignment(2);
+ MF->ensureAlignment(Align(4));
/// Remove dead constant pool entries.
MadeChange |= removeUnusedCPEntries();
@@ -487,8 +493,9 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs)
MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
MF->push_back(BB);
- // MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
- unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
+ // MachineConstantPool measures alignment in bytes.
+ const Align MaxAlign(MCP->getConstantPoolAlignment());
+ const unsigned MaxLogAlign = Log2(MaxAlign);
// Mark the basic block as required by the const-pool.
BB->setAlignment(MaxAlign);
@@ -501,7 +508,8 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs)
// alignment of all entries as long as BB is sufficiently aligned. Keep
// track of the insertion point for each alignment. We are going to bucket
// sort the entries as they are created.
- SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end());
+ SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxLogAlign + 1,
+ BB->end());
// Add all of the constants from the constant pool to the end block, use an
// identity mapping of CPI's to CPE's.
@@ -526,7 +534,7 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs)
// Ensure that future entries with higher alignment get inserted before
// CPEMI. This is bucket sort with iterators.
- for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a)
+ for (unsigned a = LogAlign + 1; a <= MaxLogAlign; ++a)
if (InsPoint[a] == InsAt)
InsPoint[a] = CPEMI;
@@ -640,29 +648,27 @@ ARMConstantIslands::findConstPoolEntry(unsigned CPI,
return nullptr;
}
-/// getCPELogAlign - Returns the required alignment of the constant pool entry
-/// represented by CPEMI. Alignment is measured in log2(bytes) units.
-unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
+/// getCPEAlign - Returns the required alignment of the constant pool entry
+/// represented by CPEMI.
+Align ARMConstantIslands::getCPEAlign(const MachineInstr *CPEMI) {
switch (CPEMI->getOpcode()) {
case ARM::CONSTPOOL_ENTRY:
break;
case ARM::JUMPTABLE_TBB:
- return isThumb1 ? 2 : 0;
+ return isThumb1 ? Align(4) : Align(1);
case ARM::JUMPTABLE_TBH:
- return isThumb1 ? 2 : 1;
+ return isThumb1 ? Align(4) : Align(2);
case ARM::JUMPTABLE_INSTS:
- return 1;
+ return Align(2);
case ARM::JUMPTABLE_ADDRS:
- return 2;
+ return Align(4);
default:
llvm_unreachable("unknown constpool entry kind");
}
unsigned CPI = getCombinedIndex(CPEMI);
assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
- unsigned Align = MCP->getConstants()[CPI].getAlignment();
- assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
- return Log2_32(Align);
+ return Align(MCP->getConstants()[CPI].getAlignment());
}
/// scanFunctionJumpTables - Do a scan of the function, building up
@@ -687,7 +693,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
BBInfoVector &BBInfo = BBUtils->getBBInfo();
// The known bits of the entry block offset are determined by the function
// alignment.
- BBInfo.front().KnownBits = MF->getAlignment();
+ BBInfo.front().KnownBits = Log2(MF->getAlignment());
// Compute block offsets and known bits.
BBUtils->adjustBBOffsetsAfter(&MF->front());
@@ -824,11 +830,6 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
Scale = 2; // +-(offset_8*2)
NegOk = true;
break;
-
- case ARM::tLDRHi:
- Bits = 5;
- Scale = 2; // +(offset_5*2)
- break;
}
// Remember that this is a user of a CP entry.
@@ -885,6 +886,13 @@ void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
MachineBasicBlock *OrigBB = MI->getParent();
+ // Collect liveness information at MI.
+ LivePhysRegs LRs(*MF->getSubtarget().getRegisterInfo());
+ LRs.addLiveOuts(*OrigBB);
+ auto LivenessEnd = ++MachineBasicBlock::iterator(MI).getReverse();
+ for (MachineInstr &LiveMI : make_range(OrigBB->rbegin(), LivenessEnd))
+ LRs.stepBackward(LiveMI);
+
// Create a new MBB for the code after the OrigBB.
MachineBasicBlock *NewBB =
MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
@@ -913,6 +921,12 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
// OrigBB branches to NewBB.
OrigBB->addSuccessor(NewBB);
+ // Update live-in information in the new block.
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ for (MCPhysReg L : LRs)
+ if (!MRI.isReserved(L))
+ NewBB->addLiveIn(L);
+
// Update internal data structures to account for the newly inserted MBB.
// This is almost the same as updateForInsertedWaterBlock, except that
// the Water goes after OrigBB, not NewBB.
@@ -1007,13 +1021,13 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
MachineBasicBlock* Water, CPUser &U,
unsigned &Growth) {
BBInfoVector &BBInfo = BBUtils->getBBInfo();
- unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
- unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
- unsigned NextBlockOffset, NextBlockAlignment;
+ const Align CPEAlign = getCPEAlign(U.CPEMI);
+ const unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPEAlign);
+ unsigned NextBlockOffset;
+ Align NextBlockAlignment;
MachineFunction::const_iterator NextBlock = Water->getIterator();
if (++NextBlock == MF->end()) {
NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
- NextBlockAlignment = 0;
} else {
NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
NextBlockAlignment = NextBlock->getAlignment();
@@ -1028,13 +1042,13 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
Growth = CPEEnd - NextBlockOffset;
// Compute the padding that would go at the end of the CPE to align the next
// block.
- Growth += OffsetToAlignment(CPEEnd, 1ULL << NextBlockAlignment);
+ Growth += offsetToAlignment(CPEEnd, NextBlockAlignment);
// If the CPE is to be inserted before the instruction, that will raise
// the offset of the instruction. Also account for unknown alignment padding
// in blocks between CPE and the user.
if (CPEOffset < UserOffset)
- UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign);
+ UserOffset += Growth + UnknownPadding(MF->getAlignment(), Log2(CPEAlign));
} else
// CPE fits in existing padding.
Growth = 0;
@@ -1200,8 +1214,8 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
// inserting islands between BB0 and BB1 makes other accesses out of range.
MachineBasicBlock *UserBB = U.MI->getParent();
BBInfoVector &BBInfo = BBUtils->getBBInfo();
- unsigned MinNoSplitDisp =
- BBInfo[UserBB->getNumber()].postOffset(getCPELogAlign(U.CPEMI));
+ const Align CPEAlign = getCPEAlign(U.CPEMI);
+ unsigned MinNoSplitDisp = BBInfo[UserBB->getNumber()].postOffset(CPEAlign);
if (CloserWater && MinNoSplitDisp > U.getMaxDisp() / 2)
return false;
for (water_iterator IP = std::prev(WaterList.end()), B = WaterList.begin();;
@@ -1254,7 +1268,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
- unsigned CPELogAlign = getCPELogAlign(CPEMI);
+ const Align CPEAlign = getCPEAlign(CPEMI);
MachineBasicBlock *UserMBB = UserMI->getParent();
BBInfoVector &BBInfo = BBUtils->getBBInfo();
const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
@@ -1267,7 +1281,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// Size of branch to insert.
unsigned Delta = isThumb1 ? 2 : 4;
// Compute the offset where the CPE will begin.
- unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta;
+ unsigned CPEOffset = UserBBI.postOffset(CPEAlign) + Delta;
if (isOffsetInRange(UserOffset, CPEOffset, U)) {
LLVM_DEBUG(dbgs() << "Split at end of " << printMBBReference(*UserMBB)
@@ -1308,11 +1322,11 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// Try to split the block so it's fully aligned. Compute the latest split
// point where we can add a 4-byte branch instruction, and then align to
- // LogAlign which is the largest possible alignment in the function.
- unsigned LogAlign = MF->getAlignment();
- assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
+ // Align which is the largest possible alignment in the function.
+ const Align Align = MF->getAlignment();
+ assert(Align >= CPEAlign && "Over-aligned constant pool entry");
unsigned KnownBits = UserBBI.internalKnownBits();
- unsigned UPad = UnknownPadding(LogAlign, KnownBits);
+ unsigned UPad = UnknownPadding(Align, KnownBits);
unsigned BaseInsertOffset = UserOffset + U.getMaxDisp() - UPad;
LLVM_DEBUG(dbgs() << format("Split in middle of big block before %#x",
BaseInsertOffset));
@@ -1323,7 +1337,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
BaseInsertOffset -= 4;
LLVM_DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
- << " la=" << LogAlign << " kb=" << KnownBits
+ << " la=" << Log2(Align) << " kb=" << KnownBits
<< " up=" << UPad << '\n');
// This could point off the end of the block if we've already got constant
@@ -1337,6 +1351,28 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
BaseInsertOffset =
std::max(UserBBI.postOffset() - UPad - 8,
UserOffset + TII->getInstSizeInBytes(*UserMI) + 1);
+ // If the CP reference (ie, UserOffset) is within the first four
+ // instructions after IT, this recalculated BaseInsertOffset could be in
+ // the middle of an IT block. If it is, change the BaseInsertOffset to
+ // just after the IT block. This still keeps the CP Entry in range
+ // because of the following reasons.
+ // 1. The initial BaseInsertOffset calculated is (UserOffset +
+ // U.getMaxDisp() - UPad).
+ // 2. An IT block is only at most 4 instructions plus the "it" itself (18
+ // bytes).
+ // 3. All the relevant instructions support much larger Maximum
+ // displacement.
+ MachineBasicBlock::iterator I = UserMI;
+ ++I;
+ for (unsigned Offset = UserOffset + TII->getInstSizeInBytes(*UserMI),
+ PredReg = 0;
+ I->getOpcode() != ARM::t2IT &&
+ getITInstrPredicate(*I, PredReg) != ARMCC::AL;
+ Offset += TII->getInstSizeInBytes(*I), I = std::next(I)) {
+ BaseInsertOffset =
+ std::max(BaseInsertOffset, Offset + TII->getInstSizeInBytes(*I) + 1);
+ assert(I != UserMBB->end() && "Fell off end of block");
+ }
LLVM_DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
}
unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad +
@@ -1354,8 +1390,8 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
CPUser &U = CPUsers[CPUIndex];
if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
// Shift insertion point by one unit of alignment so it is within reach.
- BaseInsertOffset -= 1u << LogAlign;
- EndInsertOffset -= 1u << LogAlign;
+ BaseInsertOffset -= Align.value();
+ EndInsertOffset -= Align.value();
}
// This is overly conservative, as we don't account for CPEMIs being
// reused within the block, but it doesn't matter much. Also assume CPEs
@@ -1397,9 +1433,10 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
}
// We really must not split an IT block.
- LLVM_DEBUG(unsigned PredReg; assert(
- !isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL));
-
+#ifndef NDEBUG
+ unsigned PredReg;
+ assert(!isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL);
+#endif
NewMBB = splitBlockBeforeInstr(&*MI);
}
@@ -1464,9 +1501,9 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,
// Always align the new block because CP entries can be smaller than 4
// bytes. Be careful not to decrease the existing alignment, e.g. NewMBB may
// be an already aligned constant pool block.
- const unsigned Align = isThumb ? 1 : 2;
- if (NewMBB->getAlignment() < Align)
- NewMBB->setAlignment(Align);
+ const Align Alignment = isThumb ? Align(2) : Align(4);
+ if (NewMBB->getAlignment() < Alignment)
+ NewMBB->setAlignment(Alignment);
// Remove the original WaterList entry; we want subsequent insertions in
// this vicinity to go after the one we're about to insert. This
@@ -1495,7 +1532,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,
decrementCPEReferenceCount(CPI, CPEMI);
// Mark the basic block as aligned as required by the const-pool entry.
- NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
+ NewIsland->setAlignment(getCPEAlign(U.CPEMI));
// Increase the size of the island block to account for the new entry.
BBUtils->adjustBBSize(NewIsland, Size);
@@ -1529,10 +1566,11 @@ void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
BBInfo[CPEBB->getNumber()].Size = 0;
// This block no longer needs to be aligned.
- CPEBB->setAlignment(0);
- } else
+ CPEBB->setAlignment(Align::None());
+ } else {
// Entries are sorted by descending alignment, so realign from the front.
- CPEBB->setAlignment(getCPELogAlign(&*CPEBB->begin()));
+ CPEBB->setAlignment(getCPEAlign(&*CPEBB->begin()));
+ }
BBUtils->adjustBBOffsetsAfter(CPEBB);
// An island has only one predecessor BB and one successor BB. Check if
@@ -1620,7 +1658,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
// L2:
ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm();
CC = ARMCC::getOppositeCondition(CC);
- unsigned CCReg = MI->getOperand(2).getReg();
+ Register CCReg = MI->getOperand(2).getReg();
// If the branch is at the end of its MBB and that has a fall-through block,
// direct the updated conditional branch to the fall-through block. Otherwise,
@@ -1778,16 +1816,10 @@ bool ARMConstantIslands::optimizeThumb2Instructions() {
return MadeChange;
}
+
bool ARMConstantIslands::optimizeThumb2Branches() {
- bool MadeChange = false;
- // The order in which branches appear in ImmBranches is approximately their
- // order within the function body. By visiting later branches first, we reduce
- // the distance between earlier forward branches and their targets, making it
- // more likely that the cbn?z optimization, which can only apply to forward
- // branches, will succeed.
- for (unsigned i = ImmBranches.size(); i != 0; --i) {
- ImmBranch &Br = ImmBranches[i-1];
+ auto TryShrinkBranch = [this](ImmBranch &Br) {
unsigned Opcode = Br.MI->getOpcode();
unsigned NewOpc = 0;
unsigned Scale = 1;
@@ -1815,47 +1847,115 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
BBUtils->adjustBBSize(MBB, -2);
BBUtils->adjustBBOffsetsAfter(MBB);
++NumT2BrShrunk;
- MadeChange = true;
+ return true;
}
}
+ return false;
+ };
- Opcode = Br.MI->getOpcode();
- if (Opcode != ARM::tBcc)
- continue;
+ struct ImmCompare {
+ MachineInstr* MI = nullptr;
+ unsigned NewOpc = 0;
+ };
+
+ auto FindCmpForCBZ = [this](ImmBranch &Br, ImmCompare &ImmCmp,
+ MachineBasicBlock *DestBB) {
+ ImmCmp.MI = nullptr;
+ ImmCmp.NewOpc = 0;
// If the conditional branch doesn't kill CPSR, then CPSR can be liveout
// so this transformation is not safe.
if (!Br.MI->killsRegister(ARM::CPSR))
- continue;
+ return false;
- NewOpc = 0;
unsigned PredReg = 0;
+ unsigned NewOpc = 0;
ARMCC::CondCodes Pred = getInstrPredicate(*Br.MI, PredReg);
if (Pred == ARMCC::EQ)
NewOpc = ARM::tCBZ;
else if (Pred == ARMCC::NE)
NewOpc = ARM::tCBNZ;
- if (!NewOpc)
- continue;
- MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
+ else
+ return false;
+
// Check if the distance is within 126. Subtract starting offset by 2
// because the cmp will be eliminated.
unsigned BrOffset = BBUtils->getOffsetOf(Br.MI) + 4 - 2;
BBInfoVector &BBInfo = BBUtils->getBBInfo();
unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
if (BrOffset >= DestOffset || (DestOffset - BrOffset) > 126)
- continue;
+ return false;
// Search backwards to find a tCMPi8
auto *TRI = STI->getRegisterInfo();
MachineInstr *CmpMI = findCMPToFoldIntoCBZ(Br.MI, TRI);
if (!CmpMI || CmpMI->getOpcode() != ARM::tCMPi8)
+ return false;
+
+ ImmCmp.MI = CmpMI;
+ ImmCmp.NewOpc = NewOpc;
+ return true;
+ };
+
+ auto TryConvertToLE = [this](ImmBranch &Br, ImmCompare &Cmp) {
+ if (Br.MI->getOpcode() != ARM::t2Bcc || !STI->hasLOB() ||
+ STI->hasMinSize())
+ return false;
+
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
+ if (BBUtils->getOffsetOf(MBB) < BBUtils->getOffsetOf(DestBB) ||
+ !BBUtils->isBBInRange(Br.MI, DestBB, 4094))
+ return false;
+
+ if (!DT->dominates(DestBB, MBB))
+ return false;
+
+ // We queried for the CBN?Z opcode based upon the 'ExitBB', the opposite
+ // target of Br. So now we need to reverse the condition.
+ Cmp.NewOpc = Cmp.NewOpc == ARM::tCBZ ? ARM::tCBNZ : ARM::tCBZ;
+
+ MachineInstrBuilder MIB = BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(),
+ TII->get(ARM::t2LE));
+ MIB.add(Br.MI->getOperand(0));
+ Br.MI->eraseFromParent();
+ Br.MI = MIB;
+ ++NumLEInserted;
+ return true;
+ };
+
+ bool MadeChange = false;
+
+ // The order in which branches appear in ImmBranches is approximately their
+ // order within the function body. By visiting later branches first, we reduce
+ // the distance between earlier forward branches and their targets, making it
+ // more likely that the cbn?z optimization, which can only apply to forward
+ // branches, will succeed.
+ for (ImmBranch &Br : reverse(ImmBranches)) {
+ MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ MachineBasicBlock *ExitBB = &MBB->back() == Br.MI ?
+ MBB->getFallThrough() :
+ MBB->back().getOperand(0).getMBB();
+
+ ImmCompare Cmp;
+ if (FindCmpForCBZ(Br, Cmp, ExitBB) && TryConvertToLE(Br, Cmp)) {
+ DestBB = ExitBB;
+ MadeChange = true;
+ } else {
+ FindCmpForCBZ(Br, Cmp, DestBB);
+ MadeChange |= TryShrinkBranch(Br);
+ }
+
+ unsigned Opcode = Br.MI->getOpcode();
+ if ((Opcode != ARM::tBcc && Opcode != ARM::t2LE) || !Cmp.NewOpc)
continue;
- unsigned Reg = CmpMI->getOperand(0).getReg();
+ Register Reg = Cmp.MI->getOperand(0).getReg();
// Check for Kill flags on Reg. If they are present remove them and set kill
// on the new CBZ.
+ auto *TRI = STI->getRegisterInfo();
MachineBasicBlock::iterator KillMI = Br.MI;
bool RegKilled = false;
do {
@@ -1865,19 +1965,32 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
RegKilled = true;
break;
}
- } while (KillMI != CmpMI);
+ } while (KillMI != Cmp.MI);
// Create the new CBZ/CBNZ
- MachineBasicBlock *MBB = Br.MI->getParent();
- LLVM_DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI);
+ LLVM_DEBUG(dbgs() << "Fold: " << *Cmp.MI << " and: " << *Br.MI);
MachineInstr *NewBR =
- BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(NewOpc))
+ BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(Cmp.NewOpc))
.addReg(Reg, getKillRegState(RegKilled))
.addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags());
- CmpMI->eraseFromParent();
- Br.MI->eraseFromParent();
- Br.MI = NewBR;
+
+ Cmp.MI->eraseFromParent();
+ BBInfoVector &BBInfo = BBUtils->getBBInfo();
BBInfo[MBB->getNumber()].Size -= 2;
+
+ if (Br.MI->getOpcode() == ARM::tBcc) {
+ Br.MI->eraseFromParent();
+ Br.MI = NewBR;
+ } else if (&MBB->back() != Br.MI) {
+ // We've generated an LE and already erased the original conditional
+ // branch. The CBN?Z is now used to branch to the other successor, so an
+ // unconditional branch terminator is now redundant.
+ MachineInstr *LastMI = &MBB->back();
+ if (LastMI != Br.MI) {
+ BBInfo[MBB->getNumber()].Size -= LastMI->getDesc().getSize();
+ LastMI->eraseFromParent();
+ }
+ }
BBUtils->adjustBBOffsetsAfter(MBB);
++NumCBZ;
MadeChange = true;
@@ -1931,8 +2044,8 @@ bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
// of BaseReg, but only if the t2ADDrs can be removed.
// + Some instruction other than t2ADDrs computing the entry. Not seen in
// the wild, but we should be careful.
- unsigned EntryReg = JumpMI->getOperand(0).getReg();
- unsigned BaseReg = LEAMI->getOperand(0).getReg();
+ Register EntryReg = JumpMI->getOperand(0).getReg();
+ Register BaseReg = LEAMI->getOperand(0).getReg();
CanDeleteLEA = true;
BaseRegKill = false;
@@ -2009,7 +2122,7 @@ static void RemoveDeadAddBetweenLEAAndJT(MachineInstr *LEAMI,
// but the JT now uses PC. Finds the last ADD (if any) that def's EntryReg
// and is not clobbered / used.
MachineInstr *RemovableAdd = nullptr;
- unsigned EntryReg = JumpMI->getOperand(0).getReg();
+ Register EntryReg = JumpMI->getOperand(0).getReg();
// Find the last ADD to set EntryReg
MachineBasicBlock::iterator I(LEAMI);
@@ -2106,7 +2219,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
// %idx = tLSLri %idx, 2
// %base = tLEApcrelJT
// %t = tLDRr %base, %idx
- unsigned BaseReg = User.MI->getOperand(0).getReg();
+ Register BaseReg = User.MI->getOperand(0).getReg();
if (User.MI->getIterator() == User.MI->getParent()->begin())
continue;
@@ -2116,7 +2229,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
!Shift->getOperand(2).isKill())
continue;
IdxReg = Shift->getOperand(2).getReg();
- unsigned ShiftedIdxReg = Shift->getOperand(0).getReg();
+ Register ShiftedIdxReg = Shift->getOperand(0).getReg();
// It's important that IdxReg is live until the actual TBB/TBH. Most of
// the range is checked later, but the LEA might still clobber it and not
@@ -2313,6 +2426,10 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
MachineFunction::iterator MBBI = ++JTBB->getIterator();
MF->insert(MBBI, NewBB);
+ // Copy live-in information to new block.
+ for (const MachineBasicBlock::RegisterMaskPair &RegMaskPair : BB->liveins())
+ NewBB->addLiveIn(RegMaskPair);
+
// Add an unconditional branch from NewBB to BB.
// There doesn't seem to be meaningful DebugInfo available; this doesn't
// correspond directly to anything in the source.