diff options
Diffstat (limited to 'lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp')
| -rw-r--r-- | lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 373 |
1 files changed, 74 insertions, 299 deletions
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index dd2ea6a9dbd6..dcb05601e5f4 100644 --- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -38,7 +38,6 @@ STATISTIC(NumPostFolded, "Number of post-index updates folded"); STATISTIC(NumPreFolded, "Number of pre-index updates folded"); STATISTIC(NumUnscaledPairCreated, "Number of load/store from unscaled generated"); -STATISTIC(NumNarrowLoadsPromoted, "Number of narrow loads promoted"); STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted"); STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted"); @@ -51,14 +50,6 @@ static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit", static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden); -static cl::opt<bool> EnableNarrowLdMerge("enable-narrow-ld-merge", cl::Hidden, - cl::init(false), - cl::desc("Enable narrow load merge")); - -namespace llvm { -void initializeAArch64LoadStoreOptPass(PassRegistry &); -} - #define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass" namespace { @@ -111,11 +102,11 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit, MachineBasicBlock::iterator &StoreI); - // Merge the two instructions indicated into a wider instruction. + // Merge the two instructions indicated into a wider narrow store instruction. MachineBasicBlock::iterator - mergeNarrowInsns(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator MergeMI, - const LdStPairFlags &Flags); + mergeNarrowZeroStores(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator MergeMI, + const LdStPairFlags &Flags); // Merge the two instructions indicated into a single pair-wise instruction. MachineBasicBlock::iterator @@ -151,8 +142,8 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { mergeUpdateInsn(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update, bool IsPreIdx); - // Find and merge foldable ldr/str instructions. - bool tryToMergeLdStInst(MachineBasicBlock::iterator &MBBI); + // Find and merge zero store instructions. + bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI); // Find and pair ldr/str instructions. bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI); @@ -160,18 +151,16 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { // Find and promote load instructions which read directly from store. bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI); - bool optimizeBlock(MachineBasicBlock &MBB, bool enableNarrowLdOpt); + bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt); bool runOnMachineFunction(MachineFunction &Fn) override; MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } - const char *getPassName() const override { - return AARCH64_LOAD_STORE_OPT_NAME; - } + StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; } }; char AArch64LoadStoreOpt::ID = 0; } // namespace @@ -179,23 +168,6 @@ char AArch64LoadStoreOpt::ID = 0; INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt", AARCH64_LOAD_STORE_OPT_NAME, false, false) -static unsigned getBitExtrOpcode(MachineInstr &MI) { - switch (MI.getOpcode()) { - default: - llvm_unreachable("Unexpected opcode."); - case AArch64::LDRBBui: - case AArch64::LDURBBi: - case AArch64::LDRHHui: - case AArch64::LDURHHi: - return AArch64::UBFMWri; - case AArch64::LDRSBWui: - case AArch64::LDURSBWi: - case AArch64::LDRSHWui: - case AArch64::LDURSHWi: - return AArch64::SBFMWri; - } -} - static bool isNarrowStore(unsigned Opc) { switch (Opc) { default: @@ -208,30 +180,6 @@ static bool isNarrowStore(unsigned Opc) { } } -static bool isNarrowLoad(unsigned Opc) { - switch (Opc) { - default: - return false; - case AArch64::LDRHHui: - case AArch64::LDURHHi: - case AArch64::LDRBBui: - case AArch64::LDURBBi: - case AArch64::LDRSHWui: - case AArch64::LDURSHWi: - case AArch64::LDRSBWui: - case AArch64::LDURSBWi: - return true; - } -} - -static bool isNarrowLoad(MachineInstr &MI) { - return isNarrowLoad(MI.getOpcode()); -} - -static bool isNarrowLoadOrStore(unsigned Opc) { - return isNarrowLoad(Opc) || isNarrowStore(Opc); -} - // Scaling factor for unscaled load or store. static int getMemScale(MachineInstr &MI) { switch (MI.getOpcode()) { @@ -323,23 +271,11 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc, case AArch64::STURSi: case AArch64::LDRSui: case AArch64::LDURSi: - case AArch64::LDRHHui: - case AArch64::LDURHHi: - case AArch64::LDRBBui: - case AArch64::LDURBBi: return Opc; case AArch64::LDRSWui: return AArch64::LDRWui; case AArch64::LDURSWi: return AArch64::LDURWi; - case AArch64::LDRSBWui: - return AArch64::LDRBBui; - case AArch64::LDRSHWui: - return AArch64::LDRHHui; - case AArch64::LDURSBWi: - return AArch64::LDURBBi; - case AArch64::LDURSHWi: - return AArch64::LDURHHi; } } @@ -359,18 +295,6 @@ static unsigned getMatchingWideOpcode(unsigned Opc) { return AArch64::STURXi; case AArch64::STRWui: return AArch64::STRXui; - case AArch64::LDRHHui: - case AArch64::LDRSHWui: - return AArch64::LDRWui; - case AArch64::LDURHHi: - case AArch64::LDURSHWi: - return AArch64::LDURWi; - case AArch64::LDRBBui: - case AArch64::LDRSBWui: - return AArch64::LDRHHui; - case AArch64::LDURBBi: - case AArch64::LDURSBWi: - return AArch64::LDURHHi; } } @@ -614,23 +538,20 @@ static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst, (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize)); } -static bool isPromotableZeroStoreOpcode(unsigned Opc) { - return isNarrowStore(Opc) || Opc == AArch64::STRWui || Opc == AArch64::STURWi; -} - -static bool isPromotableZeroStoreOpcode(MachineInstr &MI) { - return isPromotableZeroStoreOpcode(MI.getOpcode()); -} - static bool isPromotableZeroStoreInst(MachineInstr &MI) { - return (isPromotableZeroStoreOpcode(MI)) && + unsigned Opc = MI.getOpcode(); + return (Opc == AArch64::STRWui || Opc == AArch64::STURWi || + isNarrowStore(Opc)) && getLdStRegOp(MI).getReg() == AArch64::WZR; } MachineBasicBlock::iterator -AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator MergeMI, - const LdStPairFlags &Flags) { +AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator MergeMI, + const LdStPairFlags &Flags) { + assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) && + "Expected promotable zero stores."); + MachineBasicBlock::iterator NextI = I; ++NextI; // If NextI is the second of the two instructions to be merged, we need @@ -654,15 +575,12 @@ AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I, MergeForward ? getLdStBaseOp(*MergeMI) : getLdStBaseOp(*I); // Which register is Rt and which is Rt2 depends on the offset order. - MachineInstr *RtMI, *Rt2MI; + MachineInstr *RtMI; if (getLdStOffsetOp(*I).getImm() == - getLdStOffsetOp(*MergeMI).getImm() + OffsetStride) { + getLdStOffsetOp(*MergeMI).getImm() + OffsetStride) RtMI = &*MergeMI; - Rt2MI = &*I; - } else { + else RtMI = &*I; - Rt2MI = &*MergeMI; - } int OffsetImm = getLdStOffsetOp(*RtMI).getImm(); // Change the scaled offset from small to large type. @@ -671,105 +589,9 @@ AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I, OffsetImm /= 2; } + // Construct the new instruction. DebugLoc DL = I->getDebugLoc(); MachineBasicBlock *MBB = I->getParent(); - if (isNarrowLoad(Opc)) { - MachineInstr *RtNewDest = &*(MergeForward ? I : MergeMI); - // When merging small (< 32 bit) loads for big-endian targets, the order of - // the component parts gets swapped. - if (!Subtarget->isLittleEndian()) - std::swap(RtMI, Rt2MI); - // Construct the new load instruction. - MachineInstr *NewMemMI, *BitExtMI1, *BitExtMI2; - NewMemMI = - BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc))) - .addOperand(getLdStRegOp(*RtNewDest)) - .addOperand(BaseRegOp) - .addImm(OffsetImm) - .setMemRefs(I->mergeMemRefsWith(*MergeMI)); - (void)NewMemMI; - - DEBUG( - dbgs() - << "Creating the new load and extract. Replacing instructions:\n "); - DEBUG(I->print(dbgs())); - DEBUG(dbgs() << " "); - DEBUG(MergeMI->print(dbgs())); - DEBUG(dbgs() << " with instructions:\n "); - DEBUG((NewMemMI)->print(dbgs())); - - int Width = getMemScale(*I) == 1 ? 8 : 16; - int LSBLow = 0; - int LSBHigh = Width; - int ImmsLow = LSBLow + Width - 1; - int ImmsHigh = LSBHigh + Width - 1; - MachineInstr *ExtDestMI = &*(MergeForward ? MergeMI : I); - if ((ExtDestMI == Rt2MI) == Subtarget->isLittleEndian()) { - // Create the bitfield extract for high bits. - BitExtMI1 = - BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*Rt2MI))) - .addOperand(getLdStRegOp(*Rt2MI)) - .addReg(getLdStRegOp(*RtNewDest).getReg()) - .addImm(LSBHigh) - .addImm(ImmsHigh); - // Create the bitfield extract for low bits. - if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) { - // For unsigned, prefer to use AND for low bits. - BitExtMI2 = BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::ANDWri)) - .addOperand(getLdStRegOp(*RtMI)) - .addReg(getLdStRegOp(*RtNewDest).getReg()) - .addImm(ImmsLow); - } else { - BitExtMI2 = - BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*RtMI))) - .addOperand(getLdStRegOp(*RtMI)) - .addReg(getLdStRegOp(*RtNewDest).getReg()) - .addImm(LSBLow) - .addImm(ImmsLow); - } - } else { - // Create the bitfield extract for low bits. - if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) { - // For unsigned, prefer to use AND for low bits. - BitExtMI1 = BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::ANDWri)) - .addOperand(getLdStRegOp(*RtMI)) - .addReg(getLdStRegOp(*RtNewDest).getReg()) - .addImm(ImmsLow); - } else { - BitExtMI1 = - BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*RtMI))) - .addOperand(getLdStRegOp(*RtMI)) - .addReg(getLdStRegOp(*RtNewDest).getReg()) - .addImm(LSBLow) - .addImm(ImmsLow); - } - - // Create the bitfield extract for high bits. - BitExtMI2 = - BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*Rt2MI))) - .addOperand(getLdStRegOp(*Rt2MI)) - .addReg(getLdStRegOp(*RtNewDest).getReg()) - .addImm(LSBHigh) - .addImm(ImmsHigh); - } - (void)BitExtMI1; - (void)BitExtMI2; - - DEBUG(dbgs() << " "); - DEBUG((BitExtMI1)->print(dbgs())); - DEBUG(dbgs() << " "); - DEBUG((BitExtMI2)->print(dbgs())); - DEBUG(dbgs() << "\n"); - - // Erase the old instructions. - I->eraseFromParent(); - MergeMI->eraseFromParent(); - return NextI; - } - assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) && - "Expected promotable zero store"); - - // Construct the new instruction. MachineInstrBuilder MIB; MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc))) .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR) @@ -778,7 +600,7 @@ AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I, .setMemRefs(I->mergeMemRefsWith(*MergeMI)); (void)MIB; - DEBUG(dbgs() << "Creating wider load/store. Replacing instructions:\n "); + DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n "); DEBUG(I->print(dbgs())); DEBUG(dbgs() << " "); DEBUG(MergeMI->print(dbgs())); @@ -945,6 +767,7 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, // Remove the load, if the destination register of the loads is the same // register for stored value. if (StRt == LdRt && LoadSize == 8) { + StoreI->clearRegisterKills(StRt, TRI); DEBUG(dbgs() << "Remove load instruction:\n "); DEBUG(LoadI->print(dbgs())); DEBUG(dbgs() << "\n"); @@ -1009,6 +832,8 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, .addImm(Imms); } } + StoreI->clearRegisterKills(StRt, TRI); + (void)BitExtMI; DEBUG(dbgs() << "Promoting load by replacing :\n "); @@ -1041,8 +866,10 @@ static void trackRegDefsUses(const MachineInstr &MI, BitVector &ModifiedRegs, if (!Reg) continue; if (MO.isDef()) { - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - ModifiedRegs.set(*AI); + // WZR/XZR are not modified even when used as a destination register. + if (Reg != AArch64::WZR && Reg != AArch64::XZR) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + ModifiedRegs.set(*AI); } else { assert(MO.isUse() && "Reg operand not a def and not a use?!?"); for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) @@ -1118,8 +945,9 @@ bool AArch64LoadStoreOpt::findMatchingStore( --MBBI; MachineInstr &MI = *MBBI; - // Don't count DBG_VALUE instructions towards the search limit. - if (!MI.isDebugValue()) + // Don't count transient instructions towards the search limit since there + // may be different numbers of them if e.g. debug information is present. + if (!MI.isTransient()) ++Count; // If the load instruction reads directly from the address to which the @@ -1184,13 +1012,14 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, return true; } - // If the second instruction isn't even a load/store, bail out. + // If the second instruction isn't even a mergable/pairable load/store, bail + // out. if (!PairIsValidLdStrOpc) return false; - // FIXME: We don't support merging narrow loads/stores with mixed - // scaled/unscaled offsets. - if (isNarrowLoadOrStore(OpcA) || isNarrowLoadOrStore(OpcB)) + // FIXME: We don't support merging narrow stores with mixed scaled/unscaled + // offsets. + if (isNarrowStore(OpcA) || isNarrowStore(OpcB)) return false; // Try to match an unscaled load/store with a scaled load/store. @@ -1229,13 +1058,11 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { MachineInstr &MI = *MBBI; - // Skip DBG_VALUE instructions. Otherwise debug info can affect the - // optimization by changing how far we scan. - if (MI.isDebugValue()) - continue; - // Now that we know this is a real instruction, count it. - ++Count; + // Don't count transient instructions towards the search limit since there + // may be different numbers of them if e.g. debug information is present. + if (!MI.isTransient()) + ++Count; Flags.setSExtIdx(-1); if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) && @@ -1505,12 +1332,11 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( ++MBBI; for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { MachineInstr &MI = *MBBI; - // Skip DBG_VALUE instructions. - if (MI.isDebugValue()) - continue; - // Now that we know this is a real instruction, count it. - ++Count; + // Don't count transient instructions towards the search limit since there + // may be different numbers of them if e.g. debug information is present. + if (!MI.isTransient()) + ++Count; // If we found a match, return it. if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset)) @@ -1559,8 +1385,9 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( --MBBI; MachineInstr &MI = *MBBI; - // Don't count DBG_VALUE instructions towards the search limit. - if (!MI.isDebugValue()) + // Don't count transient instructions towards the search limit since there + // may be different numbers of them if e.g. debug information is present. + if (!MI.isTransient()) ++Count; // If we found a match, return it. @@ -1603,37 +1430,26 @@ bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore( return false; } -// Find narrow loads that can be converted into a single wider load with -// bitfield extract instructions. Also merge adjacent zero stores into a wider -// store. -bool AArch64LoadStoreOpt::tryToMergeLdStInst( +// Merge adjacent zero stores into a wider store. +bool AArch64LoadStoreOpt::tryToMergeZeroStInst( MachineBasicBlock::iterator &MBBI) { - assert((isNarrowLoad(*MBBI) || isPromotableZeroStoreOpcode(*MBBI)) && - "Expected narrow op."); + assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store."); MachineInstr &MI = *MBBI; MachineBasicBlock::iterator E = MI.getParent()->end(); if (!TII->isCandidateToMergeOrPair(MI)) return false; - // For promotable zero stores, the stored value should be WZR. - if (isPromotableZeroStoreOpcode(MI) && - getLdStRegOp(MI).getReg() != AArch64::WZR) - return false; - // Look ahead up to LdStLimit instructions for a mergable instruction. LdStPairFlags Flags; MachineBasicBlock::iterator MergeMI = findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true); if (MergeMI != E) { - if (isNarrowLoad(MI)) { - ++NumNarrowLoadsPromoted; - } else if (isPromotableZeroStoreInst(MI)) { - ++NumZeroStoresPromoted; - } + ++NumZeroStoresPromoted; + // Keeping the iterator straight is a pain, so we let the merge routine tell // us what the next instruction is after it's done mucking about. - MBBI = mergeNarrowInsns(MBBI, MergeMI, Flags); + MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags); return true; } return false; @@ -1674,7 +1490,7 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) { } bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, - bool enableNarrowLdOpt) { + bool EnableNarrowZeroStOpt) { bool Modified = false; // Four tranformations to do here: // 1) Find loads that directly read from stores and promote them by @@ -1713,29 +1529,21 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, } } } - // 2) Find narrow loads that can be converted into a single wider load - // with bitfield extract instructions. - // e.g., - // ldrh w0, [x2] - // ldrh w1, [x2, #2] - // ; becomes - // ldr w0, [x2] - // ubfx w1, w0, #16, #16 - // and w0, w0, #ffff - // - // Also merge adjacent zero stores into a wider store. + // 2) Merge adjacent zero stores into a wider store. // e.g., // strh wzr, [x0] // strh wzr, [x0, #2] // ; becomes // str wzr, [x0] + // e.g., + // str wzr, [x0] + // str wzr, [x0, #4] + // ; becomes + // str xzr, [x0] for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - enableNarrowLdOpt && MBBI != E;) { - MachineInstr &MI = *MBBI; - unsigned Opc = MI.getOpcode(); - if (isPromotableZeroStoreOpcode(Opc) || - (EnableNarrowLdMerge && isNarrowLoad(Opc))) { - if (tryToMergeLdStInst(MBBI)) { + EnableNarrowZeroStOpt && MBBI != E;) { + if (isPromotableZeroStoreInst(*MBBI)) { + if (tryToMergeZeroStInst(MBBI)) { Modified = true; } else ++MBBI; @@ -1752,44 +1560,10 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, // ldp x0, x1, [x2] for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) { - MachineInstr &MI = *MBBI; - switch (MI.getOpcode()) { - default: - // Just move on to the next instruction. - ++MBBI; - break; - // Scaled instructions. - case AArch64::STRSui: - case AArch64::STRDui: - case AArch64::STRQui: - case AArch64::STRXui: - case AArch64::STRWui: - case AArch64::LDRSui: - case AArch64::LDRDui: - case AArch64::LDRQui: - case AArch64::LDRXui: - case AArch64::LDRWui: - case AArch64::LDRSWui: - // Unscaled instructions. - case AArch64::STURSi: - case AArch64::STURDi: - case AArch64::STURQi: - case AArch64::STURWi: - case AArch64::STURXi: - case AArch64::LDURSi: - case AArch64::LDURDi: - case AArch64::LDURQi: - case AArch64::LDURWi: - case AArch64::LDURXi: - case AArch64::LDURSWi: { - if (tryToPairLdStInst(MBBI)) { - Modified = true; - break; - } + if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI)) + Modified = true; + else ++MBBI; - break; - } - } } // 4) Find base register updates that can be merged into the load or store // as a base-reg writeback. @@ -1930,16 +1704,17 @@ bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { UsedRegs.resize(TRI->getNumRegs()); bool Modified = false; - bool enableNarrowLdOpt = - Subtarget->mergeNarrowLoads() && !Subtarget->requiresStrictAlign(); + bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign(); for (auto &MBB : Fn) - Modified |= optimizeBlock(MBB, enableNarrowLdOpt); + Modified |= optimizeBlock(MBB, enableNarrowZeroStOpt); return Modified; } -// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep -// loads and stores near one another? +// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and +// stores near one another? Note: The pre-RA instruction scheduler already has +// hooks to try and schedule pairable loads/stores together to improve pairing +// opportunities. Thus, pre-RA pairing pass may not be worth the effort. // FIXME: When pairing store instructions it's very possible for this pass to // hoist a store with a KILL marker above another use (without a KILL marker). |
