author    Dimitry Andric <dim@FreeBSD.org>  2017-01-02 19:17:04 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2017-01-02 19:17:04 +0000
commit    b915e9e0fc85ba6f398b3fab0db6a81a8913af94 (patch)
tree      98b8f811c7aff2547cab8642daf372d6c59502fb /lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
parent    6421cca32f69ac849537a3cff78c352195e99f1b (diff)
Diffstat (limited to 'lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp')
-rw-r--r--  lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 373
1 file changed, 74 insertions(+), 299 deletions(-)
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index dd2ea6a9dbd6..dcb05601e5f4 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -38,7 +38,6 @@ STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
"Number of load/store from unscaled generated");
-STATISTIC(NumNarrowLoadsPromoted, "Number of narrow loads promoted");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
@@ -51,14 +50,6 @@ static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
cl::Hidden);
-static cl::opt<bool> EnableNarrowLdMerge("enable-narrow-ld-merge", cl::Hidden,
- cl::init(false),
- cl::desc("Enable narrow load merge"));
-
-namespace llvm {
-void initializeAArch64LoadStoreOptPass(PassRegistry &);
-}
-
#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
namespace {
@@ -111,11 +102,11 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
MachineBasicBlock::iterator &StoreI);
- // Merge the two instructions indicated into a wider instruction.
+ // Merge the two instructions indicated into a wider narrow store instruction.
MachineBasicBlock::iterator
- mergeNarrowInsns(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator MergeMI,
- const LdStPairFlags &Flags);
+ mergeNarrowZeroStores(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator MergeMI,
+ const LdStPairFlags &Flags);
// Merge the two instructions indicated into a single pair-wise instruction.
MachineBasicBlock::iterator
@@ -151,8 +142,8 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
mergeUpdateInsn(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Update, bool IsPreIdx);
- // Find and merge foldable ldr/str instructions.
- bool tryToMergeLdStInst(MachineBasicBlock::iterator &MBBI);
+ // Find and merge zero store instructions.
+ bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);
// Find and pair ldr/str instructions.
bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
@@ -160,18 +151,16 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Find and promote load instructions which read directly from store.
bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
- bool optimizeBlock(MachineBasicBlock &MBB, bool enableNarrowLdOpt);
+ bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
bool runOnMachineFunction(MachineFunction &Fn) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
- return AARCH64_LOAD_STORE_OPT_NAME;
- }
+ StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
};
char AArch64LoadStoreOpt::ID = 0;
} // namespace
@@ -179,23 +168,6 @@ char AArch64LoadStoreOpt::ID = 0;
INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
AARCH64_LOAD_STORE_OPT_NAME, false, false)
-static unsigned getBitExtrOpcode(MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- llvm_unreachable("Unexpected opcode.");
- case AArch64::LDRBBui:
- case AArch64::LDURBBi:
- case AArch64::LDRHHui:
- case AArch64::LDURHHi:
- return AArch64::UBFMWri;
- case AArch64::LDRSBWui:
- case AArch64::LDURSBWi:
- case AArch64::LDRSHWui:
- case AArch64::LDURSHWi:
- return AArch64::SBFMWri;
- }
-}
-
static bool isNarrowStore(unsigned Opc) {
switch (Opc) {
default:
@@ -208,30 +180,6 @@ static bool isNarrowStore(unsigned Opc) {
}
}
-static bool isNarrowLoad(unsigned Opc) {
- switch (Opc) {
- default:
- return false;
- case AArch64::LDRHHui:
- case AArch64::LDURHHi:
- case AArch64::LDRBBui:
- case AArch64::LDURBBi:
- case AArch64::LDRSHWui:
- case AArch64::LDURSHWi:
- case AArch64::LDRSBWui:
- case AArch64::LDURSBWi:
- return true;
- }
-}
-
-static bool isNarrowLoad(MachineInstr &MI) {
- return isNarrowLoad(MI.getOpcode());
-}
-
-static bool isNarrowLoadOrStore(unsigned Opc) {
- return isNarrowLoad(Opc) || isNarrowStore(Opc);
-}
-
// Scaling factor for unscaled load or store.
static int getMemScale(MachineInstr &MI) {
switch (MI.getOpcode()) {
@@ -323,23 +271,11 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
case AArch64::STURSi:
case AArch64::LDRSui:
case AArch64::LDURSi:
- case AArch64::LDRHHui:
- case AArch64::LDURHHi:
- case AArch64::LDRBBui:
- case AArch64::LDURBBi:
return Opc;
case AArch64::LDRSWui:
return AArch64::LDRWui;
case AArch64::LDURSWi:
return AArch64::LDURWi;
- case AArch64::LDRSBWui:
- return AArch64::LDRBBui;
- case AArch64::LDRSHWui:
- return AArch64::LDRHHui;
- case AArch64::LDURSBWi:
- return AArch64::LDURBBi;
- case AArch64::LDURSHWi:
- return AArch64::LDURHHi;
}
}
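This mapping is what lets a sign-extending load be weighed against a plain load: candidates are compared in their non-sign-extending form, and which side needs the extension is recorded in LdStPairFlags (see Flags.setSExtIdx further down). A comment-style illustration, hedged since only the opcode mapping itself is visible here:

// e.g.:
//   ldr   w0, [x2]       ; LDRWui
//   ldrsw x1, [x2, #4]   ; LDRSWui compares as LDRWui for matching
// The two can then be recognized as pair candidates, with the sign
// extension re-applied when the paired form is emitted.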
@@ -359,18 +295,6 @@ static unsigned getMatchingWideOpcode(unsigned Opc) {
return AArch64::STURXi;
case AArch64::STRWui:
return AArch64::STRXui;
- case AArch64::LDRHHui:
- case AArch64::LDRSHWui:
- return AArch64::LDRWui;
- case AArch64::LDURHHi:
- case AArch64::LDURSHWi:
- return AArch64::LDURWi;
- case AArch64::LDRBBui:
- case AArch64::LDRSBWui:
- return AArch64::LDRHHui;
- case AArch64::LDURBBi:
- case AArch64::LDURSBWi:
- return AArch64::LDURHHi;
}
}
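With the narrow-load cases removed, getMatchingWideOpcode only widens store opcodes, which is all the zero-store merge requires. A comment-style example of the remaining mapping:

// e.g., two adjacent 32-bit zero stores:
//   str wzr, [x0]        ; STRWui
//   str wzr, [x0, #4]    ; STRWui
// getMatchingWideOpcode(STRWui) == STRXui, so the merged form is:
//   str xzr, [x0]        ; STRXui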
@@ -614,23 +538,20 @@ static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
(UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
-static bool isPromotableZeroStoreOpcode(unsigned Opc) {
- return isNarrowStore(Opc) || Opc == AArch64::STRWui || Opc == AArch64::STURWi;
-}
-
-static bool isPromotableZeroStoreOpcode(MachineInstr &MI) {
- return isPromotableZeroStoreOpcode(MI.getOpcode());
-}
-
static bool isPromotableZeroStoreInst(MachineInstr &MI) {
- return (isPromotableZeroStoreOpcode(MI)) &&
+ unsigned Opc = MI.getOpcode();
+ return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
+ isNarrowStore(Opc)) &&
getLdStRegOp(MI).getReg() == AArch64::WZR;
}
MachineBasicBlock::iterator
-AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator MergeMI,
- const LdStPairFlags &Flags) {
+AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator MergeMI,
+ const LdStPairFlags &Flags) {
+ assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
+ "Expected promotable zero stores.");
+
MachineBasicBlock::iterator NextI = I;
++NextI;
// If NextI is the second of the two instructions to be merged, we need
@@ -654,15 +575,12 @@ AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I,
MergeForward ? getLdStBaseOp(*MergeMI) : getLdStBaseOp(*I);
// Which register is Rt and which is Rt2 depends on the offset order.
- MachineInstr *RtMI, *Rt2MI;
+ MachineInstr *RtMI;
if (getLdStOffsetOp(*I).getImm() ==
- getLdStOffsetOp(*MergeMI).getImm() + OffsetStride) {
+ getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
RtMI = &*MergeMI;
- Rt2MI = &*I;
- } else {
+ else
RtMI = &*I;
- Rt2MI = &*MergeMI;
- }
int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
// Change the scaled offset from small to large type.
@@ -671,105 +589,9 @@ AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I,
OffsetImm /= 2;
}
+ // Construct the new instruction.
DebugLoc DL = I->getDebugLoc();
MachineBasicBlock *MBB = I->getParent();
- if (isNarrowLoad(Opc)) {
- MachineInstr *RtNewDest = &*(MergeForward ? I : MergeMI);
- // When merging small (< 32 bit) loads for big-endian targets, the order of
- // the component parts gets swapped.
- if (!Subtarget->isLittleEndian())
- std::swap(RtMI, Rt2MI);
- // Construct the new load instruction.
- MachineInstr *NewMemMI, *BitExtMI1, *BitExtMI2;
- NewMemMI =
- BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
- .addOperand(getLdStRegOp(*RtNewDest))
- .addOperand(BaseRegOp)
- .addImm(OffsetImm)
- .setMemRefs(I->mergeMemRefsWith(*MergeMI));
- (void)NewMemMI;
-
- DEBUG(
- dbgs()
- << "Creating the new load and extract. Replacing instructions:\n ");
- DEBUG(I->print(dbgs()));
- DEBUG(dbgs() << " ");
- DEBUG(MergeMI->print(dbgs()));
- DEBUG(dbgs() << " with instructions:\n ");
- DEBUG((NewMemMI)->print(dbgs()));
-
- int Width = getMemScale(*I) == 1 ? 8 : 16;
- int LSBLow = 0;
- int LSBHigh = Width;
- int ImmsLow = LSBLow + Width - 1;
- int ImmsHigh = LSBHigh + Width - 1;
- MachineInstr *ExtDestMI = &*(MergeForward ? MergeMI : I);
- if ((ExtDestMI == Rt2MI) == Subtarget->isLittleEndian()) {
- // Create the bitfield extract for high bits.
- BitExtMI1 =
- BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*Rt2MI)))
- .addOperand(getLdStRegOp(*Rt2MI))
- .addReg(getLdStRegOp(*RtNewDest).getReg())
- .addImm(LSBHigh)
- .addImm(ImmsHigh);
- // Create the bitfield extract for low bits.
- if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) {
- // For unsigned, prefer to use AND for low bits.
- BitExtMI2 = BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::ANDWri))
- .addOperand(getLdStRegOp(*RtMI))
- .addReg(getLdStRegOp(*RtNewDest).getReg())
- .addImm(ImmsLow);
- } else {
- BitExtMI2 =
- BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*RtMI)))
- .addOperand(getLdStRegOp(*RtMI))
- .addReg(getLdStRegOp(*RtNewDest).getReg())
- .addImm(LSBLow)
- .addImm(ImmsLow);
- }
- } else {
- // Create the bitfield extract for low bits.
- if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) {
- // For unsigned, prefer to use AND for low bits.
- BitExtMI1 = BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::ANDWri))
- .addOperand(getLdStRegOp(*RtMI))
- .addReg(getLdStRegOp(*RtNewDest).getReg())
- .addImm(ImmsLow);
- } else {
- BitExtMI1 =
- BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*RtMI)))
- .addOperand(getLdStRegOp(*RtMI))
- .addReg(getLdStRegOp(*RtNewDest).getReg())
- .addImm(LSBLow)
- .addImm(ImmsLow);
- }
-
- // Create the bitfield extract for high bits.
- BitExtMI2 =
- BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*Rt2MI)))
- .addOperand(getLdStRegOp(*Rt2MI))
- .addReg(getLdStRegOp(*RtNewDest).getReg())
- .addImm(LSBHigh)
- .addImm(ImmsHigh);
- }
- (void)BitExtMI1;
- (void)BitExtMI2;
-
- DEBUG(dbgs() << " ");
- DEBUG((BitExtMI1)->print(dbgs()));
- DEBUG(dbgs() << " ");
- DEBUG((BitExtMI2)->print(dbgs()));
- DEBUG(dbgs() << "\n");
-
- // Erase the old instructions.
- I->eraseFromParent();
- MergeMI->eraseFromParent();
- return NextI;
- }
- assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
- "Expected promotable zero store");
-
- // Construct the new instruction.
MachineInstrBuilder MIB;
MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
.addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
@@ -778,7 +600,7 @@ AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I,
.setMemRefs(I->mergeMemRefsWith(*MergeMI));
(void)MIB;
- DEBUG(dbgs() << "Creating wider load/store. Replacing instructions:\n ");
+ DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n ");
DEBUG(I->print(dbgs()));
DEBUG(dbgs() << " ");
DEBUG(MergeMI->print(dbgs()));
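A worked example of the offset rescaling performed above for scaled forms (the narrow immediate is halved because the wide opcode's scale doubles):

// e.g., merging two halfword zero stores:
//   strh wzr, [x0, #4]   ; STRHui, scale 2, scaled imm = 2
//   strh wzr, [x0, #6]   ; STRHui, scale 2, scaled imm = 3
// RtMI is the lower-addressed store (imm 2); the wide opcode STRWui has
// scale 4, so OffsetImm /= 2 yields 1:
//   str  wzr, [x0, #4]   ; STRWui, scale 4, scaled imm = 1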
@@ -945,6 +767,7 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
// Remove the load, if the destination register of the loads is the same
// register for stored value.
if (StRt == LdRt && LoadSize == 8) {
+ StoreI->clearRegisterKills(StRt, TRI);
DEBUG(dbgs() << "Remove load instruction:\n ");
DEBUG(LoadI->print(dbgs()));
DEBUG(dbgs() << "\n");
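The clearRegisterKills call added above matters because, once the load is deleted, the stored register stays live past the store, so any <kill> marker it carried there becomes stale. A sketch of the situation:

// e.g., with StRt == LdRt == x1 and LoadSize == 8:
//   str x1, [x0]         ; x1 may be marked <kill> here
//   ldr x1, [x0]         ; removed - later code keeps reading x1
// Clearing the kill marker on x1 at the store keeps liveness honest for
// the verifier and downstream passes.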
@@ -1009,6 +832,8 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
.addImm(Imms);
}
}
+ StoreI->clearRegisterKills(StRt, TRI);
+
(void)BitExtMI;
DEBUG(dbgs() << "Promoting load by replacing :\n ");
@@ -1041,8 +866,10 @@ static void trackRegDefsUses(const MachineInstr &MI, BitVector &ModifiedRegs,
if (!Reg)
continue;
if (MO.isDef()) {
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- ModifiedRegs.set(*AI);
+ // WZR/XZR are not modified even when used as a destination register.
+ if (Reg != AArch64::WZR && Reg != AArch64::XZR)
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ ModifiedRegs.set(*AI);
} else {
assert(MO.isUse() && "Reg operand not a def and not a use?!?");
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
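The WZR/XZR special case reflects the architecture: writes to the zero registers are discarded, so a nominal def of them clobbers nothing. Without the check, a common instruction like cmp (an alias of subs with a WZR destination) would block merging. A comment-style example:

// e.g.:
//   strh wzr, [x0]
//   cmp  w1, w2          ; SUBS wzr, w1, w2 - nominally defines WZR
//   strh wzr, [x0, #2]
// WZR always reads as zero, so the intervening "def" must not mark it
// modified, or the two zero stores could no longer be merged.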
@@ -1118,8 +945,9 @@ bool AArch64LoadStoreOpt::findMatchingStore(
--MBBI;
MachineInstr &MI = *MBBI;
- // Don't count DBG_VALUE instructions towards the search limit.
- if (!MI.isDebugValue())
+ // Don't count transient instructions towards the search limit since there
+ // may be different numbers of them if e.g. debug information is present.
+ if (!MI.isTransient())
++Count;
// If the load instruction reads directly from the address to which the
@@ -1184,13 +1012,14 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
return true;
}
- // If the second instruction isn't even a load/store, bail out.
+ // If the second instruction isn't even a mergable/pairable load/store, bail
+ // out.
if (!PairIsValidLdStrOpc)
return false;
- // FIXME: We don't support merging narrow loads/stores with mixed
- // scaled/unscaled offsets.
- if (isNarrowLoadOrStore(OpcA) || isNarrowLoadOrStore(OpcB))
+ // FIXME: We don't support merging narrow stores with mixed scaled/unscaled
+ // offsets.
+ if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
return false;
// Try to match an unscaled load/store with a scaled load/store.
@@ -1229,13 +1058,11 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
MachineInstr &MI = *MBBI;
- // Skip DBG_VALUE instructions. Otherwise debug info can affect the
- // optimization by changing how far we scan.
- if (MI.isDebugValue())
- continue;
- // Now that we know this is a real instruction, count it.
- ++Count;
+ // Don't count transient instructions towards the search limit since there
+ // may be different numbers of them if e.g. debug information is present.
+ if (!MI.isTransient())
+ ++Count;
Flags.setSExtIdx(-1);
if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
@@ -1505,12 +1332,11 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
++MBBI;
for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
MachineInstr &MI = *MBBI;
- // Skip DBG_VALUE instructions.
- if (MI.isDebugValue())
- continue;
- // Now that we know this is a real instruction, count it.
- ++Count;
+ // Don't count transient instructions towards the search limit since there
+ // may be different numbers of them if e.g. debug information is present.
+ if (!MI.isTransient())
+ ++Count;
// If we found a match, return it.
if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
@@ -1559,8 +1385,9 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
--MBBI;
MachineInstr &MI = *MBBI;
- // Don't count DBG_VALUE instructions towards the search limit.
- if (!MI.isDebugValue())
+ // Don't count transient instructions towards the search limit since there
+ // may be different numbers of them if e.g. debug information is present.
+ if (!MI.isTransient())
++Count;
// If we found a match, return it.
@@ -1603,37 +1430,26 @@ bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
return false;
}
-// Find narrow loads that can be converted into a single wider load with
-// bitfield extract instructions. Also merge adjacent zero stores into a wider
-// store.
-bool AArch64LoadStoreOpt::tryToMergeLdStInst(
+// Merge adjacent zero stores into a wider store.
+bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
MachineBasicBlock::iterator &MBBI) {
- assert((isNarrowLoad(*MBBI) || isPromotableZeroStoreOpcode(*MBBI)) &&
- "Expected narrow op.");
+ assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
MachineInstr &MI = *MBBI;
MachineBasicBlock::iterator E = MI.getParent()->end();
if (!TII->isCandidateToMergeOrPair(MI))
return false;
- // For promotable zero stores, the stored value should be WZR.
- if (isPromotableZeroStoreOpcode(MI) &&
- getLdStRegOp(MI).getReg() != AArch64::WZR)
- return false;
-
// Look ahead up to LdStLimit instructions for a mergable instruction.
LdStPairFlags Flags;
MachineBasicBlock::iterator MergeMI =
findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
if (MergeMI != E) {
- if (isNarrowLoad(MI)) {
- ++NumNarrowLoadsPromoted;
- } else if (isPromotableZeroStoreInst(MI)) {
- ++NumZeroStoresPromoted;
- }
+ ++NumZeroStoresPromoted;
+
// Keeping the iterator straight is a pain, so we let the merge routine tell
// us what the next instruction is after it's done mucking about.
- MBBI = mergeNarrowInsns(MBBI, MergeMI, Flags);
+ MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
return true;
}
return false;
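The forward scan performed by findMatchingInsn above is bounded by LdStLimit, the aarch64-load-store-scan-limit option declared near the top of the file, so the merge cost stays linear. A hedged usage sketch (invocation illustrative only):

// The window is user-tunable, e.g.:
//   llc -aarch64-load-store-scan-limit=8 ...
// A smaller limit trades missed zero-store merges for compile time.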
@@ -1674,7 +1490,7 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
}
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
- bool enableNarrowLdOpt) {
+ bool EnableNarrowZeroStOpt) {
bool Modified = false;
// Four transformations to do here:
// 1) Find loads that directly read from stores and promote them by
@@ -1713,29 +1529,21 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
}
}
}
- // 2) Find narrow loads that can be converted into a single wider load
- // with bitfield extract instructions.
- // e.g.,
- // ldrh w0, [x2]
- // ldrh w1, [x2, #2]
- // ; becomes
- // ldr w0, [x2]
- // ubfx w1, w0, #16, #16
- // and w0, w0, #ffff
- //
- // Also merge adjacent zero stores into a wider store.
+ // 2) Merge adjacent zero stores into a wider store.
// e.g.,
// strh wzr, [x0]
// strh wzr, [x0, #2]
// ; becomes
// str wzr, [x0]
+ // e.g.,
+ // str wzr, [x0]
+ // str wzr, [x0, #4]
+ // ; becomes
+ // str xzr, [x0]
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
- enableNarrowLdOpt && MBBI != E;) {
- MachineInstr &MI = *MBBI;
- unsigned Opc = MI.getOpcode();
- if (isPromotableZeroStoreOpcode(Opc) ||
- (EnableNarrowLdMerge && isNarrowLoad(Opc))) {
- if (tryToMergeLdStInst(MBBI)) {
+ EnableNarrowZeroStOpt && MBBI != E;) {
+ if (isPromotableZeroStoreInst(*MBBI)) {
+ if (tryToMergeZeroStInst(MBBI)) {
Modified = true;
} else
++MBBI;
@@ -1752,44 +1560,10 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
// ldp x0, x1, [x2]
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
MBBI != E;) {
- MachineInstr &MI = *MBBI;
- switch (MI.getOpcode()) {
- default:
- // Just move on to the next instruction.
- ++MBBI;
- break;
- // Scaled instructions.
- case AArch64::STRSui:
- case AArch64::STRDui:
- case AArch64::STRQui:
- case AArch64::STRXui:
- case AArch64::STRWui:
- case AArch64::LDRSui:
- case AArch64::LDRDui:
- case AArch64::LDRQui:
- case AArch64::LDRXui:
- case AArch64::LDRWui:
- case AArch64::LDRSWui:
- // Unscaled instructions.
- case AArch64::STURSi:
- case AArch64::STURDi:
- case AArch64::STURQi:
- case AArch64::STURWi:
- case AArch64::STURXi:
- case AArch64::LDURSi:
- case AArch64::LDURDi:
- case AArch64::LDURQi:
- case AArch64::LDURWi:
- case AArch64::LDURXi:
- case AArch64::LDURSWi: {
- if (tryToPairLdStInst(MBBI)) {
- Modified = true;
- break;
- }
+ if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
+ Modified = true;
+ else
++MBBI;
- break;
- }
- }
}
// 4) Find base register updates that can be merged into the load or store
// as a base-reg writeback.
@@ -1930,16 +1704,17 @@ bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
UsedRegs.resize(TRI->getNumRegs());
bool Modified = false;
- bool enableNarrowLdOpt =
- Subtarget->mergeNarrowLoads() && !Subtarget->requiresStrictAlign();
+ bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
for (auto &MBB : Fn)
- Modified |= optimizeBlock(MBB, enableNarrowLdOpt);
+ Modified |= optimizeBlock(MBB, enableNarrowZeroStOpt);
return Modified;
}
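Gating enableNarrowZeroStOpt on !requiresStrictAlign() is what keeps the merge safe: widening two naturally aligned narrow stores yields an access that may only be aligned to the narrow size, which can fault on a strict-alignment target. An illustrative case:

// e.g., if x0 is only 2-byte aligned:
//   strh wzr, [x0]       ; 2-byte access, aligned
//   strh wzr, [x0, #2]   ; 2-byte access, aligned
//   str  wzr, [x0]       ; merged 4-byte access - possibly unaligned,
//                        ; hence the merge is disabled under strict align.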
-// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep
-// loads and stores near one another?
+// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and
+// stores near one another? Note: The pre-RA instruction scheduler already has
+// hooks to try and schedule pairable loads/stores together to improve pairing
// opportunities. Thus, a pre-RA pairing pass may not be worth the effort.
// FIXME: When pairing store instructions it's very possible for this pass to
// hoist a store with a KILL marker above another use (without a KILL marker).