Diffstat (limited to 'lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp')
-rw-r--r--  lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 304
1 file changed, 152 insertions(+), 152 deletions(-)
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 9a7f45bde6c9..8a29456430b9 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1,4 +1,4 @@
-//=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=//
+//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -20,12 +20,14 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Pass.h"
@@ -33,7 +35,6 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include <cassert>
 #include <cstdint>
 #include <iterator>
@@ -64,7 +65,7 @@ static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
 
 namespace {
 
-typedef struct LdStPairFlags {
+using LdStPairFlags = struct LdStPairFlags {
   // If a matching instruction is found, MergeForward is set to true if the
   // merge is to remove the first instruction and replace the second with
   // a pair-wise insn, and false if the reverse is true.
@@ -83,8 +84,7 @@ typedef struct LdStPairFlags {
 
   void setSExtIdx(int V) { SExtIdx = V; }
   int getSExtIdx() const { return SExtIdx; }
-
-} LdStPairFlags;
+};
 
 struct AArch64LoadStoreOpt : public MachineFunctionPass {
   static char ID;
@@ -101,7 +101,7 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
   // Track which registers have been modified and used.
   BitVector ModifiedRegs, UsedRegs;
 
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<AAResultsWrapperPass>();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
@@ -168,6 +168,9 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
   // Find and promote load instructions which read directly from store.
   bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
 
+  // Find and merge a base register updates before or after a ld/st instruction.
+  bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
+
   bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
 
   bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -578,6 +581,75 @@ static bool isPromotableZeroStoreInst(MachineInstr &MI) {
          getLdStRegOp(MI).getReg() == AArch64::WZR;
 }
 
+static bool isPromotableLoadFromStore(MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  default:
+    return false;
+  // Scaled instructions.
+  case AArch64::LDRBBui:
+  case AArch64::LDRHHui:
+  case AArch64::LDRWui:
+  case AArch64::LDRXui:
+  // Unscaled instructions.
+  case AArch64::LDURBBi:
+  case AArch64::LDURHHi:
+  case AArch64::LDURWi:
+  case AArch64::LDURXi:
+    return true;
+  }
+}
+
+static bool isMergeableLdStUpdate(MachineInstr &MI) {
+  unsigned Opc = MI.getOpcode();
+  switch (Opc) {
+  default:
+    return false;
+  // Scaled instructions.
+  case AArch64::STRSui:
+  case AArch64::STRDui:
+  case AArch64::STRQui:
+  case AArch64::STRXui:
+  case AArch64::STRWui:
+  case AArch64::STRHHui:
+  case AArch64::STRBBui:
+  case AArch64::LDRSui:
+  case AArch64::LDRDui:
+  case AArch64::LDRQui:
+  case AArch64::LDRXui:
+  case AArch64::LDRWui:
+  case AArch64::LDRHHui:
+  case AArch64::LDRBBui:
+  // Unscaled instructions.
+  case AArch64::STURSi:
+  case AArch64::STURDi:
+  case AArch64::STURQi:
+  case AArch64::STURWi:
+  case AArch64::STURXi:
+  case AArch64::LDURSi:
+  case AArch64::LDURDi:
+  case AArch64::LDURQi:
+  case AArch64::LDURWi:
+  case AArch64::LDURXi:
+  // Paired instructions.
+  case AArch64::LDPSi:
+  case AArch64::LDPSWi:
+  case AArch64::LDPDi:
+  case AArch64::LDPQi:
+  case AArch64::LDPWi:
+  case AArch64::LDPXi:
+  case AArch64::STPSi:
+  case AArch64::STPDi:
+  case AArch64::STPQi:
+  case AArch64::STPWi:
+  case AArch64::STPXi:
+    // Make sure this is a reg+imm (as opposed to an address reloc).
+    if (!getLdStOffsetOp(MI).isImm())
+      return false;
+
+    return true;
+  }
+}
+
 MachineBasicBlock::iterator
 AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                            MachineBasicBlock::iterator MergeMI,
@@ -758,8 +830,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
   if (SExtIdx != -1) {
     // Generate the sign extension for the proper result of the ldp.
     // I.e., with X1, that would be:
-    // %W1<def> = KILL %W1, %X1<imp-def>
-    // %X1<def> = SBFMXri %X1<kill>, 0, 31
+    // %w1 = KILL %w1, implicit-def %x1
+    // %x1 = SBFMXri killed %x1, 0, 31
     MachineOperand &DstMO = MIB->getOperand(SExtIdx);
     // Right now, DstMO has the extended register, since it comes from an
     // extended opcode.
@@ -1294,10 +1366,13 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
   }
   (void)MIB;
 
-  if (IsPreIdx)
+  if (IsPreIdx) {
+    ++NumPreFolded;
     DEBUG(dbgs() << "Creating pre-indexed load/store.");
-  else
+  } else {
+    ++NumPostFolded;
     DEBUG(dbgs() << "Creating post-indexed load/store.");
+  }
   DEBUG(dbgs() << "    Replacing instructions:\n    ");
   DEBUG(I->print(dbgs()));
   DEBUG(dbgs() << "    ");
@@ -1558,6 +1633,60 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
   return false;
 }
 
+bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
+    (MachineBasicBlock::iterator &MBBI) {
+  MachineInstr &MI = *MBBI;
+  MachineBasicBlock::iterator E = MI.getParent()->end();
+  MachineBasicBlock::iterator Update;
+
+  // Look forward to try to form a post-index instruction. For example,
+  // ldr x0, [x20]
+  // add x20, x20, #32
+  //   merged into:
+  // ldr x0, [x20], #32
+  Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
+  if (Update != E) {
+    // Merge the update into the ld/st.
+    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
+    return true;
+  }
+
+  // Don't know how to handle unscaled pre/post-index versions below, so bail.
+  if (TII->isUnscaledLdSt(MI.getOpcode()))
+    return false;
+
+  // Look back to try to find a pre-index instruction. For example,
+  // add x0, x0, #8
+  // ldr x1, [x0]
+  //   merged into:
+  // ldr x1, [x0, #8]!
+  Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
+  if (Update != E) {
+    // Merge the update into the ld/st.
+    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
+    return true;
+  }
+
+  // The immediate in the load/store is scaled by the size of the memory
+  // operation. The immediate in the add we're looking for,
+  // however, is not, so adjust here.
+  int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
+
+  // Look forward to try to find a post-index instruction. For example,
+  // ldr x1, [x0, #64]
+  // add x0, x0, #64
+  //   merged into:
+  // ldr x1, [x0, #64]!
+  Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
+  if (Update != E) {
+    // Merge the update into the ld/st.
+    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
+    return true;
+  }
+
+  return false;
+}
+
 bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                         bool EnableNarrowZeroStOpt) {
   bool Modified = false;
@@ -1573,29 +1702,10 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
   //        lsr w2, w1, #16
   for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
        MBBI != E;) {
-    MachineInstr &MI = *MBBI;
-    switch (MI.getOpcode()) {
-    default:
-      // Just move on to the next instruction.
-      ++MBBI;
-      break;
-    // Scaled instructions.
-    case AArch64::LDRBBui:
-    case AArch64::LDRHHui:
-    case AArch64::LDRWui:
-    case AArch64::LDRXui:
-    // Unscaled instructions.
-    case AArch64::LDURBBi:
-    case AArch64::LDURHHi:
-    case AArch64::LDURWi:
-    case AArch64::LDURXi:
-      if (tryToPromoteLoadFromStore(MBBI)) {
-        Modified = true;
-        break;
-      }
+    if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
+      Modified = true;
+    else
       ++MBBI;
-      break;
-    }
   }
   // 2) Merge adjacent zero stores into a wider store.
   //      e.g.,
@@ -1608,17 +1718,14 @@
   //        str wzr, [x0, #4]
   //        ; becomes
   //        str xzr, [x0]
-  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
-       EnableNarrowZeroStOpt && MBBI != E;) {
-    if (isPromotableZeroStoreInst(*MBBI)) {
-      if (tryToMergeZeroStInst(MBBI)) {
+  if (EnableNarrowZeroStOpt)
+    for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+         MBBI != E;) {
+      if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
        Modified = true;
-      } else
+      else
        ++MBBI;
-    } else
-      ++MBBI;
-  }
-
+    }
   // 3) Find loads and stores that can be merged into a single load or store
   //    pair instruction.
   //      e.g.,
@@ -1642,124 +1749,17 @@
   //        ldr x0, [x2], #4
   for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
        MBBI != E;) {
-    MachineInstr &MI = *MBBI;
-    // Do update merging. It's simpler to keep this separate from the above
-    // switchs, though not strictly necessary.
-    unsigned Opc = MI.getOpcode();
-    switch (Opc) {
-    default:
-      // Just move on to the next instruction.
-      ++MBBI;
-      break;
-    // Scaled instructions.
-    case AArch64::STRSui:
-    case AArch64::STRDui:
-    case AArch64::STRQui:
-    case AArch64::STRXui:
-    case AArch64::STRWui:
-    case AArch64::STRHHui:
-    case AArch64::STRBBui:
-    case AArch64::LDRSui:
-    case AArch64::LDRDui:
-    case AArch64::LDRQui:
-    case AArch64::LDRXui:
-    case AArch64::LDRWui:
-    case AArch64::LDRHHui:
-    case AArch64::LDRBBui:
-    // Unscaled instructions.
-    case AArch64::STURSi:
-    case AArch64::STURDi:
-    case AArch64::STURQi:
-    case AArch64::STURWi:
-    case AArch64::STURXi:
-    case AArch64::LDURSi:
-    case AArch64::LDURDi:
-    case AArch64::LDURQi:
-    case AArch64::LDURWi:
-    case AArch64::LDURXi:
-    // Paired instructions.
-    case AArch64::LDPSi:
-    case AArch64::LDPSWi:
-    case AArch64::LDPDi:
-    case AArch64::LDPQi:
-    case AArch64::LDPWi:
-    case AArch64::LDPXi:
-    case AArch64::STPSi:
-    case AArch64::STPDi:
-    case AArch64::STPQi:
-    case AArch64::STPWi:
-    case AArch64::STPXi: {
-      // Make sure this is a reg+imm (as opposed to an address reloc).
-      if (!getLdStOffsetOp(MI).isImm()) {
-        ++MBBI;
-        break;
-      }
-      // Look forward to try to form a post-index instruction. For example,
-      // ldr x0, [x20]
-      // add x20, x20, #32
-      //   merged into:
-      // ldr x0, [x20], #32
-      MachineBasicBlock::iterator Update =
-          findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
-      if (Update != E) {
-        // Merge the update into the ld/st.
-        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
-        Modified = true;
-        ++NumPostFolded;
-        break;
-      }
-
-      // Don't know how to handle unscaled pre/post-index versions below, so
-      // move to the next instruction.
-      if (TII->isUnscaledLdSt(Opc)) {
-        ++MBBI;
-        break;
-      }
-
-      // Look back to try to find a pre-index instruction. For example,
-      // add x0, x0, #8
-      // ldr x1, [x0]
-      //   merged into:
-      // ldr x1, [x0, #8]!
-      Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
-      if (Update != E) {
-        // Merge the update into the ld/st.
-        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
-        Modified = true;
-        ++NumPreFolded;
-        break;
-      }
-      // The immediate in the load/store is scaled by the size of the memory
-      // operation. The immediate in the add we're looking for,
-      // however, is not, so adjust here.
-      int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
-
-      // Look forward to try to find a post-index instruction. For example,
-      // ldr x1, [x0, #64]
-      // add x0, x0, #64
-      //   merged into:
-      // ldr x1, [x0, #64]!
-      Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
-      if (Update != E) {
-        // Merge the update into the ld/st.
-        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
-        Modified = true;
-        ++NumPreFolded;
-        break;
-      }
-
-      // Nothing found. Just move to the next instruction.
+    if (isMergeableLdStUpdate(*MBBI) && tryToMergeLdStUpdate(MBBI))
+      Modified = true;
+    else
       ++MBBI;
-      break;
-    }
-    }
   }
 
   return Modified;
 }
 
 bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
-  if (skipFunction(*Fn.getFunction()))
+  if (skipFunction(Fn.getFunction()))
     return false;
 
   Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
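After this patch, all three driver loops in optimizeBlock share one predicate-plus-transform shape. A minimal sketch of that idiom, distilled from the hunks above (the identifiers are the patch's own; the comments are editorial):

    for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
         MBBI != E;) {
      // Cheap opcode test first, so the expensive matching and merging
      // logic only runs on plausible candidates.
      if (isMergeableLdStUpdate(*MBBI) && tryToMergeLdStUpdate(MBBI))
        Modified = true; // success: the try function repositioned MBBI itself
      else
        ++MBBI;          // no match: step to the next instruction
    }

The iterator discipline is what makes the split safe: tryToMergeLdStUpdate takes MBBI by reference and, on success, assigns it the iterator returned by mergeUpdateInsn (which points past the merged instructions), so the loop advances the iterator only on the failure path.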

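The scaling adjustment in tryToMergeLdStUpdate is the one subtle step. A worked example (editorial, assuming the usual AArch64 encoding in which a scaled load/store immediate is stored in units of the access size):

    // For "ldr x1, [x0, #64]", i.e. LDRXui:
    //   getLdStOffsetOp(MI).getImm() == 8    (immediate in 8-byte units)
    //   getMemScale(MI)              == 8    (an X-register load accesses 8 bytes)
    //   UnscaledOffset = 8 * 8       == 64   (byte offset of the access)
    // so findMatchingUpdateInsnForward must see "add x0, x0, #64";
    // an "add x0, x0, #8" would not match and nothing is folded.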