diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 367 | 
1 files changed, 273 insertions, 94 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 41af5522d967..dc6d5b8950c3 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -21,7 +21,6 @@  #include "AArch64MachineFunctionInfo.h"  #include "AArch64Subtarget.h"  #include "MCTargetDesc/AArch64AddressingModes.h" -#include "llvm/ADT/BitVector.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/ADT/StringRef.h" @@ -293,6 +292,8 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,      return AArch64::LDRWui;    case AArch64::LDURSWi:      return AArch64::LDURWi; +  case AArch64::LDRSWpre: +    return AArch64::LDRWpre;    }  } @@ -372,6 +373,8 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {    case AArch64::LDRSWui:    case AArch64::LDURSWi:      return AArch64::LDPSWi; +  case AArch64::LDRSWpre: +    return AArch64::LDPSWpre;    }  } @@ -585,6 +588,8 @@ static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {      return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);    case AArch64::LDRXpre:      return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi); +  case AArch64::LDRSWpre: +    return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);    }  } @@ -717,6 +722,16 @@ static bool isMergeableLdStUpdate(MachineInstr &MI) {    }  } +static bool isRewritableImplicitDef(unsigned Opc) { +  switch (Opc) { +  default: +    return false; +  case AArch64::ORRWrs: +  case AArch64::ADDWri: +    return true; +  } +} +  MachineBasicBlock::iterator  AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,                                             MachineBasicBlock::iterator MergeMI, @@ -859,66 +874,100 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,    bool MergeForward = Flags.getMergeForward();    std::optional<MCPhysReg> RenameReg = Flags.getRenameReg(); -  if (MergeForward && RenameReg) { +  if (RenameReg) {      MCRegister RegToRename = getLdStRegOp(*I).getReg();      DefinedInBB.addReg(*RenameReg);      // Return the sub/super register for RenameReg, matching the size of      // OriginalReg. -    auto GetMatchingSubReg = [this, -                              RenameReg](MCPhysReg OriginalReg) -> MCPhysReg { -      for (MCPhysReg SubOrSuper : TRI->sub_and_superregs_inclusive(*RenameReg)) -        if (TRI->getMinimalPhysRegClass(OriginalReg) == -            TRI->getMinimalPhysRegClass(SubOrSuper)) +    auto GetMatchingSubReg = +        [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg { +      for (MCPhysReg SubOrSuper : +           TRI->sub_and_superregs_inclusive(*RenameReg)) { +        if (C->contains(SubOrSuper))            return SubOrSuper; +      }        llvm_unreachable("Should have found matching sub or super register!");      };      std::function<bool(MachineInstr &, bool)> UpdateMIs = -        [this, RegToRename, GetMatchingSubReg](MachineInstr &MI, bool IsDef) { +        [this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &MI, +                                                             bool IsDef) {            if (IsDef) {              bool SeenDef = false; -            for (auto &MOP : MI.operands()) { +            for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) { +              MachineOperand &MOP = MI.getOperand(OpIdx);                // Rename the first explicit definition and all implicit                // definitions matching RegToRename.                if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() && -                  (!SeenDef || (MOP.isDef() && MOP.isImplicit())) && +                  (!MergeForward || !SeenDef || +                   (MOP.isDef() && MOP.isImplicit())) &&                    TRI->regsOverlap(MOP.getReg(), RegToRename)) {                  assert((MOP.isImplicit() ||                          (MOP.isRenamable() && !MOP.isEarlyClobber())) &&                         "Need renamable operands"); -                MOP.setReg(GetMatchingSubReg(MOP.getReg())); +                Register MatchingReg; +                if (const TargetRegisterClass *RC = +                        MI.getRegClassConstraint(OpIdx, TII, TRI)) +                  MatchingReg = GetMatchingSubReg(RC); +                else { +                  if (!isRewritableImplicitDef(MI.getOpcode())) +                    continue; +                  MatchingReg = GetMatchingSubReg( +                      TRI->getMinimalPhysRegClass(MOP.getReg())); +                } +                MOP.setReg(MatchingReg);                  SeenDef = true;                }              }            } else { -            for (auto &MOP : MI.operands()) { +            for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) { +              MachineOperand &MOP = MI.getOperand(OpIdx);                if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&                    TRI->regsOverlap(MOP.getReg(), RegToRename)) {                  assert((MOP.isImplicit() ||                          (MOP.isRenamable() && !MOP.isEarlyClobber())) &&                             "Need renamable operands"); -                MOP.setReg(GetMatchingSubReg(MOP.getReg())); +                Register MatchingReg; +                if (const TargetRegisterClass *RC = +                        MI.getRegClassConstraint(OpIdx, TII, TRI)) +                  MatchingReg = GetMatchingSubReg(RC); +                else +                  MatchingReg = GetMatchingSubReg( +                      TRI->getMinimalPhysRegClass(MOP.getReg())); +                assert(MatchingReg != AArch64::NoRegister && +                       "Cannot find matching regs for renaming"); +                MOP.setReg(MatchingReg);                }              }            } -          LLVM_DEBUG(dbgs() << "Renamed " << MI << "\n"); +          LLVM_DEBUG(dbgs() << "Renamed " << MI);            return true;          }; -    forAllMIsUntilDef(*I, RegToRename, TRI, LdStLimit, UpdateMIs); +    forAllMIsUntilDef(MergeForward ? *I : *std::prev(Paired), RegToRename, TRI, +                      UINT32_MAX, UpdateMIs);  #if !defined(NDEBUG) -    // Make sure the register used for renaming is not used between the paired -    // instructions. That would trash the content before the new paired -    // instruction. +    // For forward merging store: +    // Make sure the register used for renaming is not used between the +    // paired instructions. That would trash the content before the new +    // paired instruction. +    MCPhysReg RegToCheck = *RenameReg; +    // For backward merging load: +    // Make sure the register being renamed is not used between the +    // paired instructions. That would trash the content after the new +    // paired instruction. +    if (!MergeForward) +      RegToCheck = RegToRename;      for (auto &MI :           iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>( -             std::next(I), std::next(Paired))) +             MergeForward ? std::next(I) : I, +             MergeForward ? std::next(Paired) : Paired))        assert(all_of(MI.operands(), -                    [this, &RenameReg](const MachineOperand &MOP) { +                    [this, RegToCheck](const MachineOperand &MOP) {                        return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||                               MOP.isUndef() || -                             !TRI->regsOverlap(MOP.getReg(), *RenameReg); +                             !TRI->regsOverlap(MOP.getReg(), RegToCheck);                      }) &&               "Rename register used between paired instruction, trashing the "               "content"); @@ -984,15 +1033,23 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,    MachineBasicBlock *MBB = I->getParent();    MachineOperand RegOp0 = getLdStRegOp(*RtMI);    MachineOperand RegOp1 = getLdStRegOp(*Rt2MI); +  MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;    // Kill flags may become invalid when moving stores for pairing.    if (RegOp0.isUse()) {      if (!MergeForward) {        // Clear kill flags on store if moving upwards. Example: -      //   STRWui %w0, ... +      //   STRWui kill %w0, ...        //   USE %w1        //   STRWui kill %w1  ; need to clear kill flag when moving STRWui upwards -      RegOp0.setIsKill(false); -      RegOp1.setIsKill(false); +      // We are about to move the store of w1, so its kill flag may become +      // invalid; not the case for w0. +      // Since w1 is used between the stores, the kill flag on w1 is cleared +      // after merging. +      //   STPWi kill %w0, %w1, ... +      //   USE %w1 +      for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It) +        if (It->readsRegister(PairedRegOp.getReg(), TRI)) +          PairedRegOp.setIsKill(false);      } else {        // Clear kill flags of the first stores register. Example:        //   STRWui %w1, ... @@ -1318,6 +1375,10 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,    if (OpcA == OpcB)      return !AArch64InstrInfo::isPreLdSt(FirstMI); +  // Two pre ld/st of different opcodes cannot be merged either +  if (AArch64InstrInfo::isPreLdSt(FirstMI) && AArch64InstrInfo::isPreLdSt(MI)) +    return false; +    // Try to match a sign-extended load/store with a zero-extended load/store.    bool IsValidLdStrOpc, PairIsValidLdStrOpc;    unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc); @@ -1340,7 +1401,7 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,      return false;    // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and -  // LDR<S,D,Q,W,X>pre-LDR<S,D,Q,W,X>ui +  // LDR<S,D,Q,W,X,SW>pre-LDR<S,D,Q,W,X,SW>ui    // are candidate pairs that can be merged.    if (isPreLdStPairCandidate(FirstMI, MI))      return true; @@ -1352,6 +1413,38 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,    // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?  } +static bool canRenameMOP(const MachineOperand &MOP, +                         const TargetRegisterInfo *TRI) { +  if (MOP.isReg()) { +    auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg()); +    // Renaming registers with multiple disjunct sub-registers (e.g. the +    // result of a LD3) means that all sub-registers are renamed, potentially +    // impacting other instructions we did not check. Bail out. +    // Note that this relies on the structure of the AArch64 register file. In +    // particular, a subregister cannot be written without overwriting the +    // whole register. +    if (RegClass->HasDisjunctSubRegs) { +      LLVM_DEBUG( +          dbgs() +          << "  Cannot rename operands with multiple disjunct subregisters (" +          << MOP << ")\n"); +      return false; +    } + +    // We cannot rename arbitrary implicit-defs, the specific rule to rewrite +    // them must be known. For example, in ORRWrs the implicit-def +    // corresponds to the result register. +    if (MOP.isImplicit() && MOP.isDef()) { +      if (!isRewritableImplicitDef(MOP.getParent()->getOpcode())) +        return false; +      return TRI->isSuperOrSubRegisterEq( +          MOP.getParent()->getOperand(0).getReg(), MOP.getReg()); +    } +  } +  return MOP.isImplicit() || +         (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied()); +} +  static bool  canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,                   SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses, @@ -1361,10 +1454,6 @@ canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,    // Check if we can find an unused register which we can use to rename    // the register used by the first load/store. -  auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg()); -  MachineFunction &MF = *FirstMI.getParent()->getParent(); -  if (!RegClass || !MF.getRegInfo().tracksLiveness()) -    return false;    auto RegToRename = getLdStRegOp(FirstMI).getReg();    // For now, we only rename if the store operand gets killed at the store. @@ -1375,29 +1464,9 @@ canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,                         MOP.isImplicit() && MOP.isKill() &&                         TRI->regsOverlap(RegToRename, MOP.getReg());                })) { -    LLVM_DEBUG(dbgs() << "  Operand not killed at " << FirstMI << "\n"); +    LLVM_DEBUG(dbgs() << "  Operand not killed at " << FirstMI);      return false;    } -  auto canRenameMOP = [TRI](const MachineOperand &MOP) { -    if (MOP.isReg()) { -      auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg()); -      // Renaming registers with multiple disjunct sub-registers (e.g. the -      // result of a LD3) means that all sub-registers are renamed, potentially -      // impacting other instructions we did not check. Bail out. -      // Note that this relies on the structure of the AArch64 register file. In -      // particular, a subregister cannot be written without overwriting the -      // whole register. -      if (RegClass->HasDisjunctSubRegs) { -        LLVM_DEBUG( -            dbgs() -            << "  Cannot rename operands with multiple disjunct subregisters (" -            << MOP << ")\n"); -        return false; -      } -    } -    return MOP.isImplicit() || -           (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied()); -  };    bool FoundDef = false; @@ -1407,11 +1476,11 @@ canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,    // * collect the registers used and required register classes for RegToRename.    std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,                                                             bool IsDef) { -    LLVM_DEBUG(dbgs() << "Checking " << MI << "\n"); +    LLVM_DEBUG(dbgs() << "Checking " << MI);      // Currently we do not try to rename across frame-setup instructions.      if (MI.getFlag(MachineInstr::FrameSetup)) { -      LLVM_DEBUG(dbgs() << "  Cannot rename framesetup instructions currently (" -                        << MI << ")\n"); +      LLVM_DEBUG(dbgs() << "  Cannot rename framesetup instructions " +                        << "currently\n");        return false;      } @@ -1431,8 +1500,7 @@ canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,        //       1. Insert an extra copy, to materialize the def.        //       2. Skip pseudo-defs until we find an non-pseudo def.        if (MI.isPseudo()) { -        LLVM_DEBUG(dbgs() << "  Cannot rename pseudo instruction " << MI -                          << "\n"); +        LLVM_DEBUG(dbgs() << "  Cannot rename pseudo/bundle instruction\n");          return false;        } @@ -1440,9 +1508,8 @@ canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,          if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||              !TRI->regsOverlap(MOP.getReg(), RegToRename))            continue; -        if (!canRenameMOP(MOP)) { -          LLVM_DEBUG(dbgs() -                     << "  Cannot rename " << MOP << " in " << MI << "\n"); +        if (!canRenameMOP(MOP, TRI)) { +          LLVM_DEBUG(dbgs() << "  Cannot rename " << MOP << " in " << MI);            return false;          }          RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg())); @@ -1454,9 +1521,8 @@ canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,              !TRI->regsOverlap(MOP.getReg(), RegToRename))            continue; -        if (!canRenameMOP(MOP)) { -          LLVM_DEBUG(dbgs() -                     << "  Cannot rename " << MOP << " in " << MI << "\n"); +        if (!canRenameMOP(MOP, TRI)) { +          LLVM_DEBUG(dbgs() << "  Cannot rename " << MOP << " in " << MI);            return false;          }          RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg())); @@ -1475,6 +1541,52 @@ canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,    return true;  } +// We want to merge the second load into the first by rewriting the usages of +// the same reg between first (incl.) and second (excl.). We don't need to care +// about any insns before FirstLoad or after SecondLoad. +// 1. The second load writes new value into the same reg. +//    - The renaming is impossible to impact later use of the reg. +//    - The second load always trash the value written by the first load which +//      means the reg must be killed before the second load. +// 2. The first load must be a def for the same reg so we don't need to look +//    into anything before it. +static bool canRenameUntilSecondLoad( +    MachineInstr &FirstLoad, MachineInstr &SecondLoad, +    LiveRegUnits &UsedInBetween, +    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses, +    const TargetRegisterInfo *TRI) { +  if (FirstLoad.isPseudo()) +    return false; + +  UsedInBetween.accumulate(FirstLoad); +  auto RegToRename = getLdStRegOp(FirstLoad).getReg(); +  bool Success = std::all_of( +      FirstLoad.getIterator(), SecondLoad.getIterator(), +      [&](MachineInstr &MI) { +        LLVM_DEBUG(dbgs() << "Checking " << MI); +        // Currently we do not try to rename across frame-setup instructions. +        if (MI.getFlag(MachineInstr::FrameSetup)) { +          LLVM_DEBUG(dbgs() << "  Cannot rename framesetup instructions " +                            << "currently\n"); +          return false; +        } + +        for (auto &MOP : MI.operands()) { +          if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() || +              !TRI->regsOverlap(MOP.getReg(), RegToRename)) +            continue; +          if (!canRenameMOP(MOP, TRI)) { +            LLVM_DEBUG(dbgs() << "  Cannot rename " << MOP << " in " << MI); +            return false; +          } +          RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg())); +        } + +        return true; +      }); +  return Success; +} +  // Check if we can find a physical register for renaming \p Reg. This register  // must:  // * not be defined already in \p DefinedInBB; DefinedInBB must contain all @@ -1501,10 +1613,9 @@ static std::optional<MCPhysReg> tryToFindRegisterToRename(    // required register classes.    auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {      return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) { -      return any_of(TRI->sub_and_superregs_inclusive(PR), -                    [C, TRI](MCPhysReg SubOrSuper) { -                      return C == TRI->getMinimalPhysRegClass(SubOrSuper); -                    }); +      return any_of( +          TRI->sub_and_superregs_inclusive(PR), +          [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });      });    }; @@ -1524,6 +1635,41 @@ static std::optional<MCPhysReg> tryToFindRegisterToRename(    return std::nullopt;  } +// For store pairs: returns a register from FirstMI to the beginning of the +// block that can be renamed. +// For load pairs: returns a register from FirstMI to MI that can be renamed. +static std::optional<MCPhysReg> findRenameRegForSameLdStRegPair( +    std::optional<bool> MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI, +    Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, +    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses, +    const TargetRegisterInfo *TRI) { +  std::optional<MCPhysReg> RenameReg; +  if (!DebugCounter::shouldExecute(RegRenamingCounter)) +    return RenameReg; + +  auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg()); +  MachineFunction &MF = *FirstMI.getParent()->getParent(); +  if (!RegClass || !MF.getRegInfo().tracksLiveness()) +    return RenameReg; + +  const bool IsLoad = FirstMI.mayLoad(); + +  if (!MaybeCanRename) { +    if (IsLoad) +      MaybeCanRename = {canRenameUntilSecondLoad(FirstMI, MI, UsedInBetween, +                                                 RequiredClasses, TRI)}; +    else +      MaybeCanRename = { +          canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)}; +  } + +  if (*MaybeCanRename) { +    RenameReg = tryToFindRegisterToRename(MF, Reg, DefinedInBB, UsedInBetween, +                                          RequiredClasses, TRI); +  } +  return RenameReg; +} +  /// Scan the instructions looking for a load/store that can be combined with the  /// current instruction into a wider equivalent or a load/store pair.  MachineBasicBlock::iterator @@ -1676,17 +1822,6 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,              continue;            }          } -        // If the destination register of one load is the same register or a -        // sub/super register of the other load, bail and keep looking. A -        // load-pair instruction with both destination registers the same is -        // UNPREDICTABLE and will result in an exception. -        if (MayLoad && -            TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg())) { -          LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, -                                            TRI); -          MemInsns.push_back(&MI); -          continue; -        }          // If the BaseReg has been modified, then we cannot do the optimization.          // For example, in the following pattern @@ -1697,17 +1832,37 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,          if (!ModifiedRegUnits.available(BaseReg))            return E; +        const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq( +                                                Reg, getLdStRegOp(MI).getReg()); +          // If the Rt of the second instruction was not modified or used between          // the two instructions and none of the instructions between the second          // and first alias with the second, we can combine the second into the          // first.          if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) && -            !(MI.mayLoad() && +            !(MI.mayLoad() && !SameLoadReg &&                !UsedRegUnits.available(getLdStRegOp(MI).getReg())) &&              !mayAlias(MI, MemInsns, AA)) { +          // For pairs loading into the same reg, try to find a renaming +          // opportunity to allow the renaming of Reg between FirstMI and MI +          // and combine MI into FirstMI; otherwise bail and keep looking. +          if (SameLoadReg) { +            std::optional<MCPhysReg> RenameReg = +                findRenameRegForSameLdStRegPair(MaybeCanRename, FirstMI, MI, +                                                Reg, DefinedInBB, UsedInBetween, +                                                RequiredClasses, TRI); +            if (!RenameReg) { +              LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, +                                                UsedRegUnits, TRI); +              MemInsns.push_back(&MI); +              continue; +            } +            Flags.setRenameReg(*RenameReg); +          }            Flags.setMergeForward(false); -          Flags.clearRenameReg(); +          if (!SameLoadReg) +            Flags.clearRenameReg();            return MBBI;          } @@ -1725,22 +1880,13 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,              return MBBI;            } -          if (DebugCounter::shouldExecute(RegRenamingCounter)) { -            if (!MaybeCanRename) -              MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween, -                                                 RequiredClasses, TRI)}; - -            if (*MaybeCanRename) { -              std::optional<MCPhysReg> MaybeRenameReg = -                  tryToFindRegisterToRename(*FirstMI.getParent()->getParent(), -                                            Reg, DefinedInBB, UsedInBetween, -                                            RequiredClasses, TRI); -              if (MaybeRenameReg) { -                Flags.setRenameReg(*MaybeRenameReg); -                Flags.setMergeForward(true); -                MBBIWithRenameReg = MBBI; -              } -            } +          std::optional<MCPhysReg> RenameReg = findRenameRegForSameLdStRegPair( +              MaybeCanRename, FirstMI, MI, Reg, DefinedInBB, UsedInBetween, +              RequiredClasses, TRI); +          if (RenameReg) { +            Flags.setMergeForward(true); +            Flags.setRenameReg(*RenameReg); +            MBBIWithRenameReg = MBBI;            }          }          // Unable to combine these instructions due to interference in between. @@ -2136,6 +2282,14 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {    if (!TII->isCandidateToMergeOrPair(MI))      return false; +  // If disable-ldp feature is opted, do not emit ldp. +  if (MI.mayLoad() && Subtarget->hasDisableLdp()) +    return false; + +  // If disable-stp feature is opted, do not emit stp. +  if (MI.mayStore() && Subtarget->hasDisableStp()) +    return false; +    // Early exit if the offset is not possible to match. (6 bits of positive    // range, plus allow an extra one in case we find a later insn that matches    // with Offset-1) @@ -2159,6 +2313,31 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {      // Keeping the iterator straight is a pain, so we let the merge routine tell      // us what the next instruction is after it's done mucking about.      auto Prev = std::prev(MBBI); + +    // Fetch the memoperand of the load/store that is a candidate for +    // combination. +    MachineMemOperand *MemOp = +        MI.memoperands_empty() ? nullptr : MI.memoperands().front(); + +    // Get the needed alignments to check them if +    // ldp-aligned-only/stp-aligned-only features are opted. +    uint64_t MemAlignment = MemOp ? MemOp->getAlign().value() : -1; +    uint64_t TypeAlignment = MemOp ? Align(MemOp->getSize()).value() : -1; + +    // If a load arrives and ldp-aligned-only feature is opted, check that the +    // alignment of the source pointer is at least double the alignment of the +    // type. +    if (MI.mayLoad() && Subtarget->hasLdpAlignedOnly() && MemOp && +        MemAlignment < 2 * TypeAlignment) +      return false; + +    // If a store arrives and stp-aligned-only feature is opted, check that the +    // alignment of the source pointer is at least double the alignment of the +    // type. +    if (MI.mayStore() && Subtarget->hasStpAlignedOnly() && MemOp && +        MemAlignment < 2 * TypeAlignment) +      return false; +      MBBI = mergePairedInsns(MBBI, Paired, Flags);      // Collect liveness info for instructions between Prev and the new position      // MBBI.  | 
