Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp | 381
1 file changed, 302 insertions(+), 79 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 961f38cad1e4..b9ac2657e1c5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -205,6 +206,14 @@ private:
ComplexRendererFns
selectAddrModeShiftedExtendXReg(MachineOperand &Root,
unsigned SizeInBytes) const;
+
+ /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
+ /// or not a shift + extend should be folded into an addressing mode. Returns
+ /// None when this is not profitable or possible.
+ ComplexRendererFns
+ selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
+ MachineOperand &Offset, unsigned SizeInBytes,
+ bool WantsExt) const;
ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
unsigned SizeInBytes) const;
@@ -213,6 +222,13 @@ private:
return selectAddrModeXRO(Root, Width / 8);
}
+ ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
+ unsigned SizeInBytes) const;
+ template <int Width>
+ ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
+ return selectAddrModeWRO(Root, Width / 8);
+ }
+
ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
@@ -227,6 +243,15 @@ private:
return selectShiftedRegister(Root);
}
+ /// Given an extend instruction, determine the correct shift-extend type for
+ /// that instruction.
+ ///
+ /// If the instruction is going to be used in a load or store, pass
+ /// \p IsLoadStore = true.
+ AArch64_AM::ShiftExtendType
+ getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
+ bool IsLoadStore = false) const;
+
/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
/// subregister copy if necessary. Return either ExtReg, or the result of the
@@ -235,9 +260,12 @@ private:
MachineIRBuilder &MIB) const;
ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
- void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;
- void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I) const;
- void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I) const;
+ void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx = -1) const;
+ void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
+ int OpIdx = -1) const;
+ void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
+ int OpIdx = -1) const;
// Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
void materializeLargeCMVal(MachineInstr &I, const Value *V,
@@ -462,7 +490,7 @@ static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
}
} else if (OpSize == 64) {
switch (GenericOpc) {
- case TargetOpcode::G_GEP:
+ case TargetOpcode::G_PTR_ADD:
return AArch64::ADDXrr;
case TargetOpcode::G_SHL:
return AArch64::LSLVXr;
@@ -1006,6 +1034,66 @@ bool AArch64InstructionSelector::selectCompareBranch(
return true;
}
+/// Returns the element immediate value of a vector shift operand if found.
+/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
+static Optional<int64_t> getVectorShiftImm(Register Reg,
+ MachineRegisterInfo &MRI) {
+ assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
+ MachineInstr *OpMI = MRI.getVRegDef(Reg);
+ assert(OpMI && "Expected to find a vreg def for vector shift operand");
+ if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
+ return None;
+
+ // Check all operands are identical immediates.
+ int64_t ImmVal = 0;
+ for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
+ auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
+ if (!VRegAndVal)
+ return None;
+
+ if (Idx == 1)
+ ImmVal = VRegAndVal->Value;
+ if (ImmVal != VRegAndVal->Value)
+ return None;
+ }
+
+ return ImmVal;
+}
+
+/// Matches and returns the shift immediate value for a SHL instruction given
+/// a shift operand.
+static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
+ Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
+ if (!ShiftImm)
+ return None;
+ // Check the immediate is in range for a SHL.
+ int64_t Imm = *ShiftImm;
+ if (Imm < 0)
+ return None;
+ switch (SrcTy.getElementType().getSizeInBits()) {
+ default:
+ LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
+ return None;
+ case 8:
+ if (Imm > 7)
+ return None;
+ break;
+ case 16:
+ if (Imm > 15)
+ return None;
+ break;
+ case 32:
+ if (Imm > 31)
+ return None;
+ break;
+ case 64:
+ if (Imm > 63)
+ return None;
+ break;
+ }
+ return Imm;
+}
+
bool AArch64InstructionSelector::selectVectorSHL(
MachineInstr &I, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_SHL);
@@ -1017,21 +1105,29 @@ bool AArch64InstructionSelector::selectVectorSHL(
if (!Ty.isVector())
return false;
+ // Check if we have a vector of constants on RHS that we can select as the
+ // immediate form.
+ Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
+
unsigned Opc = 0;
if (Ty == LLT::vector(2, 64)) {
- Opc = AArch64::USHLv2i64;
+ Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
} else if (Ty == LLT::vector(4, 32)) {
- Opc = AArch64::USHLv4i32;
+ Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
} else if (Ty == LLT::vector(2, 32)) {
- Opc = AArch64::USHLv2i32;
+ Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
} else {
LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
return false;
}
MachineIRBuilder MIB(I);
- auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
- constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
+ auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
+ if (ImmVal)
+ Shl.addImm(*ImmVal);
+ else
+ Shl.addUse(Src2Reg);
+ constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
I.eraseFromParent();
return true;
}
@@ -1765,7 +1861,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
auto *PtrMI = MRI.getVRegDef(PtrReg);
// Try to fold a GEP into our unsigned immediate addressing mode.
- if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
+ if (PtrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
int64_t Imm = *COff;
const unsigned Size = MemSizeInBits / 8;
@@ -1883,7 +1979,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
- case TargetOpcode::G_GEP: {
+ case TargetOpcode::G_PTR_ADD: {
MachineIRBuilder MIRBuilder(I);
emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
MIRBuilder);
@@ -2065,14 +2161,15 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
unsigned DstSize = DstTy.getSizeInBits();
unsigned SrcSize = SrcTy.getSizeInBits();
+ if (DstTy.isVector())
+ return false; // Should be handled by imported patterns.
+
assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
AArch64::GPRRegBankID &&
"Unexpected ext regbank");
MachineIRBuilder MIB(I);
MachineInstr *ExtI;
- if (DstTy.isVector())
- return false; // Should be handled by imported patterns.
// First check if we're extending the result of a load which has a dest type
// smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
@@ -3602,22 +3699,51 @@ bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
return false;
// The shuffle's second operand doesn't matter if the mask is all zero.
- const Constant *Mask = I.getOperand(3).getShuffleMask();
- if (!isa<ConstantAggregateZero>(Mask))
+ ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
+ if (!all_of(Mask, [](int Elem) { return Elem == 0; }))
return false;
// We're done, now find out what kind of splat we need.
LLT VecTy = MRI.getType(I.getOperand(0).getReg());
LLT EltTy = VecTy.getElementType();
- if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
- LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
+ if (EltTy.getSizeInBits() < 32) {
+ LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 32b elts yet");
return false;
}
bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
- static const unsigned OpcTable[2][2] = {
- {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
- {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
- unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
+ unsigned Opc = 0;
+ if (IsFP) {
+ switch (EltTy.getSizeInBits()) {
+ case 32:
+ if (VecTy.getNumElements() == 2) {
+ Opc = AArch64::DUPv2i32lane;
+ } else {
+ Opc = AArch64::DUPv4i32lane;
+ assert(VecTy.getNumElements() == 4);
+ }
+ break;
+ case 64:
+ assert(VecTy.getNumElements() == 2 && "Unexpected num elts");
+ Opc = AArch64::DUPv2i64lane;
+ break;
+ }
+ } else {
+ switch (EltTy.getSizeInBits()) {
+ case 32:
+ if (VecTy.getNumElements() == 2) {
+ Opc = AArch64::DUPv2i32gpr;
+ } else {
+ Opc = AArch64::DUPv4i32gpr;
+ assert(VecTy.getNumElements() == 4);
+ }
+ break;
+ case 64:
+ assert(VecTy.getNumElements() == 2 && "Unexpected num elts");
+ Opc = AArch64::DUPv2i64gpr;
+ break;
+ }
+ }
+ assert(Opc && "Did not compute an opcode for a dup");
// For FP splats, we need to widen the scalar reg via undef too.
if (IsFP) {
@@ -3652,15 +3778,12 @@ bool AArch64InstructionSelector::selectShuffleVector(
const LLT Src1Ty = MRI.getType(Src1Reg);
Register Src2Reg = I.getOperand(2).getReg();
const LLT Src2Ty = MRI.getType(Src2Reg);
- const Constant *ShuffleMask = I.getOperand(3).getShuffleMask();
+ ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
LLVMContext &Ctx = MF.getFunction().getContext();
- SmallVector<int, 8> Mask;
- ShuffleVectorInst::getShuffleMask(ShuffleMask, Mask);
-
// G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
// it's originated from a <1 x T> type. Those should have been lowered into
// G_BUILD_VECTOR earlier.
@@ -4164,45 +4287,15 @@ bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
[](MachineInstr &Use) { return Use.mayLoadOrStore(); });
}
-/// This is used for computing addresses like this:
-///
-/// ldr x1, [x2, x3, lsl #3]
-///
-/// Where x2 is the base register, and x3 is an offset register. The shift-left
-/// is a constant value specific to this load instruction. That is, we'll never
-/// see anything other than a 3 here (which corresponds to the size of the
-/// element being loaded.)
InstructionSelector::ComplexRendererFns
-AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
- MachineOperand &Root, unsigned SizeInBytes) const {
- if (!Root.isReg())
- return None;
- MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
-
- // Make sure that the memory op is a valid size.
- int64_t LegalShiftVal = Log2_32(SizeInBytes);
- if (LegalShiftVal == 0)
- return None;
-
- // We want to find something like this:
- //
- // val = G_CONSTANT LegalShiftVal
- // shift = G_SHL off_reg val
- // ptr = G_GEP base_reg shift
- // x = G_LOAD ptr
- //
- // And fold it into this addressing mode:
- //
- // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
+AArch64InstructionSelector::selectExtendedSHL(
+ MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
+ unsigned SizeInBytes, bool WantsExt) const {
+ assert(Base.isReg() && "Expected base to be a register operand");
+ assert(Offset.isReg() && "Expected offset to be a register operand");
- // Check if we can find the G_GEP.
- MachineInstr *Gep = getOpcodeDef(TargetOpcode::G_GEP, Root.getReg(), MRI);
- if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI))
- return None;
-
- // Now, try to match an opcode which will match our specific offset.
- // We want a G_SHL or a G_MUL.
- MachineInstr *OffsetInst = getDefIgnoringCopies(Gep->getOperand(2).getReg(), MRI);
+ MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+ MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
if (!OffsetInst)
return None;
@@ -4210,6 +4303,10 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
return None;
+ // Make sure that the memory op is a valid size.
+ int64_t LegalShiftVal = Log2_32(SizeInBytes);
+ if (LegalShiftVal == 0)
+ return None;
if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
return None;
@@ -4254,27 +4351,82 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
if (ImmVal != LegalShiftVal)
return None;
+ unsigned SignExtend = 0;
+ if (WantsExt) {
+ // Check if the offset is defined by an extend.
+ MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
+ auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
+ if (Ext == AArch64_AM::InvalidShiftExtend)
+ return None;
+
+ SignExtend = Ext == AArch64_AM::SXTW;
+
+ // Need a 32-bit wide register here.
+ MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
+ OffsetReg = ExtInst->getOperand(1).getReg();
+ OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB);
+ }
+
// We can use the LHS of the GEP as the base, and the LHS of the shift as an
// offset. Signify that we are shifting by setting the shift flag to 1.
- return {{[=](MachineInstrBuilder &MIB) {
- MIB.addUse(Gep->getOperand(1).getReg());
- },
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
[=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
[=](MachineInstrBuilder &MIB) {
// Need to add both immediates here to make sure that they are both
// added to the instruction.
- MIB.addImm(0);
+ MIB.addImm(SignExtend);
MIB.addImm(1);
}}};
}
/// This is used for computing addresses like this:
///
+/// ldr x1, [x2, x3, lsl #3]
+///
+/// Where x2 is the base register, and x3 is an offset register. The shift-left
+/// is a constant value specific to this load instruction. That is, we'll never
+/// see anything other than a 3 here (which corresponds to the size of the
+/// element being loaded.)
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
+ MachineOperand &Root, unsigned SizeInBytes) const {
+ if (!Root.isReg())
+ return None;
+ MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+
+ // We want to find something like this:
+ //
+ // val = G_CONSTANT LegalShiftVal
+ // shift = G_SHL off_reg val
+ // ptr = G_PTR_ADD base_reg shift
+ // x = G_LOAD ptr
+ //
+ // And fold it into this addressing mode:
+ //
+ // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
+
+ // Check if we can find the G_PTR_ADD.
+ MachineInstr *PtrAdd =
+ getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
+ if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
+ return None;
+
+ // Now, try to match an opcode which will match our specific offset.
+ // We want a G_SHL or a G_MUL.
+ MachineInstr *OffsetInst =
+ getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
+ return selectExtendedSHL(Root, PtrAdd->getOperand(1),
+ OffsetInst->getOperand(0), SizeInBytes,
+ /*WantsExt=*/false);
+}
+
+/// This is used for computing addresses like this:
+///
/// ldr x1, [x2, x3]
///
/// Where x2 is the base register, and x3 is an offset register.
///
-/// When possible (or profitable) to fold a G_GEP into the address calculation,
+/// When possible (or profitable) to fold a G_PTR_ADD into the address calculation,
/// this will do so. Otherwise, it will return None.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeRegisterOffset(
@@ -4283,7 +4435,7 @@ AArch64InstructionSelector::selectAddrModeRegisterOffset(
// We need a GEP.
MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
- if (!Gep || Gep->getOpcode() != TargetOpcode::G_GEP)
+ if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
return None;
// If this is used more than once, let's not bother folding.
@@ -4329,6 +4481,74 @@ AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
return selectAddrModeRegisterOffset(Root);
}
+/// This is used for computing addresses like this:
+///
+/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
+///
+/// Where we have a 64-bit base register, a 32-bit offset register, and an
+/// extend (which may or may not be signed).
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
+ unsigned SizeInBytes) const {
+ MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+
+ MachineInstr *PtrAdd =
+ getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
+ if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
+ return None;
+
+ MachineOperand &LHS = PtrAdd->getOperand(1);
+ MachineOperand &RHS = PtrAdd->getOperand(2);
+ MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
+
+ // The first case is the same as selectAddrModeXRO, except we need an extend.
+ // In this case, we try to find a shift and extend, and fold them into the
+ // addressing mode.
+ //
+ // E.g.
+ //
+ // off_reg = G_Z/S/ANYEXT ext_reg
+ // val = G_CONSTANT LegalShiftVal
+ // shift = G_SHL off_reg val
+ // ptr = G_PTR_ADD base_reg shift
+ // x = G_LOAD ptr
+ //
+ // In this case we can get a load like this:
+ //
+ // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
+ auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
+ SizeInBytes, /*WantsExt=*/true);
+ if (ExtendedShl)
+ return ExtendedShl;
+
+ // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
+ //
+ // e.g.
+ // ldr something, [base_reg, ext_reg, sxtw]
+ if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
+ return None;
+
+ // Check if this is an extend. We'll get an extend type if it is.
+ AArch64_AM::ShiftExtendType Ext =
+ getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
+ if (Ext == AArch64_AM::InvalidShiftExtend)
+ return None;
+
+ // Need a 32-bit wide register.
+ MachineIRBuilder MIB(*PtrAdd);
+ Register ExtReg =
+ narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB);
+ unsigned SignExtend = Ext == AArch64_AM::SXTW;
+
+ // Base is LHS, offset is ExtReg.
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
+ [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
+ [=](MachineInstrBuilder &MIB) {
+ MIB.addImm(SignExtend);
+ MIB.addImm(0);
+ }}};
+}
+
/// Select a "register plus unscaled signed 9-bit immediate" address. This
/// should only match when there is an offset that is not valid for a scaled
/// immediate addressing mode. The "Size" argument is the size in bytes of the
@@ -4491,9 +4711,8 @@ AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
[=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
}
-/// Get the correct ShiftExtendType for an extend instruction.
-static AArch64_AM::ShiftExtendType
-getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI) {
+AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
+ MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
unsigned Opc = MI.getOpcode();
// Handle explicit extend instructions first.
@@ -4540,9 +4759,9 @@ getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI) {
default:
return AArch64_AM::InvalidShiftExtend;
case 0xFF:
- return AArch64_AM::UXTB;
+ return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
case 0xFFFF:
- return AArch64_AM::UXTH;
+ return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
case 0xFFFFFFFF:
return AArch64_AM::UXTW;
}
@@ -4632,25 +4851,29 @@ AArch64InstructionSelector::selectArithExtendedRegister(
}
void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
- const MachineInstr &MI) const {
+ const MachineInstr &MI,
+ int OpIdx) const {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
- assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+ assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+ "Expected G_CONSTANT");
Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
assert(CstVal && "Expected constant value");
MIB.addImm(CstVal.getValue());
}
void AArch64InstructionSelector::renderLogicalImm32(
- MachineInstrBuilder &MIB, const MachineInstr &I) const {
- assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+ MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
+ assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+ "Expected G_CONSTANT");
uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
MIB.addImm(Enc);
}
void AArch64InstructionSelector::renderLogicalImm64(
- MachineInstrBuilder &MIB, const MachineInstr &I) const {
- assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+ MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
+ assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+ "Expected G_CONSTANT");
uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
MIB.addImm(Enc);