Diffstat (limited to 'lib/Target/AArch64/AArch64LegalizerInfo.cpp')
-rw-r--r-- | lib/Target/AArch64/AArch64LegalizerInfo.cpp | 111 |
1 file changed, 75 insertions, 36 deletions
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index a985b330eafa..7a1901bd5b1e 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -13,7 +13,9 @@
 #include "AArch64LegalizerInfo.h"
 #include "AArch64Subtarget.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
@@ -50,6 +52,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
   const LLT v2s64 = LLT::vector(2, 64);
   const LLT v2p0 = LLT::vector(2, p0);
 
+  // FIXME: support subtargets which have neon/fp-armv8 disabled.
+  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
+    computeTables();
+    return;
+  }
+
   getActionDefinitionsBuilder(G_IMPLICIT_DEF)
       .legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64})
       .clampScalar(0, s1, s64)
@@ -74,7 +82,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
 
   getActionDefinitionsBuilder(G_BSWAP)
       .legalFor({s32, s64, v4s32, v2s32, v2s64})
-      .clampScalar(0, s16, s64)
+      .clampScalar(0, s32, s64)
       .widenScalarToNextPow2(0);
 
   getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
@@ -104,6 +112,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
 
   getActionDefinitionsBuilder({G_SDIV, G_UDIV})
       .legalFor({s32, s64})
+      .libcallFor({s128})
       .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);
@@ -115,8 +124,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
         return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
                AmtTy.getSizeInBits() == 32;
       })
-      .legalFor(
-          {{s32, s32}, {s32, s64}, {s64, s64}, {v2s32, v2s32}, {v4s32, v4s32}})
+      .legalFor({{s32, s32},
+                 {s32, s64},
+                 {s64, s64},
+                 {v2s32, v2s32},
+                 {v4s32, v4s32},
+                 {v2s64, v2s64}})
       .clampScalar(1, s32, s64)
       .clampScalar(0, s32, s64)
       .minScalarSameAs(1, 0);
@@ -191,14 +204,14 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
       .legalIf([=](const LegalityQuery &Query) {
         const LLT &Ty0 = Query.Types[0];
         const LLT &Ty1 = Query.Types[1];
-        if (Ty1 != s32 && Ty1 != s64)
+        if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128)
           return false;
         if (Ty1 == p0)
           return true;
         return isPowerOf2_32(Ty0.getSizeInBits()) &&
                (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
       })
-      .clampScalar(1, s32, s64)
+      .clampScalar(1, s32, s128)
       .widenScalarToNextPow2(1)
       .maxScalarIf(typeInSet(1, {s32}), 0, s16)
       .maxScalarIf(typeInSet(1, {s64}), 0, s32)
@@ -236,6 +249,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
                                  {s32, p0, 32, 8},
                                  {s64, p0, 64, 8},
                                  {p0, p0, 64, 8},
+                                 {s128, p0, 128, 8},
                                  {v8s8, p0, 64, 8},
                                  {v16s8, p0, 128, 8},
                                  {v4s16, p0, 64, 8},
@@ -247,14 +261,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
       .legalForTypesWithMemDesc({{s32, p0, 8, 8}, {s32, p0, 16, 8}})
       .clampScalar(0, s8, s64)
-      .widenScalarToNextPow2(0)
-      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
-      //       how to do that yet.
-      .unsupportedIfMemSizeNotPow2()
+      .lowerIfMemSizeNotPow2()
       // Lower any any-extending loads left into G_ANYEXT and G_LOAD
       .lowerIf([=](const LegalityQuery &Query) {
         return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
       })
+      .widenScalarToNextPow2(0)
       .clampMaxNumElements(0, s32, 2)
       .clampMaxNumElements(0, s64, 1)
       .customIf(IsPtrVecPred);
 
@@ -262,9 +274,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
   getActionDefinitionsBuilder(G_STORE)
       .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                  {s16, p0, 16, 8},
+                                 {s32, p0, 8, 8},
+                                 {s32, p0, 16, 8},
                                  {s32, p0, 32, 8},
                                  {s64, p0, 64, 8},
                                  {p0, p0, 64, 8},
+                                 {s128, p0, 128, 8},
                                  {v16s8, p0, 128, 8},
                                  {v4s16, p0, 64, 8},
                                  {v8s16, p0, 128, 8},
@@ -272,10 +287,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
                                  {v4s32, p0, 128, 8},
                                  {v2s64, p0, 128, 8}})
       .clampScalar(0, s8, s64)
-      .widenScalarToNextPow2(0)
-      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
-      //       how to do that yet.
-      .unsupportedIfMemSizeNotPow2()
+      .lowerIfMemSizeNotPow2()
       .lowerIf([=](const LegalityQuery &Query) {
         return Query.Types[0].isScalar() &&
                Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
       })
@@ -305,8 +317,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
                  {v8s16, v8s16},
                  {v8s8, v8s8},
                  {v16s8, v16s8}})
-      .clampScalar(0, s32, s32)
       .clampScalar(1, s32, s64)
+      .clampScalar(0, s32, s32)
       .minScalarEltSameAsIf(
           [=](const LegalityQuery &Query) {
             const LLT &Ty = Query.Types[0];
@@ -330,33 +342,40 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
       .widenScalarToNextPow2(1);
 
   // Extensions
-  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
-      .legalIf([=](const LegalityQuery &Query) {
-        unsigned DstSize = Query.Types[0].getSizeInBits();
-
-        // Make sure that we have something that will fit in a register, and
-        // make sure it's a power of 2.
-        if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
-          return false;
+  auto ExtLegalFunc = [=](const LegalityQuery &Query) {
+    unsigned DstSize = Query.Types[0].getSizeInBits();
+
+    if (DstSize == 128 && !Query.Types[0].isVector())
+      return false; // Extending to a scalar s128 needs narrowing.
+
+    // Make sure that we have something that will fit in a register, and
+    // make sure it's a power of 2.
+    if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
+      return false;
 
-        const LLT &SrcTy = Query.Types[1];
+    const LLT &SrcTy = Query.Types[1];
 
-        // Special case for s1.
-        if (SrcTy == s1)
-          return true;
+    // Special case for s1.
+    if (SrcTy == s1)
+      return true;
 
-        // Make sure we fit in a register otherwise. Don't bother checking that
-        // the source type is below 128 bits. We shouldn't be allowing anything
-        // through which is wider than the destination in the first place.
-        unsigned SrcSize = SrcTy.getSizeInBits();
-        if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
-          return false;
+    // Make sure we fit in a register otherwise. Don't bother checking that
+    // the source type is below 128 bits. We shouldn't be allowing anything
+    // through which is wider than the destination in the first place.
+    unsigned SrcSize = SrcTy.getSizeInBits();
+    if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
+      return false;
 
-        return true;
-      });
+    return true;
+  };
+  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
+      .legalIf(ExtLegalFunc)
+      .clampScalar(0, s64, s64); // Just for s128, others are handled above.
 
   getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();
 
+  getActionDefinitionsBuilder(G_SEXT_INREG).lower();
+
   // FP conversions
   getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
       {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
@@ -591,6 +610,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
     return Query.Types[0] == p0 && Query.Types[1] == s64;
   });
 
+  getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
+
   computeTables();
   verify(*ST.getInstrInfo());
 }
@@ -617,6 +638,24 @@ bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
   llvm_unreachable("expected switch to return");
 }
 
+bool AArch64LegalizerInfo::legalizeIntrinsic(
+    MachineInstr &MI, MachineRegisterInfo &MRI,
+    MachineIRBuilder &MIRBuilder) const {
+  switch (MI.getIntrinsicID()) {
+  case Intrinsic::memcpy:
+  case Intrinsic::memset:
+  case Intrinsic::memmove:
+    if (createMemLibcall(MIRBuilder, MRI, MI) ==
+        LegalizerHelper::UnableToLegalize)
+      return false;
+    MI.eraseFromParent();
+    return true;
+  default:
+    break;
+  }
+  return true;
+}
+
 bool AArch64LegalizerInfo::legalizeShlAshrLshr(
     MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
     GISelChangeObserver &Observer) const {
@@ -655,7 +694,7 @@ bool AArch64LegalizerInfo::legalizeLoadStore(
   // legalized. In order to allow further legalization of the inst, we create
   // a new instruction and erase the existing one.
 
-  unsigned ValReg = MI.getOperand(0).getReg();
+  Register ValReg = MI.getOperand(0).getReg();
   const LLT ValTy = MRI.getType(ValReg);
 
   if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
@@ -672,7 +711,7 @@ bool AArch64LegalizerInfo::legalizeLoadStore(
     auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg});
     MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO);
   } else {
-    unsigned NewReg = MRI.createGenericVirtualRegister(NewTy);
+    Register NewReg = MRI.createGenericVirtualRegister(NewTy);
    auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO);
    MIRBuilder.buildBitcast({ValReg}, {NewLoad});
  }
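
For reference, the effect of the new rules can be observed through LegalizerInfo's query interface. The snippet below is a minimal sketch, not part of the patch: the helper function and its setup are hypothetical, and it assumes a subtarget with NEON and FP enabled, since the constructor above now returns early otherwise.

// Minimal sketch (hypothetical, not from the patch): query the legalizer
// tables built above and check two of the actions introduced by this change.
#include "AArch64LegalizerInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include <cassert>

using namespace llvm;

static void checkNewAArch64Rules(const AArch64Subtarget &ST) {
  // Assumes ST.hasNEON() && ST.hasFPARMv8(); otherwise the constructor
  // returns early and these queries would not see the rules below.
  AArch64LegalizerInfo LI(ST);

  const LLT s64 = LLT::scalar(64);
  const LLT s128 = LLT::scalar(128);
  const LLT p0 = LLT::pointer(0, 64);

  // 128-bit signed division is now turned into a libcall (e.g. __divti3)
  // by the LegalizerHelper instead of being rejected.
  LegalizeActionStep Div = LI.getAction({TargetOpcode::G_SDIV, {s128}});
  assert(Div.Action == LegalizeActions::Libcall);
  (void)Div;

  // G_DYN_STACKALLOC no longer hits a missing rule; it is lowered.
  LegalizeActionStep Alloca =
      LI.getAction({TargetOpcode::G_DYN_STACKALLOC, {p0, s64}});
  assert(Alloca.Action == LegalizeActions::Lower);
  (void)Alloca;
}

In-tree, behavior like this is normally exercised with MIR tests under test/CodeGen/AArch64/GlobalISel rather than direct C++ queries.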