Diffstat (limited to 'include/llvm/CodeGen/TargetLowering.h')
-rw-r--r-- | include/llvm/CodeGen/TargetLowering.h | 243
1 file changed, 200 insertions, 43 deletions
diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h
index 380e3b19dc80..d5ff71cf9ac2 100644
--- a/include/llvm/CodeGen/TargetLowering.h
+++ b/include/llvm/CodeGen/TargetLowering.h
@@ -29,9 +29,9 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
 #include "llvm/CodeGen/DAGCombine.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -52,6 +52,7 @@
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
 #include "llvm/Target/TargetMachine.h"
 #include <algorithm>
 #include <cassert>
@@ -222,7 +223,7 @@ public:
   virtual ~TargetLoweringBase() = default;
 
 protected:
-  /// \brief Initialize all of the actions to default values.
+  /// Initialize all of the actions to default values.
   void initActions();
 
 public:
@@ -253,7 +254,8 @@
   /// A documentation for this function would be nice...
   virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const;
 
-  EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const;
+  EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
+                       bool LegalTypes = true) const;
 
   /// Returns the type to be used for the index operand of:
   /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
@@ -421,17 +423,17 @@ public:
     return true;
   }
 
-  /// \brief Return true if it is cheap to speculate a call to intrinsic cttz.
+  /// Return true if it is cheap to speculate a call to intrinsic cttz.
   virtual bool isCheapToSpeculateCttz() const {
     return false;
   }
 
-  /// \brief Return true if it is cheap to speculate a call to intrinsic ctlz.
+  /// Return true if it is cheap to speculate a call to intrinsic ctlz.
   virtual bool isCheapToSpeculateCtlz() const {
     return false;
   }
 
-  /// \brief Return true if ctlz instruction is fast.
+  /// Return true if ctlz instruction is fast.
   virtual bool isCtlzFast() const {
     return false;
   }
@@ -444,13 +446,13 @@
     return false;
   }
 
-  /// \brief Return true if it is cheaper to split the store of a merged int val
+  /// Return true if it is cheaper to split the store of a merged int val
   /// from a pair of smaller values into multiple stores.
   virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const {
     return false;
   }
 
-  /// \brief Return if the target supports combining a
+  /// Return if the target supports combining a
   /// chain like:
   /// \code
   /// %andResult = and %val1, #mask
@@ -507,7 +509,30 @@
     return hasAndNotCompare(X);
   }
 
-  /// \brief Return true if the target wants to use the optimization that
+  /// There are two ways to clear extreme bits (either low or high):
+  /// Mask:    x &  (-1 << y)  (the instcombine canonical form)
+  /// Shifts:  x >> y << y
+  /// Return true if the variant with 2 shifts is preferred.
+  /// Return false if there is no preference.
+  virtual bool preferShiftsToClearExtremeBits(SDValue X) const {
+    // By default, let's assume that no one prefers shifts.
+    return false;
+  }
+
+  /// Should we transform the IR-optimal check for whether given truncation
+  /// down into KeptBits would be truncating or not:
+  ///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
+  /// Into its more traditional form:
+  ///   ((%x << C) a>> C) dstcond %x
+  /// Return true if we should transform.
+  /// Return false if there is no preference.
+  virtual bool shouldTransformSignedTruncationCheck(EVT XVT,
+                                                    unsigned KeptBits) const {
+    // By default, let's assume that no one prefers shifts.
+    return false;
+  }
+
+  /// Return true if the target wants to use the optimization that
   /// turns ext(promotableInst1(...(promotableInstN(load)))) into
   /// promotedInst1(...(promotedInstN(ext(load)))).
   bool enableExtLdPromotion() const { return EnableExtLdPromotion; }
@@ -746,10 +771,10 @@
   /// operations don't trap except for integer divide and remainder.
   virtual bool canOpTrap(unsigned Op, EVT VT) const;
 
-  /// Similar to isShuffleMaskLegal. This is used by Targets can use this to
-  /// indicate if there is a suitable VECTOR_SHUFFLE that can be used to replace
-  /// a VAND with a constant pool entry.
-  virtual bool isVectorClearMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
+  /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
+  /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
+  /// constant pool entry.
+  virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/,
                                       EVT /*VT*/) const {
     return false;
   }
@@ -765,6 +790,39 @@
     return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
   }
 
+  LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const {
+    unsigned EqOpc;
+    switch (Op) {
+      default: llvm_unreachable("Unexpected FP pseudo-opcode");
+      case ISD::STRICT_FADD: EqOpc = ISD::FADD; break;
+      case ISD::STRICT_FSUB: EqOpc = ISD::FSUB; break;
+      case ISD::STRICT_FMUL: EqOpc = ISD::FMUL; break;
+      case ISD::STRICT_FDIV: EqOpc = ISD::FDIV; break;
+      case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
+      case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
+      case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
+      case ISD::STRICT_FMA: EqOpc = ISD::FMA; break;
+      case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
+      case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
+      case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
+      case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break;
+      case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break;
+      case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break;
+      case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break;
+      case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break;
+      case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break;
+    }
+
+    auto Action = getOperationAction(EqOpc, VT);
+
+    // We don't currently handle Custom or Promote for strict FP pseudo-ops.
+    // For now, we just expand for those cases.
+    if (Action != Legal)
+      Action = Expand;
+
+    return Action;
+  }
+
   /// Return true if the specified operation is legal on this target or can be
   /// made legal with custom lowering. This is used to help guide high-level
   /// lowering decisions.
@@ -800,7 +858,7 @@
   }
 
   /// Return true if lowering to a jump table is allowed.
-  bool areJTsAllowed(const Function *Fn) const {
+  virtual bool areJTsAllowed(const Function *Fn) const {
     if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")
       return false;
 
@@ -812,7 +870,7 @@
   bool rangeFitsInWord(const APInt &Low, const APInt &High,
                        const DataLayout &DL) const {
     // FIXME: Using the pointer type doesn't seem ideal.
-    uint64_t BW = DL.getPointerSizeInBits();
+    uint64_t BW = DL.getIndexSizeInBits(0u);
    uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
     return Range <= BW;
   }
@@ -820,7 +878,7 @@
   /// Return true if lowering to a jump table is suitable for a set of case
   /// clusters which may contain \p NumCases cases, \p Range range of values.
   /// FIXME: This function check the maximum table size and density, but the
-  /// minimum size is not checked. It would be nice if the the minimum size is
+  /// minimum size is not checked. It would be nice if the minimum size is
   /// also combined within this function. Currently, the minimum size check is
   /// performed in findJumpTable() in SelectionDAGBuiler and
   /// getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
@@ -986,9 +1044,14 @@
 
   /// Return true if the specified condition code is legal on this target.
   bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const {
-    return
-      getCondCodeAction(CC, VT) == Legal ||
-      getCondCodeAction(CC, VT) == Custom;
+    return getCondCodeAction(CC, VT) == Legal;
+  }
+
+  /// Return true if the specified condition code is legal or custom on this
+  /// target.
+  bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const {
+    return getCondCodeAction(CC, VT) == Legal ||
+           getCondCodeAction(CC, VT) == Custom;
   }
 
   /// If the action for this operation is to promote, this method returns the
@@ -1110,10 +1173,6 @@
   /// Certain combinations of ABIs, Targets and features require that types
   /// are legal for some operations and not for other operations.
   /// For MIPS all vector types must be passed through the integer register set.
-  virtual MVT getRegisterTypeForCallingConv(MVT VT) const {
-    return getRegisterType(VT);
-  }
-
   virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
                                             EVT VT) const {
     return getRegisterType(Context, VT);
@@ -1172,7 +1231,7 @@
     return getPointerTy(DL).getSizeInBits();
   }
 
-  /// \brief Get maximum # of store operations permitted for llvm.memset
+  /// Get maximum # of store operations permitted for llvm.memset
   ///
   /// This function returns the maximum number of store operations permitted
   /// to replace a call to llvm.memset. The value is set by the target at the
@@ -1182,7 +1241,7 @@
     return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset;
   }
 
-  /// \brief Get maximum # of store operations permitted for llvm.memcpy
+  /// Get maximum # of store operations permitted for llvm.memcpy
   ///
   /// This function returns the maximum number of store operations permitted
   /// to replace a call to llvm.memcpy. The value is set by the target at the
@@ -1192,6 +1251,15 @@
     return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy;
   }
 
+  /// Get maximum # of store operations to be glued together
+  ///
+  /// This function returns the maximum number of store operations permitted
+  /// to glue together during lowering of llvm.memcpy. The value is set by
+  /// the target at the performance threshold for such a replacement.
+  virtual unsigned getMaxGluedStoresPerMemcpy() const {
+    return MaxGluedStoresPerMemcpy;
+  }
+
   /// Get maximum # of load operations permitted for memcmp
   ///
   /// This function returns the maximum number of load operations permitted
@@ -1202,7 +1270,19 @@
     return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp;
   }
 
-  /// \brief Get maximum # of store operations permitted for llvm.memmove
+  /// For memcmp expansion when the memcmp result is only compared equal or
+  /// not-equal to 0, allow up to this number of load pairs per block. As an
+  /// example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
+  ///   a0 = load2bytes &a[0]
+  ///   b0 = load2bytes &b[0]
+  ///   a2 = load1byte  &a[2]
+  ///   b2 = load1byte  &b[2]
+  ///   r  = cmp eq (a0 ^ b0 | a2 ^ b2), 0
+  virtual unsigned getMemcmpEqZeroLoadsPerBlock() const {
+    return 1;
+  }
+
+  /// Get maximum # of store operations permitted for llvm.memmove
   ///
   /// This function returns the maximum number of store operations permitted
   /// to replace a call to llvm.memmove. The value is set by the target at the
@@ -1212,7 +1292,7 @@
     return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove;
   }
 
-  /// \brief Determine if the target supports unaligned memory accesses.
+  /// Determine if the target supports unaligned memory accesses.
   ///
   /// This function returns true if the target allows unaligned memory accesses
   /// of the specified type in the given address space. If true, it also returns
@@ -1350,7 +1430,7 @@
   /// If the target has a standard location for the stack protector guard,
   /// returns the address of that location. Otherwise, returns nullptr.
   /// DEPRECATED: please override useLoadStackGuardNode and customize
-  /// LOAD_STACK_GUARD, or customize @llvm.stackguard().
+  /// LOAD_STACK_GUARD, or customize \@llvm.stackguard().
   virtual Value *getIRStackGuard(IRBuilder<> &IRB) const;
 
   /// Inserts necessary declarations for SSP (stack protection) purpose.
@@ -1905,7 +1985,7 @@
                                    Type *Ty, unsigned AddrSpace,
                                    Instruction *I = nullptr) const;
 
-  /// \brief Return the cost of the scaling factor used in the addressing mode
+  /// Return the cost of the scaling factor used in the addressing mode
   /// represented by AM for this target, for a load/store of the specified type.
   ///
   /// If the AM is supported, the return value must be >= 0.
@@ -2098,11 +2178,14 @@
     return false;
   }
 
-  /// \brief Get the maximum supported factor for interleaved memory accesses.
+  /// Return true if the target has a vector blend instruction.
+  virtual bool hasVectorBlend() const { return false; }
+
+  /// Get the maximum supported factor for interleaved memory accesses.
   /// Default to be the minimum interleave factor: 2.
   virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; }
 
-  /// \brief Lower an interleaved load to target specific intrinsics. Return
+  /// Lower an interleaved load to target specific intrinsics. Return
   /// true on success.
   ///
   /// \p LI is the vector load instruction.
@@ -2116,7 +2199,7 @@
     return false;
   }
 
-  /// \brief Lower an interleaved store to target specific intrinsics. Return
+  /// Lower an interleaved store to target specific intrinsics. Return
   /// true on success.
   ///
   /// \p SI is the vector store instruction.
@@ -2189,7 +2272,7 @@
     return false;
   }
 
-  /// \brief Return true if it is beneficial to convert a load of a constant to
+  /// Return true if it is beneficial to convert a load of a constant to
   /// just the constant itself.
   /// On some targets it might be more efficient to use a combination of
   /// arithmetic instructions to materialize the constant instead of loading it
@@ -2214,6 +2297,11 @@
     return false;
   }
 
+  // Return true if CodeGenPrepare should consider splitting a large offset of
+  // a GEP so that the GEP fits into the addressing mode and can be sunk into
+  // the same blocks as its users.
+  virtual bool shouldConsiderGEPOffsetSplit() const { return false; }
+
   //===--------------------------------------------------------------------===//
   // Runtime Library hooks
   //
@@ -2453,7 +2541,7 @@ protected:
   /// expected to be merged.
   unsigned GatherAllAliasesMaxDepth;
 
-  /// \brief Specify maximum number of store instructions per memset call.
+  /// Specify maximum number of store instructions per memset call.
   ///
   /// When lowering \@llvm.memset this field specifies the maximum number of
   /// store operations that may be substituted for the call to memset. Targets
@@ -2469,7 +2557,7 @@
   /// to memset, used for functions with OptSize attribute.
   unsigned MaxStoresPerMemsetOptSize;
 
-  /// \brief Specify maximum bytes of store instructions per memcpy call.
+  /// Specify maximum bytes of store instructions per memcpy call.
   ///
   /// When lowering \@llvm.memcpy this field specifies the maximum number of
   /// store operations that may be substituted for a call to memcpy. Targets
@@ -2482,13 +2570,21 @@
   /// constant size.
   unsigned MaxStoresPerMemcpy;
 
+
+  /// Specify max number of store instructions to glue in inlined memcpy.
+  ///
+  /// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number
+  /// of store instructions to keep together. This helps in pairing and
+  /// vectorization later on.
+  unsigned MaxGluedStoresPerMemcpy = 0;
+
   /// Maximum number of store operations that may be substituted for a call to
   /// memcpy, used for functions with OptSize attribute.
   unsigned MaxStoresPerMemcpyOptSize;
   unsigned MaxLoadsPerMemcmp;
   unsigned MaxLoadsPerMemcmpOptSize;
 
-  /// \brief Specify maximum bytes of store instructions per memmove call.
+  /// Specify maximum bytes of store instructions per memmove call.
   ///
   /// When lowering \@llvm.memmove this field specifies the maximum number of
   /// store instructions that may be substituted for a call to memmove. Targets
@@ -2520,6 +2616,16 @@
   /// sequence of memory operands that is recognized by PrologEpilogInserter.
   MachineBasicBlock *emitPatchPoint(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const;
+
+  /// Replace/modify the XRay custom event operands with target-dependent
+  /// details.
+  MachineBasicBlock *emitXRayCustomEvent(MachineInstr &MI,
+                                         MachineBasicBlock *MBB) const;
+
+  /// Replace/modify the XRay typed event operands with target-dependent
+  /// details.
+  MachineBasicBlock *emitXRayTypedEvent(MachineInstr &MI,
+                                        MachineBasicBlock *MBB) const;
 };
 
 /// This class defines information used to lower LLVM code to legal SelectionDAG
@@ -2539,6 +2645,16 @@ public:
 
   bool isPositionIndependent() const;
 
+  virtual bool isSDNodeSourceOfDivergence(const SDNode *N,
+                                          FunctionLoweringInfo *FLI,
+                                          DivergenceAnalysis *DA) const {
+    return false;
+  }
+
+  virtual bool isSDNodeAlwaysUniform(const SDNode *N) const {
+    return false;
+  }
+
   /// Returns true by value, base pointer and offset pointer and addressing mode
   /// by reference if the node's address can be legally represented as
   /// pre-indexed load / store address.
@@ -2690,6 +2806,30 @@
   bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
                             DAGCombinerInfo &DCI) const;
 
+  /// Look at Vector Op. At this point, we know that only the DemandedElts
+  /// elements of the result of Op are ever used downstream. If we can use
+  /// this information to simplify Op, create a new simplified DAG node and
+  /// return true, storing the original and new nodes in TLO.
+  /// Otherwise, analyze the expression and return a mask of KnownUndef and
+  /// KnownZero elements for the expression (used to simplify the caller).
+  /// The KnownUndef/Zero elements may only be accurate for those bits
+  /// in the DemandedMask.
+  /// \p AssumeSingleUse When this parameter is true, this function will
+  /// attempt to simplify \p Op even if there are multiple uses.
+  /// Callers are responsible for correctly updating the DAG based on the
+  /// results of this function, because simply replacing TLO.Old
+  /// with TLO.New will be incorrect when this parameter is true and TLO.Old
+  /// has multiple uses.
+  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask,
+                                  APInt &KnownUndef, APInt &KnownZero,
+                                  TargetLoweringOpt &TLO, unsigned Depth = 0,
+                                  bool AssumeSingleUse = false) const;
+
+  /// Helper wrapper around SimplifyDemandedVectorElts
+  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
+                                  APInt &KnownUndef, APInt &KnownZero,
+                                  DAGCombinerInfo &DCI) const;
+
   /// Determine which of the bits specified in Mask are known to be either zero
   /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
   /// argument allows us to only collect the known bits that are shared by the
@@ -2718,6 +2858,15 @@
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth = 0) const;
 
+  /// Attempt to simplify any target nodes based on the demanded vector
+  /// elements, returning true on success. Otherwise, analyze the expression and
+  /// return a mask of KnownUndef and KnownZero elements for the expression
+  /// (used to simplify the caller). The KnownUndef/Zero elements may only be
+  /// accurate for those bits in the DemandedMask.
+  virtual bool SimplifyDemandedVectorEltsForTargetNode(
+      SDValue Op, const APInt &DemandedElts, APInt &KnownUndef,
+      APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const;
+
   struct DAGCombinerInfo {
     void *DC;  // The DAG Combiner object.
     CombineLevel Level;
@@ -2731,7 +2880,7 @@
     bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; }
     bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; }
-    bool isAfterLegalizeVectorOps() const {
+    bool isAfterLegalizeDAG() const {
       return Level == AfterLegalizeDAG;
     }
     CombineLevel getDAGCombineLevel() { return Level; }
 
@@ -2753,12 +2902,8 @@
   /// from getBooleanContents().
   bool isConstFalseVal(const SDNode *N) const;
 
-  /// Return a constant of type VT that contains a true value that respects
-  /// getBooleanContents()
-  SDValue getConstTrueVal(SelectionDAG &DAG, EVT VT, const SDLoc &DL) const;
-
   /// Return if \p N is a True value when extended to \p VT.
-  bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool Signed) const;
+  bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const;
 
   /// Try to simplify a setcc built with the specified operands and cc. If it is
   /// unable to simplify it, return a null SDValue.
@@ -3479,7 +3624,7 @@
   /// bounds the returned pointer is unspecified, but will be within the vector
   /// bounds.
   SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
-                                  SDValue Idx) const;
+                                  SDValue Index) const;
 
   //===--------------------------------------------------------------------===//
   // Instruction Emitting Hooks
@@ -3518,6 +3663,13 @@
   virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                           SelectionDAG &DAG) const;
 
+  /// Expands target specific indirect branch for the case of JumpTable
+  /// expansion.
+  virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
+                                         SDValue Addr, SelectionDAG &DAG) const {
+    return DAG.getNode(ISD::BRIND, dl, MVT::Other, Value, Addr);
+  }
+
   // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits)))
   // If we're comparing for equality to zero and isCtlzFast is true, expose the
   // fact that this can be implemented as a ctlz/srl pair, so that the dag
@@ -3528,6 +3680,11 @@ private:
   SDValue simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                ISD::CondCode Cond, DAGCombinerInfo &DCI,
                                const SDLoc &DL) const;
+
+  SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0,
+                                               SDValue N1, ISD::CondCode Cond,
+                                               DAGCombinerInfo &DCI,
+                                               const SDLoc &DL) const;
 };
 
 /// Given an LLVM IR type and return type attributes, compute the return value
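The two forms that the new preferShiftsToClearExtremeBits() hook chooses between compute the same value, so the choice is purely about what is cheap on the target. A minimal, self-contained C++ check of that equivalence for the low-bit case (plain arithmetic rather than LLVM code; the sample values are arbitrary):

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // x & (-1 << y)  vs  x >> y << y: both clear the y lowest bits.
  // (The high-bit variant is the mirror image with the shift
  // directions swapped.)
  const uint32_t samples[] = {0u, 1u, 0x12345678u, 0xDEADBEEFu, ~0u};
  for (uint32_t x : samples)
    for (unsigned y = 0; y < 32; ++y) {
      uint32_t maskForm  = x & (~0u << y); // instcombine canonical form
      uint32_t shiftForm = (x >> y) << y;  // two-shift form
      assert(maskForm == shiftForm);
    }
  std::puts("mask and shift forms agree");
}
```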
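Similarly, the two predicates in shouldTransformSignedTruncationCheck()'s comment test the same thing: whether truncating %x to KeptBits bits and sign-extending back is lossless. A self-contained sketch for KeptBits = 8 inside a 32-bit integer (the detours through uint32_t sidestep signed-shift UB and assume two's complement, which matches LLVM's model):

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

// (add %x, (1 << (KeptBits-1))) ult (1 << KeptBits), with KeptBits = 8.
static bool addForm(int32_t x) {
  return (uint32_t)x + 128u < 256u;
}

// ((%x << C) a>> C) == %x, with C = 32 - KeptBits = 24.
static bool shiftForm(int32_t x) {
  int32_t roundTripped = (int32_t)((uint32_t)x << 24) >> 24;
  return roundTripped == x;
}

int main() {
  // Both predicates hold exactly when x fits in a signed 8-bit value,
  // i.e. x is in [-128, 127].
  for (int64_t x = -70000; x <= 70000; ++x)
    assert(addForm((int32_t)x) == shiftForm((int32_t)x));
  std::puts("add form and shift form agree");
}
```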
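The single-block expansion sketched in the getMemcmpEqZeroLoadsPerBlock() comment, written out as plain C++ so it can be checked against the library memcmp:

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstring>

// memcmp(a, b, 3) == 0 using one 2-byte load pair and one 1-byte load pair,
// OR-combined so the whole test needs no control flow.
static bool eq3(const unsigned char *a, const unsigned char *b) {
  uint16_t a0, b0;
  std::memcpy(&a0, a, 2); // a0 = load2bytes &a[0]
  std::memcpy(&b0, b, 2); // b0 = load2bytes &b[0]
  uint8_t a2 = a[2];      // a2 = load1byte  &a[2]
  uint8_t b2 = b[2];      // b2 = load1byte  &b[2]
  return ((a0 ^ b0) | (a2 ^ b2)) == 0; // r = cmp eq (a0^b0 | a2^b2), 0
}

int main() {
  unsigned char x[3] = {1, 2, 3};
  unsigned char y[3] = {1, 2, 3};
  assert(eq3(x, y) == (std::memcmp(x, y, 3) == 0)); // equal
  y[2] = 4;
  assert(eq3(x, y) == (std::memcmp(x, y, 3) == 0)); // differ in last byte
  y[2] = 3;
  y[0] = 9;
  assert(eq3(x, y) == (std::memcmp(x, y, 3) == 0)); // differ in first byte
  std::puts("single-block expansion matches memcmp");
}
```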
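Finally, a sketch of how a backend might opt into some of the hooks added above. XYZTargetLowering is hypothetical (a real subclass needs a TargetMachine and the usual register/type setup, so this does nothing useful standalone), but the overridden signatures are the ones declared in this header:

```cpp
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {

class XYZTargetLowering : public TargetLowering {
public:
  using TargetLowering::TargetLowering;

  // Pretend shifts are cheaper than materializing the '-1 << y' mask.
  bool preferShiftsToClearExtremeBits(SDValue X) const override {
    return true;
  }

  // Take the '((%x << C) a>> C) == %x' form of the signed truncation check
  // for scalar integers with a byte-sized or larger kept width.
  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    return XVT.isScalarInteger() && KeptBits >= 8;
  }

  // Allow three load pairs per block when expanding memcmp(...) == 0.
  unsigned getMemcmpEqZeroLoadsPerBlock() const override { return 3; }
};

} // end namespace llvm
```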