Diffstat (limited to 'include/llvm/CodeGen/TargetLowering.h')
-rw-r--r--  include/llvm/CodeGen/TargetLowering.h | 243
 1 file changed, 200 insertions(+), 43 deletions(-)
diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h
index 380e3b19dc80..d5ff71cf9ac2 100644
--- a/include/llvm/CodeGen/TargetLowering.h
+++ b/include/llvm/CodeGen/TargetLowering.h
@@ -29,9 +29,9 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -52,6 +52,7 @@
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
@@ -222,7 +223,7 @@ public:
virtual ~TargetLoweringBase() = default;
protected:
- /// \brief Initialize all of the actions to default values.
+ /// Initialize all of the actions to default values.
void initActions();
public:
@@ -253,7 +254,8 @@ public:
/// A documentation for this function would be nice...
virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const;
- EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const;
+ EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
+ bool LegalTypes = true) const;
/// Returns the type to be used for the index operand of:
/// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
@@ -421,17 +423,17 @@ public:
return true;
}
- /// \brief Return true if it is cheap to speculate a call to intrinsic cttz.
+ /// Return true if it is cheap to speculate a call to intrinsic cttz.
virtual bool isCheapToSpeculateCttz() const {
return false;
}
- /// \brief Return true if it is cheap to speculate a call to intrinsic ctlz.
+ /// Return true if it is cheap to speculate a call to intrinsic ctlz.
virtual bool isCheapToSpeculateCtlz() const {
return false;
}
- /// \brief Return true if ctlz instruction is fast.
+ /// Return true if ctlz instruction is fast.
virtual bool isCtlzFast() const {
return false;
}
@@ -444,13 +446,13 @@ public:
return false;
}
- /// \brief Return true if it is cheaper to split the store of a merged int val
+ /// Return true if it is cheaper to split the store of a merged int val
/// from a pair of smaller values into multiple stores.
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const {
return false;
}
- /// \brief Return if the target supports combining a
+ /// Return if the target supports combining a
/// chain like:
/// \code
/// %andResult = and %val1, #mask
@@ -507,7 +509,30 @@ public:
return hasAndNotCompare(X);
}
- /// \brief Return true if the target wants to use the optimization that
+ /// There are two ways to clear extreme bits (either low or high):
+ /// Mask: x & (-1 << y) (the instcombine canonical form)
+ /// Shifts: x >> y << y
+ /// Return true if the variant with 2 shifts is preferred.
+ /// Return false if there is no preference.
+ virtual bool preferShiftsToClearExtremeBits(SDValue X) const {
+ // By default, let's assume that no one prefers shifts.
+ return false;
+ }
+
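The two variants named above compute the same value; a minimal standalone C++ check of the identity for the low-bit case (illustrative only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0xDEADBEEF;
      for (unsigned y = 0; y < 32; ++y) {
        // Mask variant: x & (-1 << y), written with unsigned all-ones to
        // avoid shifting a negative value.
        uint32_t MaskForm = x & (~0u << y);
        // Two-shifts variant: x >> y << y.
        uint32_t ShiftForm = x >> y << y;
        assert(MaskForm == ShiftForm); // Both clear the y low bits.
      }
      return 0;
    }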
+ /// Should we transform the IR-optimal check for whether the given truncation
+ /// down into KeptBits would be truncating or not:
+ /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
+ /// Into its more traditional form:
+ /// ((%x << C) a>> C) dstcond %x
+ /// Return true if we should transform.
+ /// Return false if there is no preference.
+ virtual bool shouldTransformSignedTruncationCheck(EVT XVT,
+ unsigned KeptBits) const {
+ // By default, let's assume that no one wants this transform.
+ return false;
+ }
+
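A standalone sketch checking that the two forms agree, assuming the unsigned-less-than flavor of the check (srccond = ult, dstcond = eq) with KeptBits = 8 on a 32-bit type (illustrative only):

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned KeptBits = 8, C = 32 - KeptBits;
      for (int64_t v = -70000; v <= 70000; ++v) {
        int32_t x = (int32_t)v;
        // IR-optimal form: (add %x, (1 << (KeptBits-1))) ult (1 << KeptBits)
        bool AddForm = (uint32_t)(x + (1 << (KeptBits - 1))) < (1u << KeptBits);
        // Traditional form: ((%x << C) a>> C) eq %x
        bool ShiftForm = ((int32_t)((uint32_t)x << C) >> C) == x;
        assert(AddForm == ShiftForm); // Both test "x fits in KeptBits signed bits".
      }
      return 0;
    }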
+ /// Return true if the target wants to use the optimization that
/// turns ext(promotableInst1(...(promotableInstN(load)))) into
/// promotedInst1(...(promotedInstN(ext(load)))).
bool enableExtLdPromotion() const { return EnableExtLdPromotion; }
@@ -746,10 +771,10 @@ public:
/// operations don't trap except for integer divide and remainder.
virtual bool canOpTrap(unsigned Op, EVT VT) const;
- /// Similar to isShuffleMaskLegal. This is used by Targets can use this to
- /// indicate if there is a suitable VECTOR_SHUFFLE that can be used to replace
- /// a VAND with a constant pool entry.
- virtual bool isVectorClearMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
+ /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
+ /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
+ /// constant pool entry.
+ virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/,
EVT /*VT*/) const {
return false;
}
@@ -765,6 +790,39 @@ public:
return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
}
+ LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const {
+ unsigned EqOpc;
+ switch (Op) {
+ default: llvm_unreachable("Unexpected FP pseudo-opcode");
+ case ISD::STRICT_FADD: EqOpc = ISD::FADD; break;
+ case ISD::STRICT_FSUB: EqOpc = ISD::FSUB; break;
+ case ISD::STRICT_FMUL: EqOpc = ISD::FMUL; break;
+ case ISD::STRICT_FDIV: EqOpc = ISD::FDIV; break;
+ case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
+ case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
+ case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
+ case ISD::STRICT_FMA: EqOpc = ISD::FMA; break;
+ case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
+ case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
+ case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
+ case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break;
+ case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break;
+ case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break;
+ case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break;
+ case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break;
+ case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break;
+ }
+
+ auto Action = getOperationAction(EqOpc, VT);
+
+ // We don't currently handle Custom or Promote for strict FP pseudo-ops.
+ // For now, we just expand for those cases.
+ if (Action != Legal)
+ Action = Expand;
+
+ return Action;
+ }
+
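A sketch of the intended use (hypothetical caller; TLI, DAG, Node, and VT stand in for the usual SelectionDAG surroundings):

    // A strict pseudo-op whose action is not Legal comes back as Expand,
    // and the caller is expected to fall back to the non-strict node.
    if (TLI.getStrictFPOperationAction(ISD::STRICT_FSQRT, VT) !=
        TargetLowering::Legal)
      Node = DAG.mutateStrictFPToFP(Node);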
/// Return true if the specified operation is legal on this target or can be
/// made legal with custom lowering. This is used to help guide high-level
/// lowering decisions.
@@ -800,7 +858,7 @@ public:
}
/// Return true if lowering to a jump table is allowed.
- bool areJTsAllowed(const Function *Fn) const {
+ virtual bool areJTsAllowed(const Function *Fn) const {
if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")
return false;
@@ -812,7 +870,7 @@ public:
bool rangeFitsInWord(const APInt &Low, const APInt &High,
const DataLayout &DL) const {
// FIXME: Using the pointer type doesn't seem ideal.
- uint64_t BW = DL.getPointerSizeInBits();
+ uint64_t BW = DL.getIndexSizeInBits(0u);
uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
return Range <= BW;
}
@@ -820,7 +878,7 @@ public:
/// Return true if lowering to a jump table is suitable for a set of case
/// clusters which may contain \p NumCases cases, \p Range range of values.
/// FIXME: This function checks the maximum table size and density, but the
- /// minimum size is not checked. It would be nice if the the minimum size is
+ /// minimum size is not checked. It would be nice if the minimum size is
/// also combined within this function. Currently, the minimum size check is
/// performed in findJumpTable() in SelectionDAGBuiler and
/// getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
@@ -986,9 +1044,14 @@ public:
/// Return true if the specified condition code is legal on this target.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const {
- return
- getCondCodeAction(CC, VT) == Legal ||
- getCondCodeAction(CC, VT) == Custom;
+ return getCondCodeAction(CC, VT) == Legal;
+ }
+
+ /// Return true if the specified condition code is legal or custom on this
+ /// target.
+ bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const {
+ return getCondCodeAction(CC, VT) == Legal ||
+ getCondCodeAction(CC, VT) == Custom;
}
/// If the action for this operation is to promote, this method returns the
@@ -1110,10 +1173,6 @@ public:
/// Certain combinations of ABIs, Targets and features require that types
/// are legal for some operations and not for other operations.
/// For MIPS all vector types must be passed through the integer register set.
- virtual MVT getRegisterTypeForCallingConv(MVT VT) const {
- return getRegisterType(VT);
- }
-
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
EVT VT) const {
return getRegisterType(Context, VT);
@@ -1172,7 +1231,7 @@ public:
return getPointerTy(DL).getSizeInBits();
}
- /// \brief Get maximum # of store operations permitted for llvm.memset
+ /// Get maximum # of store operations permitted for llvm.memset
///
/// This function returns the maximum number of store operations permitted
/// to replace a call to llvm.memset. The value is set by the target at the
@@ -1182,7 +1241,7 @@ public:
return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset;
}
- /// \brief Get maximum # of store operations permitted for llvm.memcpy
+ /// Get maximum # of store operations permitted for llvm.memcpy
///
/// This function returns the maximum number of store operations permitted
/// to replace a call to llvm.memcpy. The value is set by the target at the
@@ -1192,6 +1251,15 @@ public:
return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy;
}
+ /// Get maximum # of store operations to be glued together
+ ///
+ /// This function returns the maximum number of store operations permitted
+ /// to glue together during lowering of llvm.memcpy. The value is set by
+ /// the target at the performance threshold for such a replacement.
+ virtual unsigned getMaxGluedStoresPerMemcpy() const {
+ return MaxGluedStoresPerMemcpy;
+ }
+
/// Get maximum # of load operations permitted for memcmp
///
/// This function returns the maximum number of load operations permitted
@@ -1202,7 +1270,19 @@ public:
return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp;
}
- /// \brief Get maximum # of store operations permitted for llvm.memmove
+ /// For memcmp expansion when the memcmp result is only compared equal or
+ /// not-equal to 0, allow up to this number of load pairs per block. As an
+ /// example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
+ /// a0 = load2bytes &a[0]
+ /// b0 = load2bytes &b[0]
+ /// a2 = load1byte &a[2]
+ /// b2 = load1byte &b[2]
+ /// r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
+ virtual unsigned getMemcmpEqZeroLoadsPerBlock() const {
+ return 1;
+ }
+
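Written out as standalone C++, the expansion sketched in the comment above looks roughly like this (the helper name is illustrative):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Single-block equality expansion of memcmp(a, b, 3) == 0: two loads
    // per buffer, xor the pairs, or the results, one compare against zero.
    static bool Memcmp3EqZero(const uint8_t *a, const uint8_t *b) {
      uint16_t a0, b0;
      std::memcpy(&a0, a, 2);       // a0 = load2bytes &a[0]
      std::memcpy(&b0, b, 2);       // b0 = load2bytes &b[0]
      uint8_t a2 = a[2], b2 = b[2]; // a2/b2 = load1byte &a[2] / &b[2]
      return ((a0 ^ b0) | (a2 ^ b2)) == 0; // r = cmp eq (...), 0
    }

    int main() {
      uint8_t x[] = {1, 2, 3}, y[] = {1, 2, 3}, z[] = {1, 2, 4};
      assert(Memcmp3EqZero(x, y) == (std::memcmp(x, y, 3) == 0));
      assert(Memcmp3EqZero(x, z) == (std::memcmp(x, z, 3) == 0));
      return 0;
    }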
+ /// Get maximum # of store operations permitted for llvm.memmove
///
/// This function returns the maximum number of store operations permitted
/// to replace a call to llvm.memmove. The value is set by the target at the
@@ -1212,7 +1292,7 @@ public:
return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove;
}
- /// \brief Determine if the target supports unaligned memory accesses.
+ /// Determine if the target supports unaligned memory accesses.
///
/// This function returns true if the target allows unaligned memory accesses
/// of the specified type in the given address space. If true, it also returns
@@ -1350,7 +1430,7 @@ public:
/// If the target has a standard location for the stack protector guard,
/// returns the address of that location. Otherwise, returns nullptr.
/// DEPRECATED: please override useLoadStackGuardNode and customize
- /// LOAD_STACK_GUARD, or customize @llvm.stackguard().
+ /// LOAD_STACK_GUARD, or customize \@llvm.stackguard().
virtual Value *getIRStackGuard(IRBuilder<> &IRB) const;
/// Inserts necessary declarations for SSP (stack protection) purpose.
@@ -1905,7 +1985,7 @@ public:
Type *Ty, unsigned AddrSpace,
Instruction *I = nullptr) const;
- /// \brief Return the cost of the scaling factor used in the addressing mode
+ /// Return the cost of the scaling factor used in the addressing mode
/// represented by AM for this target, for a load/store of the specified type.
///
/// If the AM is supported, the return value must be >= 0.
@@ -2098,11 +2178,14 @@ public:
return false;
}
- /// \brief Get the maximum supported factor for interleaved memory accesses.
+ /// Return true if the target has a vector blend instruction.
+ virtual bool hasVectorBlend() const { return false; }
+
+ /// Get the maximum supported factor for interleaved memory accesses.
/// Default to be the minimum interleave factor: 2.
virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; }
- /// \brief Lower an interleaved load to target specific intrinsics. Return
+ /// Lower an interleaved load to target specific intrinsics. Return
/// true on success.
///
/// \p LI is the vector load instruction.
@@ -2116,7 +2199,7 @@ public:
return false;
}
- /// \brief Lower an interleaved store to target specific intrinsics. Return
+ /// Lower an interleaved store to target specific intrinsics. Return
/// true on success.
///
/// \p SI is the vector store instruction.
@@ -2189,7 +2272,7 @@ public:
return false;
}
- /// \brief Return true if it is beneficial to convert a load of a constant to
+ /// Return true if it is beneficial to convert a load of a constant to
/// just the constant itself.
/// On some targets it might be more efficient to use a combination of
/// arithmetic instructions to materialize the constant instead of loading it
@@ -2214,6 +2297,11 @@ public:
return false;
}
+ /// Return true if CodeGenPrepare should consider splitting the large offset
+ /// of a GEP to make the GEP fit into the addressing mode, so that it can be
+ /// sunk into the same blocks as its users.
+ virtual bool shouldConsiderGEPOffsetSplit() const { return false; }
+
//===--------------------------------------------------------------------===//
// Runtime Library hooks
//
@@ -2453,7 +2541,7 @@ protected:
/// expected to be merged.
unsigned GatherAllAliasesMaxDepth;
- /// \brief Specify maximum number of store instructions per memset call.
+ /// Specify maximum number of store instructions per memset call.
///
/// When lowering \@llvm.memset this field specifies the maximum number of
/// store operations that may be substituted for the call to memset. Targets
@@ -2469,7 +2557,7 @@ protected:
/// to memset, used for functions with OptSize attribute.
unsigned MaxStoresPerMemsetOptSize;
- /// \brief Specify maximum bytes of store instructions per memcpy call.
+ /// Specify maximum bytes of store instructions per memcpy call.
///
/// When lowering \@llvm.memcpy this field specifies the maximum number of
/// store operations that may be substituted for a call to memcpy. Targets
@@ -2482,13 +2570,21 @@ protected:
/// constant size.
unsigned MaxStoresPerMemcpy;
+
+ /// Specify max number of store instructions to glue in inlined memcpy.
+ ///
+ /// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number
+ /// of store instructions to keep together. This helps in pairing and
+ /// vectorization later on.
+ unsigned MaxGluedStoresPerMemcpy = 0;
+
/// Maximum number of store operations that may be substituted for a call to
/// memcpy, used for functions with OptSize attribute.
unsigned MaxStoresPerMemcpyOptSize;
unsigned MaxLoadsPerMemcmp;
unsigned MaxLoadsPerMemcmpOptSize;
- /// \brief Specify maximum bytes of store instructions per memmove call.
+ /// Specify maximum bytes of store instructions per memmove call.
///
/// When lowering \@llvm.memmove this field specifies the maximum number of
/// store instructions that may be substituted for a call to memmove. Targets
@@ -2520,6 +2616,16 @@ protected:
/// sequence of memory operands that is recognized by PrologEpilogInserter.
MachineBasicBlock *emitPatchPoint(MachineInstr &MI,
MachineBasicBlock *MBB) const;
+
+ /// Replace/modify the XRay custom event operands with target-dependent
+ /// details.
+ MachineBasicBlock *emitXRayCustomEvent(MachineInstr &MI,
+ MachineBasicBlock *MBB) const;
+
+ /// Replace/modify the XRay typed event operands with target-dependent
+ /// details.
+ MachineBasicBlock *emitXRayTypedEvent(MachineInstr &MI,
+ MachineBasicBlock *MBB) const;
};
/// This class defines information used to lower LLVM code to legal SelectionDAG
@@ -2539,6 +2645,16 @@ public:
bool isPositionIndependent() const;
+ virtual bool isSDNodeSourceOfDivergence(const SDNode *N,
+ FunctionLoweringInfo *FLI,
+ DivergenceAnalysis *DA) const {
+ return false;
+ }
+
+ virtual bool isSDNodeAlwaysUniform(const SDNode *N) const {
+ return false;
+ }
+
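A hypothetical GPU-style override as a sketch (MyGPUTargetLowering and PrivateAS are illustrative names, not part of the patch):

    bool MyGPUTargetLowering::isSDNodeSourceOfDivergence(
        const SDNode *N, FunctionLoweringInfo *FLI,
        DivergenceAnalysis *DA) const {
      // Loads from a per-lane address space yield values that differ
      // across lanes, so treat them as divergence sources.
      if (const auto *LD = dyn_cast<LoadSDNode>(N))
        return LD->getAddressSpace() == PrivateAS;
      return false;
    }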
/// Returns true by value, base pointer and offset pointer and addressing mode
/// by reference if the node's address can be legally represented as
/// pre-indexed load / store address.
@@ -2690,6 +2806,30 @@ public:
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
DAGCombinerInfo &DCI) const;
+ /// Look at Vector Op. At this point, we know that only the DemandedElts
+ /// elements of the result of Op are ever used downstream. If we can use
+ /// this information to simplify Op, create a new simplified DAG node and
+ /// return true, storing the original and new nodes in TLO.
+ /// Otherwise, analyze the expression and return a mask of KnownUndef and
+ /// KnownZero elements for the expression (used to simplify the caller).
+ /// The KnownUndef/Zero elements may only be accurate for those elements
+ /// in the DemandedEltMask.
+ /// \p AssumeSingleUse When this parameter is true, this function will
+ /// attempt to simplify \p Op even if there are multiple uses.
+ /// Callers are responsible for correctly updating the DAG based on the
+ /// results of this function, because simply replacing TLO.Old
+ /// with TLO.New will be incorrect when this parameter is true and TLO.Old
+ /// has multiple uses.
+ bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask,
+ APInt &KnownUndef, APInt &KnownZero,
+ TargetLoweringOpt &TLO, unsigned Depth = 0,
+ bool AssumeSingleUse = false) const;
+
+ /// Helper wrapper around SimplifyDemandedVectorElts
+ bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
+ APInt &KnownUndef, APInt &KnownZero,
+ DAGCombinerInfo &DCI) const;
+
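A sketch of how a target combine might use the DAGCombinerInfo wrapper (N, DCI, and NumElts stand in for the usual combine surroundings):

    APInt DemandedElts = APInt::getAllOnesValue(NumElts);
    APInt KnownUndef, KnownZero;
    if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
                                       KnownZero, DCI))
      return SDValue(N, 0); // The combiner has already updated the DAG.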
/// Determine which of the bits specified in Mask are known to be either zero
/// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
/// argument allows us to only collect the known bits that are shared by the
@@ -2718,6 +2858,15 @@ public:
const SelectionDAG &DAG,
unsigned Depth = 0) const;
+ /// Attempt to simplify any target nodes based on the demanded vector
+ /// elements, returning true on success. Otherwise, analyze the expression and
+ /// return a mask of KnownUndef and KnownZero elements for the expression
+ /// (used to simplify the caller). The KnownUndef/Zero elements may only be
+ /// accurate for those elements in the DemandedElts mask.
+ virtual bool SimplifyDemandedVectorEltsForTargetNode(
+ SDValue Op, const APInt &DemandedElts, APInt &KnownUndef,
+ APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const;
+
struct DAGCombinerInfo {
void *DC; // The DAG Combiner object.
CombineLevel Level;
@@ -2731,7 +2880,7 @@ public:
bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; }
bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; }
- bool isAfterLegalizeVectorOps() const {
+ bool isAfterLegalizeDAG() const {
return Level == AfterLegalizeDAG;
}
CombineLevel getDAGCombineLevel() { return Level; }
@@ -2753,12 +2902,8 @@ public:
/// from getBooleanContents().
bool isConstFalseVal(const SDNode *N) const;
- /// Return a constant of type VT that contains a true value that respects
- /// getBooleanContents()
- SDValue getConstTrueVal(SelectionDAG &DAG, EVT VT, const SDLoc &DL) const;
-
/// Return if \p N is a True value when extended to \p VT.
- bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool Signed) const;
+ bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const;
/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
@@ -3479,7 +3624,7 @@ public:
/// bounds the returned pointer is unspecified, but will be within the vector
/// bounds.
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
- SDValue Idx) const;
+ SDValue Index) const;
//===--------------------------------------------------------------------===//
// Instruction Emitting Hooks
@@ -3518,6 +3663,13 @@ public:
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
SelectionDAG &DAG) const;
+ /// Expands target-specific indirect branch for the case of a jump table
+ /// expansion.
+ virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
+ SDValue Addr, SelectionDAG &DAG) const {
+ return DAG.getNode(ISD::BRIND, dl, MVT::Other, Value, Addr);
+ }
+
// seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits)))
// If we're comparing for equality to zero and isCtlzFast is true, expose the
// fact that this can be implemented as a ctlz/srl pair, so that the dag
@@ -3528,6 +3680,11 @@ private:
SDValue simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, DAGCombinerInfo &DCI,
const SDLoc &DL) const;
+
+ SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0,
+ SDValue N1, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI,
+ const SDLoc &DL) const;
};
/// Given an LLVM IR type and return type attributes, compute the return value