diff options
Diffstat (limited to 'include/llvm/Target')
-rw-r--r-- | include/llvm/Target/GenericOpcodes.td | 87 | ||||
-rw-r--r-- | include/llvm/Target/GlobalISel/Combine.td | 103 | ||||
-rw-r--r-- | include/llvm/Target/GlobalISel/SelectionDAGCompat.td | 25 | ||||
-rw-r--r-- | include/llvm/Target/Target.td | 33 | ||||
-rw-r--r-- | include/llvm/Target/TargetCallingConv.td | 6 | ||||
-rw-r--r-- | include/llvm/Target/TargetItinerary.td | 11 | ||||
-rw-r--r-- | include/llvm/Target/TargetLoweringObjectFile.h | 3 | ||||
-rw-r--r-- | include/llvm/Target/TargetMachine.h | 28 | ||||
-rw-r--r-- | include/llvm/Target/TargetSchedule.td | 8 | ||||
-rw-r--r-- | include/llvm/Target/TargetSelectionDAG.td | 146 |
10 files changed, 399 insertions, 51 deletions
diff --git a/include/llvm/Target/GenericOpcodes.td b/include/llvm/Target/GenericOpcodes.td index 45718327b4a7..4b49dfd4dd18 100644 --- a/include/llvm/Target/GenericOpcodes.td +++ b/include/llvm/Target/GenericOpcodes.td @@ -15,7 +15,9 @@ // Unary ops. //------------------------------------------------------------------------------ -class GenericInstruction : StandardPseudoInstruction; +class GenericInstruction : StandardPseudoInstruction { + let isPreISelOpcode = 1; +} // Extend the underlying scalar type of an operation, leaving the high bits // unspecified. @@ -33,6 +35,20 @@ def G_SEXT : GenericInstruction { let hasSideEffects = 0; } +// Sign extend the a value from an arbitrary bit position, copying the sign bit +// into all bits above it. This is equivalent to a shl + ashr pair with an +// appropriate shift amount. $sz is an immediate (MachineOperand::isImm() +// returns true) to allow targets to have some bitwidths legal and others +// lowered. This opcode is particularly useful if the target has sign-extension +// instructions that are cheaper than the constituent shifts as the optimizer is +// able to make decisions on whether it's better to hang on to the G_SEXT_INREG +// or to lower it and optimize the individual shifts. +def G_SEXT_INREG : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src, untyped_imm_0:$sz); + let hasSideEffects = 0; +} + // Zero extend the underlying scalar type of an operation, putting zero bits // into the newly-created space. def G_ZEXT : GenericInstruction { @@ -157,6 +173,12 @@ def G_BSWAP : GenericInstruction { let hasSideEffects = 0; } +def G_BITREVERSE : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src); + let hasSideEffects = 0; +} + def G_ADDRSPACE_CAST : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); @@ -175,6 +197,12 @@ def G_JUMP_TABLE : GenericInstruction { let hasSideEffects = 0; } +def G_DYN_STACKALLOC : GenericInstruction { + let OutOperandList = (outs ptype0:$dst); + let InOperandList = (ins type1:$size, i32imm:$align); + let hasSideEffects = 1; +} + //------------------------------------------------------------------------------ // Binary ops. //------------------------------------------------------------------------------ @@ -598,6 +626,15 @@ def G_FMA : GenericInstruction { let isCommutable = 0; } +/// Generic FP multiply and add. Perform a * b + c, while getting the +/// same result as the separately rounded operations, unlike G_FMA. +def G_FMAD : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3); + let hasSideEffects = 0; + let isCommutable = 0; +} + // Generic FP division. def G_FDIV : GenericInstruction { let OutOperandList = (outs type0:$dst); @@ -725,7 +762,11 @@ def G_INTRINSIC_ROUND : GenericInstruction { // Memory ops //------------------------------------------------------------------------------ -// Generic load. Expects a MachineMemOperand in addition to explicit operands. +// Generic load. Expects a MachineMemOperand in addition to explicit +// operands. If the result size is larger than the memory size, the +// high bits are undefined. If the result is a vector type and larger +// than the memory size, the high elements are undefined (i.e. this is +// not a per-element, vector anyextload) def G_LOAD : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins ptype1:$addr); @@ -749,6 +790,32 @@ def G_ZEXTLOAD : GenericInstruction { let mayLoad = 1; } +// Generic indexed load. Combines a GEP with a load. $newaddr is set to $base + $offset. +// If $am is 0 (post-indexed), then the value is loaded from $base; if $am is 1 (pre-indexed) +// then the value is loaded from $newaddr. +def G_INDEXED_LOAD : GenericInstruction { + let OutOperandList = (outs type0:$dst, ptype1:$newaddr); + let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am); + let hasSideEffects = 0; + let mayLoad = 1; +} + +// Same as G_INDEXED_LOAD except that the load performed is sign-extending, as with G_SEXTLOAD. +def G_INDEXED_SEXTLOAD : GenericInstruction { + let OutOperandList = (outs type0:$dst, ptype1:$newaddr); + let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am); + let hasSideEffects = 0; + let mayLoad = 1; +} + +// Same as G_INDEXED_LOAD except that the load performed is zero-extending, as with G_ZEXTLOAD. +def G_INDEXED_ZEXTLOAD : GenericInstruction { + let OutOperandList = (outs type0:$dst, ptype1:$newaddr); + let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am); + let hasSideEffects = 0; + let mayLoad = 1; +} + // Generic store. Expects a MachineMemOperand in addition to explicit operands. def G_STORE : GenericInstruction { let OutOperandList = (outs); @@ -757,6 +824,15 @@ def G_STORE : GenericInstruction { let mayStore = 1; } +// Combines a store with a GEP. See description of G_INDEXED_LOAD for indexing behaviour. +def G_INDEXED_STORE : GenericInstruction { + let OutOperandList = (outs ptype0:$newaddr); + let InOperandList = (ins type1:$src, ptype0:$base, ptype2:$offset, + unknown:$am); + let hasSideEffects = 0; + let mayStore = 1; +} + // Generic atomic cmpxchg with internal success check. Expects a // MachineMemOperand in addition to explicit operands. def G_ATOMIC_CMPXCHG_WITH_SUCCESS : GenericInstruction { @@ -798,6 +874,8 @@ def G_ATOMICRMW_MAX : G_ATOMICRMW_OP; def G_ATOMICRMW_MIN : G_ATOMICRMW_OP; def G_ATOMICRMW_UMAX : G_ATOMICRMW_OP; def G_ATOMICRMW_UMIN : G_ATOMICRMW_OP; +def G_ATOMICRMW_FADD : G_ATOMICRMW_OP; +def G_ATOMICRMW_FSUB : G_ATOMICRMW_OP; def G_FENCE : GenericInstruction { let OutOperandList = (outs); @@ -947,9 +1025,12 @@ def G_EXTRACT_VECTOR_ELT : GenericInstruction { } // Generic shufflevector. +// +// The mask operand should be an IR Constant which exactly matches the +// corresponding mask for the IR shufflevector instruction. def G_SHUFFLE_VECTOR: GenericInstruction { let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type1:$v1, type1:$v2, type2:$mask); + let InOperandList = (ins type1:$v1, type1:$v2, unknown:$mask); let hasSideEffects = 0; } diff --git a/include/llvm/Target/GlobalISel/Combine.td b/include/llvm/Target/GlobalISel/Combine.td new file mode 100644 index 000000000000..dcac399fd693 --- /dev/null +++ b/include/llvm/Target/GlobalISel/Combine.td @@ -0,0 +1,103 @@ +//===- Combine.td - Combine rule definitions ---------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Declare GlobalISel combine rules and provide mechanisms to opt-out. +// +//===----------------------------------------------------------------------===// + +// Common base class for GICombineRule and GICombineGroup. +class GICombine { + // See GICombineGroup. We only declare it here to make the tablegen pass + // simpler. + list<GICombine> Rules = ?; +} + +// A group of combine rules that can be added to a GICombiner or another group. +class GICombineGroup<list<GICombine> rules> : GICombine { + // The rules contained in this group. The rules in a group are flattened into + // a single list and sorted into whatever order is most efficient. However, + // they will never be re-ordered such that behaviour differs from the + // specified order. It is therefore possible to use the order of rules in this + // list to describe priorities. + let Rules = rules; +} + +// Declares a combiner helper class +class GICombinerHelper<string classname, list<GICombine> rules> + : GICombineGroup<rules> { + // The class name to use in the generated output. + string Classname = classname; + // The name of a run-time compiler option that will be generated to disable + // specific rules within this combiner. + string DisableRuleOption = ?; +} +class GICombineRule<dag defs, dag match, dag apply> : GICombine { + /// Defines the external interface of the match rule. This includes: + /// * The names of the root nodes (requires at least one) + /// See GIDefKind for details. + dag Defs = defs; + + /// Defines the things which must be true for the pattern to match + /// See GIMatchKind for details. + dag Match = match; + + /// Defines the things which happen after the decision is made to apply a + /// combine rule. + /// See GIApplyKind for details. + dag Apply = apply; +} + +/// The operator at the root of a GICombineRule.Defs dag. +def defs; + +/// All arguments of the defs operator must be subclasses of GIDefKind or +/// sub-dags whose operator is GIDefKindWithArgs. +class GIDefKind; +class GIDefKindWithArgs; +/// Declare a root node. There must be at least one of these in every combine +/// rule. +/// TODO: The plan is to elide `root` definitions and determine it from the DAG +/// itself with an overide for situations where the usual determination +/// is incorrect. +def root : GIDefKind; + +/// The operator at the root of a GICombineRule.Match dag. +def match; +/// All arguments of the match operator must be either: +/// * A subclass of GIMatchKind +/// * A subclass of GIMatchKindWithArgs +/// * A MIR code block (deprecated) +/// The GIMatchKind and GIMatchKindWithArgs cases are described in more detail +/// in their definitions below. +/// For the Instruction case, these are collected into a DAG where operand names +/// that occur multiple times introduce edges. +class GIMatchKind; +class GIMatchKindWithArgs; + +/// The operator at the root of a GICombineRule.Apply dag. +def apply; +/// All arguments of the apply operator must be subclasses of GIApplyKind, or +/// sub-dags whose operator is GIApplyKindWithArgs, or an MIR block +/// (deprecated). +class GIApplyKind; +class GIApplyKindWithArgs; + +def copy_prop : GICombineRule< + (defs root:$d), + (match [{ return Helper.matchCombineCopy(${d}); }]), + (apply [{ Helper.applyCombineCopy(${d}); }])>; +def trivial_combines : GICombineGroup<[copy_prop]>; + +// FIXME: Is there a reason this wasn't in tryCombine? I've left it out of +// all_combines because it wasn't there. +def elide_br_by_inverting_cond : GICombineRule< + (defs root:$d), + (match [{ return Helper.matchElideBrByInvertingCond(${d}); }]), + (apply [{ Helper.applyElideBrByInvertingCond(${d}); }])>; + +def all_combines : GICombineGroup<[trivial_combines]>; diff --git a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index 6cc58d6521da..b846d2252b8d 100644 --- a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -27,6 +27,7 @@ class GINodeEquiv<Instruction i, SDNode node> { // (ISD::LOAD, ISD::ATOMIC_LOAD, ISD::STORE, ISD::ATOMIC_STORE) but GlobalISel // stores this information in the MachineMemoryOperand. bit CheckMMOIsNonAtomic = 0; + bit CheckMMOIsAtomic = 0; // SelectionDAG has one node for all loads and uses predicates to // differentiate them. GlobalISel on the other hand uses separate opcodes. @@ -34,6 +35,10 @@ class GINodeEquiv<Instruction i, SDNode node> { // depending on the predicates on the node. Instruction IfSignExtend = ?; Instruction IfZeroExtend = ?; + + // SelectionDAG has one setcc for all compares. This differentiates + // for G_ICMP and G_FCMP. + Instruction IfFloatingPoint = ?; } // These are defined in the same order as the G_* instructions. @@ -46,6 +51,7 @@ def : GINodeEquiv<G_BITCAST, bitconvert>; // G_PTRTOINT - SelectionDAG has no equivalent. def : GINodeEquiv<G_CONSTANT, imm>; def : GINodeEquiv<G_FCONSTANT, fpimm>; +def : GINodeEquiv<G_IMPLICIT_DEF, undef>; def : GINodeEquiv<G_ADD, add>; def : GINodeEquiv<G_SUB, sub>; def : GINodeEquiv<G_MUL, mul>; @@ -72,6 +78,7 @@ def : GINodeEquiv<G_UITOFP, uint_to_fp>; def : GINodeEquiv<G_FADD, fadd>; def : GINodeEquiv<G_FSUB, fsub>; def : GINodeEquiv<G_FMA, fma>; +def : GINodeEquiv<G_FMAD, fmad>; def : GINodeEquiv<G_FMUL, fmul>; def : GINodeEquiv<G_FDIV, fdiv>; def : GINodeEquiv<G_FREM, frem>; @@ -85,6 +92,7 @@ def : GINodeEquiv<G_INTRINSIC_W_SIDE_EFFECTS, intrinsic_void>; def : GINodeEquiv<G_INTRINSIC_W_SIDE_EFFECTS, intrinsic_w_chain>; def : GINodeEquiv<G_BR, br>; def : GINodeEquiv<G_BSWAP, bswap>; +def : GINodeEquiv<G_BITREVERSE, bitreverse>; def : GINodeEquiv<G_CTLZ, ctlz>; def : GINodeEquiv<G_CTTZ, cttz>; def : GINodeEquiv<G_CTLZ_ZERO_UNDEF, ctlz_zero_undef>; @@ -100,10 +108,15 @@ def : GINodeEquiv<G_FSQRT, fsqrt>; def : GINodeEquiv<G_FFLOOR, ffloor>; def : GINodeEquiv<G_FRINT, frint>; def : GINodeEquiv<G_FNEARBYINT, fnearbyint>; +def : GINodeEquiv<G_FCOPYSIGN, fcopysign>; def : GINodeEquiv<G_SMIN, smin>; def : GINodeEquiv<G_SMAX, smax>; def : GINodeEquiv<G_UMIN, umin>; def : GINodeEquiv<G_UMAX, umax>; +def : GINodeEquiv<G_FMINNUM, fminnum>; +def : GINodeEquiv<G_FMAXNUM, fmaxnum>; +def : GINodeEquiv<G_FMINNUM_IEEE, fminnum_ieee>; +def : GINodeEquiv<G_FMAXNUM_IEEE, fmaxnum_ieee>; // Broadly speaking G_LOAD is equivalent to ISD::LOAD but there are some // complications that tablegen must take care of. For example, Predicates such @@ -117,6 +130,11 @@ def : GINodeEquiv<G_LOAD, ld> { let IfSignExtend = G_SEXTLOAD; let IfZeroExtend = G_ZEXTLOAD; } + +def : GINodeEquiv<G_ICMP, setcc> { + let IfFloatingPoint = G_FCMP; +} + // Broadly speaking G_STORE is equivalent to ISD::STORE but there are some // complications that tablegen must take care of. For example, predicates such // as isTruncStore require that this is not a perfect 1:1 mapping since a @@ -126,6 +144,11 @@ def : GINodeEquiv<G_LOAD, ld> { // G_STORE with a non-atomic MachineMemOperand. def : GINodeEquiv<G_STORE, st> { let CheckMMOIsNonAtomic = 1; } +def : GINodeEquiv<G_LOAD, atomic_load> { + let CheckMMOIsNonAtomic = 0; + let CheckMMOIsAtomic = 1; +} + def : GINodeEquiv<G_ATOMIC_CMPXCHG, atomic_cmp_swap>; def : GINodeEquiv<G_ATOMICRMW_XCHG, atomic_swap>; def : GINodeEquiv<G_ATOMICRMW_ADD, atomic_load_add>; @@ -138,6 +161,8 @@ def : GINodeEquiv<G_ATOMICRMW_MIN, atomic_load_min>; def : GINodeEquiv<G_ATOMICRMW_MAX, atomic_load_max>; def : GINodeEquiv<G_ATOMICRMW_UMIN, atomic_load_umin>; def : GINodeEquiv<G_ATOMICRMW_UMAX, atomic_load_umax>; +def : GINodeEquiv<G_ATOMICRMW_FADD, atomic_load_fadd>; +def : GINodeEquiv<G_ATOMICRMW_FSUB, atomic_load_fsub>; def : GINodeEquiv<G_FENCE, atomic_fence>; // Specifies the GlobalISel equivalents for SelectionDAG's ComplexPattern. diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index d58662e128e0..dd8679661b9a 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -351,7 +351,11 @@ def interleave; // RegisterTuples instances can be used in other set operations to form // register classes and so on. This is the only way of using the generated // registers. -class RegisterTuples<list<SubRegIndex> Indices, list<dag> Regs> { +// +// RegNames may be specified to supply asm names for the generated tuples. +// If used must have the same size as the list of produced registers. +class RegisterTuples<list<SubRegIndex> Indices, list<dag> Regs, + list<string> RegNames = []> { // SubRegs - N lists of registers to be zipped up. Super-registers are // synthesized from the first element of each SubRegs list, the second // element and so on. @@ -360,6 +364,9 @@ class RegisterTuples<list<SubRegIndex> Indices, list<dag> Regs> { // SubRegIndices - N SubRegIndex instances. This provides the names of the // sub-registers in the synthesized super-registers. list<SubRegIndex> SubRegIndices = Indices; + + // List of asm names for the generated tuple registers. + list<string> RegAsmNames = RegNames; } @@ -436,6 +443,15 @@ class InstructionEncoding { bit hasCompleteDecoder = 1; } +// Allows specifying an InstructionEncoding by HwMode. If an Instruction specifies +// an EncodingByHwMode, its Inst and Size members are ignored and Ts are used +// to encode and decode based on HwMode. +class EncodingByHwMode<list<HwMode> Ms = [], list<InstructionEncoding> Ts = []> + : HwModeSelect<Ms> { + // The length of this list must be the same as the length of Ms. + list<InstructionEncoding> Objects = Ts; +} + //===----------------------------------------------------------------------===// // Instruction set description - These classes correspond to the C++ classes in // the Target/TargetInstrInfo.h file. @@ -447,6 +463,10 @@ class Instruction : InstructionEncoding { dag InOperandList; // An dag containing the MI use operand list. string AsmString = ""; // The .s format to print the instruction with. + // Allows specifying a canonical InstructionEncoding by HwMode. If non-empty, + // the Inst member of this Instruction is ignored. + EncodingByHwMode EncodingInfos; + // Pattern - Set to the DAG pattern for this instruction, if we know of one, // otherwise, uninitialized. list<dag> Pattern; @@ -472,6 +492,10 @@ class Instruction : InstructionEncoding { // Added complexity passed onto matching pattern. int AddedComplexity = 0; + // Indicates if this is a pre-isel opcode that should be + // legalized/regbankselected/selected. + bit isPreISelOpcode = 0; + // These bits capture information about the high-level semantics of the // instruction. bit isReturn = 0; // Is this instruction a return instruction? @@ -834,6 +858,7 @@ def f64imm : Operand<f64>; class TypedOperand<string Ty> : Operand<untyped> { let OperandType = Ty; bit IsPointer = 0; + bit IsImmediate = 0; } def type0 : TypedOperand<"OPERAND_GENERIC_0">; @@ -852,6 +877,12 @@ let IsPointer = 1 in { def ptype5 : TypedOperand<"OPERAND_GENERIC_5">; } +// untyped_imm is for operands where isImm() will be true. It currently has no +// special behaviour and is only used for clarity. +def untyped_imm_0 : TypedOperand<"OPERAND_GENERIC_IMM_0"> { + let IsImmediate = 1; +} + /// zero_reg definition - Special node to stand for the zero register. /// def zero_reg; diff --git a/include/llvm/Target/TargetCallingConv.td b/include/llvm/Target/TargetCallingConv.td index 1bc03cf8a49d..7b1973cc3828 100644 --- a/include/llvm/Target/TargetCallingConv.td +++ b/include/llvm/Target/TargetCallingConv.td @@ -152,6 +152,12 @@ class CCBitConvertToType<ValueType destTy> : CCAction { ValueType DestTy = destTy; } +/// CCTruncToType - If applied, this truncates the specified current value to +/// the specified type. +class CCTruncToType<ValueType destTy> : CCAction { + ValueType DestTy = destTy; +} + /// CCPassIndirect - If applied, this stores the value to stack and passes the pointer /// as normal argument. class CCPassIndirect<ValueType destTy> : CCAction { diff --git a/include/llvm/Target/TargetItinerary.td b/include/llvm/Target/TargetItinerary.td index b68ed045520c..89e5abd947d0 100644 --- a/include/llvm/Target/TargetItinerary.td +++ b/include/llvm/Target/TargetItinerary.td @@ -127,6 +127,17 @@ class ProcessorItineraries<list<FuncUnit> fu, list<Bypass> bp, list<FuncUnit> FU = fu; list<Bypass> BP = bp; list<InstrItinData> IID = iid; + // The packetizer automaton to use for this itinerary. By default all + // itineraries for a target are bundled up into the same automaton. This only + // works correctly when there are no conflicts in functional unit IDs between + // itineraries. For example, given two itineraries A<[SLOT_A]>, B<[SLOT_B]>, + // SLOT_A and SLOT_B will be assigned the same functional unit index, and + // the generated packetizer will confuse instructions referencing these slots. + // + // To avoid this, setting PacketizerNamespace to non-"" will cause this + // itinerary to be generated in a different automaton. The subtarget will need + // to declare a method "create##Namespace##DFAPacketizer()". + string PacketizerNamespace = ""; } // NoItineraries - A marker that can be used by processors without schedule diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h index 3a2497bff11e..d74341b23fb1 100644 --- a/include/llvm/Target/TargetLoweringObjectFile.h +++ b/include/llvm/Target/TargetLoweringObjectFile.h @@ -191,7 +191,8 @@ public: } /// Get the target specific PC relative GOT entry relocation - virtual const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym, + virtual const MCExpr *getIndirectSymViaGOTPCRel(const GlobalValue *GV, + const MCSymbol *Sym, const MCValue &MV, int64_t Offset, MachineModuleInfo *MMI, diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index cdf9f8bfd5ea..285c0ec0fb90 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -25,7 +25,7 @@ namespace llvm { class Function; class GlobalValue; -class MachineModuleInfo; +class MachineModuleInfoWrapperPass; class Mangler; class MCAsmInfo; class MCContext; @@ -284,12 +284,13 @@ public: /// emitted. Typically this will involve several steps of code generation. /// This method should return true if emission of this file type is not /// supported, or false on success. - /// \p MMI is an optional parameter that, if set to non-nullptr, + /// \p MMIWP is an optional parameter that, if set to non-nullptr, /// will be used to set the MachineModuloInfo for this PM. - virtual bool addPassesToEmitFile(PassManagerBase &, raw_pwrite_stream &, - raw_pwrite_stream *, CodeGenFileType, - bool /*DisableVerify*/ = true, - MachineModuleInfo *MMI = nullptr) { + virtual bool + addPassesToEmitFile(PassManagerBase &, raw_pwrite_stream &, + raw_pwrite_stream *, CodeGenFileType, + bool /*DisableVerify*/ = true, + MachineModuleInfoWrapperPass *MMIWP = nullptr) { return true; } @@ -341,12 +342,13 @@ public: /// Add passes to the specified pass manager to get the specified file /// emitted. Typically this will involve several steps of code generation. - /// \p MMI is an optional parameter that, if set to non-nullptr, - /// will be used to set the MachineModuloInfofor this PM. - bool addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out, - raw_pwrite_stream *DwoOut, CodeGenFileType FileType, - bool DisableVerify = true, - MachineModuleInfo *MMI = nullptr) override; + /// \p MMIWP is an optional parameter that, if set to non-nullptr, + /// will be used to set the MachineModuloInfo for this PM. + bool + addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out, + raw_pwrite_stream *DwoOut, CodeGenFileType FileType, + bool DisableVerify = true, + MachineModuleInfoWrapperPass *MMIWP = nullptr) override; /// Add passes to the specified pass manager to get machine code emitted with /// the MCJIT. This method returns true if machine code is not supported. It @@ -365,7 +367,7 @@ public: /// Adds an AsmPrinter pass to the pipeline that prints assembly or /// machine code from the MI representation. bool addAsmPrinter(PassManagerBase &PM, raw_pwrite_stream &Out, - raw_pwrite_stream *DwoOut, CodeGenFileType FileTYpe, + raw_pwrite_stream *DwoOut, CodeGenFileType FileType, MCContext &Context); /// True if the target uses physical regs at Prolog/Epilog insertion diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td index a36d259df831..24f37e94da91 100644 --- a/include/llvm/Target/TargetSchedule.td +++ b/include/llvm/Target/TargetSchedule.td @@ -563,10 +563,10 @@ class RetireControlUnit<int bufferSize, int retirePerCycle> { // Base class for Load/StoreQueue. It is used to identify processor resources // which describe load/store queues in the LS unit. -class MemoryQueue<ProcResource PR> { - ProcResource QueueDescriptor = PR; +class MemoryQueue<ProcResourceKind PR> { + ProcResourceKind QueueDescriptor = PR; SchedMachineModel SchedModel = ?; } -class LoadQueue<ProcResource LDQueue> : MemoryQueue<LDQueue>; -class StoreQueue<ProcResource STQueue> : MemoryQueue<STQueue>; +class LoadQueue<ProcResourceKind LDQueue> : MemoryQueue<LDQueue>; +class StoreQueue<ProcResourceKind STQueue> : MemoryQueue<STQueue>; diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index b913a054ac2c..441f3d7d118d 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -137,9 +137,12 @@ def SDTFPSignOp : SDTypeProfile<1, 2, [ // fcopysign. def SDTFPTernaryOp : SDTypeProfile<1, 3, [ // fmadd, fnmsub, etc. SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisFP<0> ]>; -def SDTIntUnaryOp : SDTypeProfile<1, 1, [ // ctlz, cttz +def SDTIntUnaryOp : SDTypeProfile<1, 1, [ // bitreverse SDTCisSameAs<0, 1>, SDTCisInt<0> ]>; +def SDTIntBitCountUnaryOp : SDTypeProfile<1, 1, [ // ctlz, cttz + SDTCisInt<0>, SDTCisInt<1> +]>; def SDTIntExtendOp : SDTypeProfile<1, 1, [ // sext, zext, anyext SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisSameNumEltsAs<0, 1> ]>; @@ -239,6 +242,9 @@ def SDTVecExtract : SDTypeProfile<1, 2, [ // vector extract def SDTVecInsert : SDTypeProfile<1, 3, [ // vector insert SDTCisEltOfVec<2, 1>, SDTCisSameAs<0, 1>, SDTCisPtrTy<3> ]>; +def SDTVecReduce : SDTypeProfile<1, 1, [ // vector reduction + SDTCisInt<0>, SDTCisVec<1> +]>; def SDTSubVecExtract : SDTypeProfile<1, 2, [// subvector extract SDTCisSubVecOfVec<0,1>, SDTCisInt<2> @@ -393,6 +399,7 @@ def usubsat : SDNode<"ISD::USUBSAT" , SDTIntBinOp>; def smulfix : SDNode<"ISD::SMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>; def smulfixsat : SDNode<"ISD::SMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>; def umulfix : SDNode<"ISD::UMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>; +def umulfixsat : SDNode<"ISD::UMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>; def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>; def sext_invec : SDNode<"ISD::SIGN_EXTEND_VECTOR_INREG", SDTExtInvec>; @@ -401,11 +408,11 @@ def zext_invec : SDNode<"ISD::ZERO_EXTEND_VECTOR_INREG", SDTExtInvec>; def abs : SDNode<"ISD::ABS" , SDTIntUnaryOp>; def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>; def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>; -def ctlz : SDNode<"ISD::CTLZ" , SDTIntUnaryOp>; -def cttz : SDNode<"ISD::CTTZ" , SDTIntUnaryOp>; -def ctpop : SDNode<"ISD::CTPOP" , SDTIntUnaryOp>; -def ctlz_zero_undef : SDNode<"ISD::CTLZ_ZERO_UNDEF", SDTIntUnaryOp>; -def cttz_zero_undef : SDNode<"ISD::CTTZ_ZERO_UNDEF", SDTIntUnaryOp>; +def ctlz : SDNode<"ISD::CTLZ" , SDTIntBitCountUnaryOp>; +def cttz : SDNode<"ISD::CTTZ" , SDTIntBitCountUnaryOp>; +def ctpop : SDNode<"ISD::CTPOP" , SDTIntBitCountUnaryOp>; +def ctlz_zero_undef : SDNode<"ISD::CTLZ_ZERO_UNDEF", SDTIntBitCountUnaryOp>; +def cttz_zero_undef : SDNode<"ISD::CTTZ_ZERO_UNDEF", SDTIntBitCountUnaryOp>; def sext : SDNode<"ISD::SIGN_EXTEND", SDTIntExtendOp>; def zext : SDNode<"ISD::ZERO_EXTEND", SDTIntExtendOp>; def anyext : SDNode<"ISD::ANY_EXTEND" , SDTIntExtendOp>; @@ -415,6 +422,12 @@ def addrspacecast : SDNode<"ISD::ADDRSPACECAST", SDTUnaryOp>; def extractelt : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDTVecExtract>; def insertelt : SDNode<"ISD::INSERT_VECTOR_ELT", SDTVecInsert>; +def vecreduce_add : SDNode<"ISD::VECREDUCE_ADD", SDTVecReduce>; +def vecreduce_smax : SDNode<"ISD::VECREDUCE_SMAX", SDTVecReduce>; +def vecreduce_umax : SDNode<"ISD::VECREDUCE_UMAX", SDTVecReduce>; +def vecreduce_smin : SDNode<"ISD::VECREDUCE_SMIN", SDTVecReduce>; +def vecreduce_umin : SDNode<"ISD::VECREDUCE_UMIN", SDTVecReduce>; + def fadd : SDNode<"ISD::FADD" , SDTFPBinOp, [SDNPCommutative]>; def fsub : SDNode<"ISD::FSUB" , SDTFPBinOp>; def fmul : SDNode<"ISD::FMUL" , SDTFPBinOp, [SDNPCommutative]>; @@ -493,12 +506,20 @@ def strict_flog2 : SDNode<"ISD::STRICT_FLOG2", SDTFPUnaryOp, [SDNPHasChain]>; def strict_frint : SDNode<"ISD::STRICT_FRINT", SDTFPUnaryOp, [SDNPHasChain]>; +def strict_lrint : SDNode<"ISD::STRICT_LRINT", + SDTFPToIntOp, [SDNPHasChain]>; +def strict_llrint : SDNode<"ISD::STRICT_LLRINT", + SDTFPToIntOp, [SDNPHasChain]>; def strict_fnearbyint : SDNode<"ISD::STRICT_FNEARBYINT", SDTFPUnaryOp, [SDNPHasChain]>; def strict_fceil : SDNode<"ISD::STRICT_FCEIL", SDTFPUnaryOp, [SDNPHasChain]>; def strict_ffloor : SDNode<"ISD::STRICT_FFLOOR", SDTFPUnaryOp, [SDNPHasChain]>; +def strict_lround : SDNode<"ISD::STRICT_LROUND", + SDTFPToIntOp, [SDNPHasChain]>; +def strict_llround : SDNode<"ISD::STRICT_LLROUND", + SDTFPToIntOp, [SDNPHasChain]>; def strict_fround : SDNode<"ISD::STRICT_FROUND", SDTFPUnaryOp, [SDNPHasChain]>; def strict_ftrunc : SDNode<"ISD::STRICT_FTRUNC", @@ -513,6 +534,10 @@ def strict_fpround : SDNode<"ISD::STRICT_FP_ROUND", SDTFPRoundOp, [SDNPHasChain]>; def strict_fpextend : SDNode<"ISD::STRICT_FP_EXTEND", SDTFPExtendOp, [SDNPHasChain]>; +def strict_fp_to_sint : SDNode<"ISD::STRICT_FP_TO_SINT", + SDTFPToIntOp, [SDNPHasChain]>; +def strict_fp_to_uint : SDNode<"ISD::STRICT_FP_TO_UINT", + SDTFPToIntOp, [SDNPHasChain]>; def setcc : SDNode<"ISD::SETCC" , SDTSetCC>; def select : SDNode<"ISD::SELECT" , SDTSelect>; @@ -638,16 +663,32 @@ def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>; //===----------------------------------------------------------------------===// // Selection DAG Condition Codes -class CondCode; // ISD::CondCode enums -def SETOEQ : CondCode; def SETOGT : CondCode; -def SETOGE : CondCode; def SETOLT : CondCode; def SETOLE : CondCode; -def SETONE : CondCode; def SETO : CondCode; def SETUO : CondCode; -def SETUEQ : CondCode; def SETUGT : CondCode; def SETUGE : CondCode; -def SETULT : CondCode; def SETULE : CondCode; def SETUNE : CondCode; - -def SETEQ : CondCode; def SETGT : CondCode; def SETGE : CondCode; -def SETLT : CondCode; def SETLE : CondCode; def SETNE : CondCode; - +class CondCode<string fcmpName = "", string icmpName = ""> { + string ICmpPredicate = icmpName; + string FCmpPredicate = fcmpName; +} + +// ISD::CondCode enums, and mapping to CmpInst::Predicate names +def SETOEQ : CondCode<"FCMP_OEQ">; +def SETOGT : CondCode<"FCMP_OGT">; +def SETOGE : CondCode<"FCMP_OGE">; +def SETOLT : CondCode<"FCMP_OLT">; +def SETOLE : CondCode<"FCMP_OLE">; +def SETONE : CondCode<"FCMP_ONE">; +def SETO : CondCode<"FCMP_ORD">; +def SETUO : CondCode<"FCMP_UNO">; +def SETUEQ : CondCode<"FCMP_UEQ">; +def SETUGT : CondCode<"FCMP_UGT", "ICMP_UGT">; +def SETUGE : CondCode<"FCMP_UGE", "ICMP_UGE">; +def SETULT : CondCode<"FCMP_ULT", "ICMP_ULT">; +def SETULE : CondCode<"FCMP_ULE", "ICMP_ULE">; +def SETUNE : CondCode<"FCMP_UNE">; +def SETEQ : CondCode<"", "ICMP_EQ">; +def SETGT : CondCode<"", "ICMP_SGT">; +def SETGE : CondCode<"", "ICMP_SGE">; +def SETLT : CondCode<"", "ICMP_SLT">; +def SETLE : CondCode<"", "ICMP_SLE">; +def SETNE : CondCode<"", "ICMP_NE">; //===----------------------------------------------------------------------===// // Selection DAG Node Transformation Functions. @@ -741,6 +782,10 @@ class PatFrags<dag ops, list<dag> frags, code pred = [{}], // If this empty, accept any address space. list<int> AddressSpaces = ?; + // cast<MemSDNode>(N)->getAlignment() >= + // If this is empty, accept any alignment. + int MinAlignment = ?; + // cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::Monotonic bit IsAtomicOrderingMonotonic = ?; // cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::Acquire @@ -766,8 +811,6 @@ class PatFrags<dag ops, list<dag> frags, code pred = [{}], // cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::<VT>; // cast<StoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::<VT>; ValueType ScalarMemoryVT = ?; - - // TODO: Add alignment } // PatFrag - A version of PatFrags matching only a single fragment. @@ -813,6 +856,11 @@ class ImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm, bit IsAPFloat = 0; } +// Convenience wrapper for ImmLeaf to use timm/TargetConstant instead +// of imm/Constant. +class TImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm, + SDNode ImmNode = timm> : ImmLeaf<vt, pred, xform, ImmNode>; + // An ImmLeaf except that Imm is an APInt. This is useful when you need to // zero-extend the immediate instead of sign-extend it. // @@ -1111,6 +1159,16 @@ def pre_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset), let IsStore = 1; let MemoryVT = f32; } +def pre_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset), + (pre_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let ScalarMemoryVT = i8; +} +def pre_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset), + (pre_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let ScalarMemoryVT = i16; +} def post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), (istore node:$val, node:$ptr, node:$offset), [{ @@ -1148,14 +1206,26 @@ def post_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset), let IsStore = 1; let MemoryVT = f32; } +def post_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset), + (post_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let ScalarMemoryVT = i8; +} +def post_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset), + (post_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let ScalarMemoryVT = i16; +} -def nonvolatile_load : PatFrag<(ops node:$ptr), - (load node:$ptr), [{ - return !cast<LoadSDNode>(N)->isVolatile(); +// TODO: Split these into volatile and unordered flavors to enable +// selectively legal optimizations for each. (See D66309) +def simple_load : PatFrag<(ops node:$ptr), + (load node:$ptr), [{ + return cast<LoadSDNode>(N)->isSimple(); }]>; -def nonvolatile_store : PatFrag<(ops node:$val, node:$ptr), - (store node:$val, node:$ptr), [{ - return !cast<StoreSDNode>(N)->isVolatile(); +def simple_store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->isSimple(); }]>; // nontemporal store fragments. @@ -1277,6 +1347,12 @@ def any_flog2 : PatFrags<(ops node:$src), def any_frint : PatFrags<(ops node:$src), [(strict_frint node:$src), (frint node:$src)]>; +def any_lrint : PatFrags<(ops node:$src), + [(strict_lrint node:$src), + (lrint node:$src)]>; +def any_llrint : PatFrags<(ops node:$src), + [(strict_llrint node:$src), + (llrint node:$src)]>; def any_fnearbyint : PatFrags<(ops node:$src), [(strict_fnearbyint node:$src), (fnearbyint node:$src)]>; @@ -1286,6 +1362,12 @@ def any_fceil : PatFrags<(ops node:$src), def any_ffloor : PatFrags<(ops node:$src), [(strict_ffloor node:$src), (ffloor node:$src)]>; +def any_lround : PatFrags<(ops node:$src), + [(strict_lround node:$src), + (lround node:$src)]>; +def any_llround : PatFrags<(ops node:$src), + [(strict_llround node:$src), + (llround node:$src)]>; def any_fround : PatFrags<(ops node:$src), [(strict_fround node:$src), (fround node:$src)]>; @@ -1310,6 +1392,12 @@ def any_extloadf32 : PatFrags<(ops node:$ptr), def any_extloadf64 : PatFrags<(ops node:$ptr), [(strict_extloadf64 node:$ptr), (extloadf64 node:$ptr)]>; +def any_fp_to_sint : PatFrags<(ops node:$src), + [(strict_fp_to_sint node:$src), + (fp_to_sint node:$src)]>; +def any_fp_to_uint : PatFrags<(ops node:$src), + [(strict_fp_to_uint node:$src), + (fp_to_uint node:$src)]>; multiclass binary_atomic_op_ord<SDNode atomic_op> { def #NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val), @@ -1367,26 +1455,26 @@ multiclass ternary_atomic_op_ord<SDNode atomic_op> { } } -multiclass binary_atomic_op<SDNode atomic_op> { +multiclass binary_atomic_op<SDNode atomic_op, bit IsInt = 1> { def _8 : PatFrag<(ops node:$ptr, node:$val), (atomic_op node:$ptr, node:$val)> { let IsAtomic = 1; - let MemoryVT = i8; + let MemoryVT = !if(IsInt, i8, ?); } def _16 : PatFrag<(ops node:$ptr, node:$val), (atomic_op node:$ptr, node:$val)> { let IsAtomic = 1; - let MemoryVT = i16; + let MemoryVT = !if(IsInt, i16, f16); } def _32 : PatFrag<(ops node:$ptr, node:$val), (atomic_op node:$ptr, node:$val)> { let IsAtomic = 1; - let MemoryVT = i32; + let MemoryVT = !if(IsInt, i32, f32); } def _64 : PatFrag<(ops node:$ptr, node:$val), (atomic_op node:$ptr, node:$val)> { let IsAtomic = 1; - let MemoryVT = i64; + let MemoryVT = !if(IsInt, i64, f64); } defm NAME#_8 : binary_atomic_op_ord<atomic_op>; |