diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUInstructions.td')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 252 |
1 files changed, 73 insertions, 179 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 846e7f577a28..7e71dbdd1240 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -77,28 +77,39 @@ class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern> def TruePredicate : Predicate<"">; +// Add a predicate to the list if does not already exist to deduplicate it. +class PredConcat<list<Predicate> lst, Predicate pred> { + list<Predicate> ret = + !foldl([pred], lst, acc, cur, + !listconcat(acc, !if(!eq(!cast<string>(cur),!cast<string>(pred)), + [], [cur]))); +} + class PredicateControl { Predicate SubtargetPredicate = TruePredicate; - list<Predicate> AssemblerPredicates = []; Predicate AssemblerPredicate = TruePredicate; Predicate WaveSizePredicate = TruePredicate; list<Predicate> OtherPredicates = []; - list<Predicate> Predicates = !listconcat([SubtargetPredicate, - AssemblerPredicate, - WaveSizePredicate], - AssemblerPredicates, - OtherPredicates); + list<Predicate> Predicates = PredConcat< + PredConcat<PredConcat<OtherPredicates, + SubtargetPredicate>.ret, + AssemblerPredicate>.ret, + WaveSizePredicate>.ret; } + class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>, PredicateControl; -def FP16Denormals : Predicate<"Subtarget->hasFP16Denormals()">; -def FP32Denormals : Predicate<"Subtarget->hasFP32Denormals()">; -def FP64Denormals : Predicate<"Subtarget->hasFP64Denormals()">; -def NoFP16Denormals : Predicate<"!Subtarget->hasFP16Denormals()">; -def NoFP32Denormals : Predicate<"!Subtarget->hasFP32Denormals()">; -def NoFP64Denormals : Predicate<"!Subtarget->hasFP64Denormals()">; +let RecomputePerFunction = 1 in { +def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals">; +def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals">; +def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals">; +def NoFP16Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals">; +def NoFP32Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals">; +def NoFP64Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals">; def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">; +} + def FMA : Predicate<"Subtarget->hasFMA()">; def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>; @@ -147,20 +158,30 @@ def brtarget : Operand<OtherVT>; class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag< (ops node:$src0), (op $src0), - [{ return N->hasOneUse(); }] ->; + [{ return N->hasOneUse(); }]> { + + let GISelPredicateCode = [{ + return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg()); + }]; +} class HasOneUseBinOp<SDPatternOperator op> : PatFrag< (ops node:$src0, node:$src1), (op $src0, $src1), - [{ return N->hasOneUse(); }] ->; + [{ return N->hasOneUse(); }]> { + let GISelPredicateCode = [{ + return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg()); + }]; +} class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag< (ops node:$src0, node:$src1, node:$src2), (op $src0, $src1, $src2), - [{ return N->hasOneUse(); }] ->; + [{ return N->hasOneUse(); }]> { + let GISelPredicateCode = [{ + return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg()); + }]; +} let Properties = [SDNPCommutative, SDNPAssociative] in { def smax_oneuse : HasOneUseBinOp<smax>; @@ -315,15 +336,10 @@ class Aligned<int Bytes> { int MinAlignment = Bytes; } -class LoadFrag <SDPatternOperator op> : PatFrag<(ops node:$ptr), (op node:$ptr)>; - -class StoreFrag<SDPatternOperator op> : PatFrag < - (ops node:$value, node:$ptr), (op node:$value, node:$ptr) ->; - class StoreHi16<SDPatternOperator op> : PatFrag < - (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr) ->; + (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)> { + let IsStore = 1; +} def LoadAddress_constant : AddressSpaceList<[ AddrSpaces.Constant ]>; def LoadAddress_global : AddressSpaceList<[ AddrSpaces.Global, AddrSpaces.Constant ]>; @@ -345,48 +361,6 @@ def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>; -class GlobalLoadAddress : CodePatPred<[{ - auto AS = cast<MemSDNode>(N)->getAddressSpace(); - return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS; -}]>; - -class FlatLoadAddress : CodePatPred<[{ - const auto AS = cast<MemSDNode>(N)->getAddressSpace(); - return AS == AMDGPUAS::FLAT_ADDRESS || - AS == AMDGPUAS::GLOBAL_ADDRESS || - AS == AMDGPUAS::CONSTANT_ADDRESS; -}]>; - -class GlobalAddress : CodePatPred<[{ - return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; -}]>; - -class PrivateAddress : CodePatPred<[{ - return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS; -}]>; - -class LocalAddress : CodePatPred<[{ - return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; -}]>; - -class RegionAddress : CodePatPred<[{ - return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::REGION_ADDRESS; -}]>; - -class FlatStoreAddress : CodePatPred<[{ - const auto AS = cast<MemSDNode>(N)->getAddressSpace(); - return AS == AMDGPUAS::FLAT_ADDRESS || - AS == AMDGPUAS::GLOBAL_ADDRESS; -}]>; - -// TODO: Remove these when stores to new PatFrag format. -class PrivateStore <SDPatternOperator op> : StoreFrag <op>, PrivateAddress; -class LocalStore <SDPatternOperator op> : StoreFrag <op>, LocalAddress; -class RegionStore <SDPatternOperator op> : StoreFrag <op>, RegionAddress; -class GlobalStore <SDPatternOperator op> : StoreFrag<op>, GlobalAddress; -class FlatStore <SDPatternOperator op> : StoreFrag <op>, FlatStoreAddress; - - foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in { let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in { @@ -464,6 +438,10 @@ def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr), let MemoryVT = i16; } +def store_hi16_#as : StoreHi16 <truncstorei16>; +def truncstorei8_hi16_#as : StoreHi16<truncstorei8>; +def truncstorei16_hi16_#as : StoreHi16<truncstorei16>; + defm atomic_store_#as : binary_atomic_op<atomic_store>; } // End let AddressSpaces = ... @@ -497,18 +475,7 @@ defm atomic_load_umax : ret_noret_binary_atomic_op<atomic_load_umax>; defm atomic_load_umin : ret_noret_binary_atomic_op<atomic_load_umin>; defm atomic_load_xor : ret_noret_binary_atomic_op<atomic_load_xor>; defm atomic_load_fadd : ret_noret_binary_atomic_op<atomic_load_fadd, 0>; - - -def store_hi16_private : StoreHi16 <truncstorei16>, PrivateAddress; -def truncstorei8_hi16_private : StoreHi16<truncstorei8>, PrivateAddress; - -def store_atomic_global : GlobalStore<atomic_store>; -def truncstorei8_hi16_global : StoreHi16 <truncstorei8>, GlobalAddress; -def truncstorei16_hi16_global : StoreHi16 <truncstorei16>, GlobalAddress; - -def store_local_hi16 : StoreHi16 <truncstorei16>, LocalAddress; -def truncstorei8_local_hi16 : StoreHi16<truncstorei8>, LocalAddress; -def atomic_store_local : LocalStore <atomic_store>; +defm AMDGPUatomic_cmp_swap : ret_noret_binary_atomic_op<AMDGPUatomic_cmp_swap>; def load_align8_local : PatFrag <(ops node:$ptr), (load_local node:$ptr)> { @@ -535,30 +502,6 @@ def store_align16_local: PatFrag<(ops node:$val, node:$ptr), let IsTruncStore = 0; } - -def atomic_store_flat : FlatStore <atomic_store>; -def truncstorei8_hi16_flat : StoreHi16<truncstorei8>, FlatStoreAddress; -def truncstorei16_hi16_flat : StoreHi16<truncstorei16>, FlatStoreAddress; - - -class local_binary_atomic_op<SDNode atomic_op> : - PatFrag<(ops node:$ptr, node:$value), - (atomic_op node:$ptr, node:$value), [{ - return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; -}]>; - -class region_binary_atomic_op<SDNode atomic_op> : - PatFrag<(ops node:$ptr, node:$value), - (atomic_op node:$ptr, node:$value), [{ - return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::REGION_ADDRESS; -}]>; - - -def mskor_global : PatFrag<(ops node:$val, node:$ptr), - (AMDGPUstore_mskor node:$val, node:$ptr), [{ - return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; -}]>; - let AddressSpaces = StoreAddress_local.AddrSpaces in { defm atomic_cmp_swap_local : ternary_atomic_op<atomic_cmp_swap>; defm atomic_cmp_swap_local_m0 : ternary_atomic_op<atomic_cmp_swap_glue>; @@ -569,31 +512,6 @@ defm atomic_cmp_swap_region : ternary_atomic_op<atomic_cmp_swap>; defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>; } -class global_binary_atomic_op_frag<SDNode atomic_op> : PatFrag< - (ops node:$ptr, node:$value), - (atomic_op node:$ptr, node:$value), - [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>; - -// Legacy. -def AMDGPUatomic_cmp_swap_global : PatFrag< - (ops node:$ptr, node:$value), - (AMDGPUatomic_cmp_swap node:$ptr, node:$value)>, GlobalAddress; - -def atomic_cmp_swap_global : PatFrag< - (ops node:$ptr, node:$cmp, node:$value), - (atomic_cmp_swap node:$ptr, node:$cmp, node:$value)>, GlobalAddress; - - -def atomic_cmp_swap_global_noret : PatFrag< - (ops node:$ptr, node:$cmp, node:$value), - (atomic_cmp_swap node:$ptr, node:$cmp, node:$value), - [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>; - -def atomic_cmp_swap_global_ret : PatFrag< - (ops node:$ptr, node:$cmp, node:$value), - (atomic_cmp_swap node:$ptr, node:$cmp, node:$value), - [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>; - //===----------------------------------------------------------------------===// // Misc Pattern Fragments //===----------------------------------------------------------------------===// @@ -686,12 +604,12 @@ multiclass BFIPatterns <Instruction BFI_INT, def : AMDGPUPat < (or (and i64:$y, i64:$x), (and i64:$z, (not i64:$x))), (REG_SEQUENCE RC64, - (BFI_INT (i32 (EXTRACT_SUBREG $x, sub0)), - (i32 (EXTRACT_SUBREG $y, sub0)), - (i32 (EXTRACT_SUBREG $z, sub0))), sub0, - (BFI_INT (i32 (EXTRACT_SUBREG $x, sub1)), - (i32 (EXTRACT_SUBREG $y, sub1)), - (i32 (EXTRACT_SUBREG $z, sub1))), sub1) + (BFI_INT (i32 (EXTRACT_SUBREG RC64:$x, sub0)), + (i32 (EXTRACT_SUBREG RC64:$y, sub0)), + (i32 (EXTRACT_SUBREG RC64:$z, sub0))), sub0, + (BFI_INT (i32 (EXTRACT_SUBREG RC64:$x, sub1)), + (i32 (EXTRACT_SUBREG RC64:$y, sub1)), + (i32 (EXTRACT_SUBREG RC64:$z, sub1))), sub1) >; // SHA-256 Ch function @@ -705,12 +623,12 @@ multiclass BFIPatterns <Instruction BFI_INT, def : AMDGPUPat < (xor i64:$z, (and i64:$x, (xor i64:$y, i64:$z))), (REG_SEQUENCE RC64, - (BFI_INT (i32 (EXTRACT_SUBREG $x, sub0)), - (i32 (EXTRACT_SUBREG $y, sub0)), - (i32 (EXTRACT_SUBREG $z, sub0))), sub0, - (BFI_INT (i32 (EXTRACT_SUBREG $x, sub1)), - (i32 (EXTRACT_SUBREG $y, sub1)), - (i32 (EXTRACT_SUBREG $z, sub1))), sub1) + (BFI_INT (i32 (EXTRACT_SUBREG RC64:$x, sub0)), + (i32 (EXTRACT_SUBREG RC64:$y, sub0)), + (i32 (EXTRACT_SUBREG RC64:$z, sub0))), sub0, + (BFI_INT (i32 (EXTRACT_SUBREG RC64:$x, sub1)), + (i32 (EXTRACT_SUBREG RC64:$y, sub1)), + (i32 (EXTRACT_SUBREG RC64:$z, sub1))), sub1) >; def : AMDGPUPat < @@ -721,7 +639,7 @@ multiclass BFIPatterns <Instruction BFI_INT, def : AMDGPUPat < (f32 (fcopysign f32:$src0, f64:$src1)), (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0, - (i32 (EXTRACT_SUBREG $src1, sub1))) + (i32 (EXTRACT_SUBREG RC64:$src1, sub1))) >; def : AMDGPUPat < @@ -729,8 +647,8 @@ multiclass BFIPatterns <Instruction BFI_INT, (REG_SEQUENCE RC64, (i32 (EXTRACT_SUBREG $src0, sub0)), sub0, (BFI_INT (LoadImm32 (i32 0x7fffffff)), - (i32 (EXTRACT_SUBREG $src0, sub1)), - (i32 (EXTRACT_SUBREG $src1, sub1))), sub1) + (i32 (EXTRACT_SUBREG RC64:$src0, sub1)), + (i32 (EXTRACT_SUBREG RC64:$src1, sub1))), sub1) >; def : AMDGPUPat < @@ -738,7 +656,7 @@ multiclass BFIPatterns <Instruction BFI_INT, (REG_SEQUENCE RC64, (i32 (EXTRACT_SUBREG $src0, sub0)), sub0, (BFI_INT (LoadImm32 (i32 0x7fffffff)), - (i32 (EXTRACT_SUBREG $src0, sub1)), + (i32 (EXTRACT_SUBREG RC64:$src0, sub1)), $src1), sub1) >; } @@ -755,21 +673,21 @@ multiclass SHA256MaPattern <Instruction BFI_INT, Instruction XOR, RegisterClass def : AMDGPUPat < (or (and i64:$x, i64:$z), (and i64:$y, (or i64:$x, i64:$z))), (REG_SEQUENCE RC64, - (BFI_INT (XOR (i32 (EXTRACT_SUBREG $x, sub0)), - (i32 (EXTRACT_SUBREG $y, sub0))), - (i32 (EXTRACT_SUBREG $z, sub0)), - (i32 (EXTRACT_SUBREG $y, sub0))), sub0, - (BFI_INT (XOR (i32 (EXTRACT_SUBREG $x, sub1)), - (i32 (EXTRACT_SUBREG $y, sub1))), - (i32 (EXTRACT_SUBREG $z, sub1)), - (i32 (EXTRACT_SUBREG $y, sub1))), sub1) + (BFI_INT (XOR (i32 (EXTRACT_SUBREG RC64:$x, sub0)), + (i32 (EXTRACT_SUBREG RC64:$y, sub0))), + (i32 (EXTRACT_SUBREG RC64:$z, sub0)), + (i32 (EXTRACT_SUBREG RC64:$y, sub0))), sub0, + (BFI_INT (XOR (i32 (EXTRACT_SUBREG RC64:$x, sub1)), + (i32 (EXTRACT_SUBREG RC64:$y, sub1))), + (i32 (EXTRACT_SUBREG RC64:$z, sub1)), + (i32 (EXTRACT_SUBREG RC64:$y, sub1))), sub1) >; } // Bitfield extract patterns -def IMMZeroBasedBitfieldMask : PatLeaf <(imm), [{ - return isMask_32(N->getZExtValue()); +def IMMZeroBasedBitfieldMask : ImmLeaf <i32, [{ + return isMask_32(Imm); }]>; def IMMPopCount : SDNodeXForm<imm, [{ @@ -819,30 +737,6 @@ class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat < (BIT_ALIGN $src0, $src0, $src1) >; -multiclass IntMed3Pat<Instruction med3Inst, - SDPatternOperator min, - SDPatternOperator max, - SDPatternOperator min_oneuse, - SDPatternOperator max_oneuse, - ValueType vt = i32> { - - // This matches 16 permutations of - // min(max(a, b), max(min(a, b), c)) - def : AMDGPUPat < - (min (max_oneuse vt:$src0, vt:$src1), - (max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)), - (med3Inst vt:$src0, vt:$src1, vt:$src2) ->; - - // This matches 16 permutations of - // max(min(x, y), min(max(x, y), z)) - def : AMDGPUPat < - (max (min_oneuse vt:$src0, vt:$src1), - (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)), - (med3Inst $src0, $src1, $src2) ->; -} - // Special conversion patterns def cvt_rpi_i32_f32 : PatFrag < |