summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUInstructions.td')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td 252
1 files changed, 73 insertions, 179 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 846e7f577a28..7e71dbdd1240 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -77,28 +77,39 @@ class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
def TruePredicate : Predicate<"">;
+// Add a predicate to the list, dropping any existing occurrence of it so that the result contains it exactly once.
+class PredConcat<list<Predicate> lst, Predicate pred> {
+ list<Predicate> ret =
+ !foldl([pred], lst, acc, cur,
+ !listconcat(acc, !if(!eq(!cast<string>(cur),!cast<string>(pred)),
+ [], [cur])));
+}
+
class PredicateControl {
Predicate SubtargetPredicate = TruePredicate;
- list<Predicate> AssemblerPredicates = [];
Predicate AssemblerPredicate = TruePredicate;
Predicate WaveSizePredicate = TruePredicate;
list<Predicate> OtherPredicates = [];
- list<Predicate> Predicates = !listconcat([SubtargetPredicate,
- AssemblerPredicate,
- WaveSizePredicate],
- AssemblerPredicates,
- OtherPredicates);
+ list<Predicate> Predicates = PredConcat<
+ PredConcat<PredConcat<OtherPredicates,
+ SubtargetPredicate>.ret,
+ AssemblerPredicate>.ret,
+ WaveSizePredicate>.ret;
}
+
class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
PredicateControl;
-def FP16Denormals : Predicate<"Subtarget->hasFP16Denormals()">;
-def FP32Denormals : Predicate<"Subtarget->hasFP32Denormals()">;
-def FP64Denormals : Predicate<"Subtarget->hasFP64Denormals()">;
-def NoFP16Denormals : Predicate<"!Subtarget->hasFP16Denormals()">;
-def NoFP32Denormals : Predicate<"!Subtarget->hasFP32Denormals()">;
-def NoFP64Denormals : Predicate<"!Subtarget->hasFP64Denormals()">;
+let RecomputePerFunction = 1 in {
+def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals">;
+def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals">;
+def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals">;
+def NoFP16Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals">;
+def NoFP32Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals">;
+def NoFP64Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
+}
+
def FMA : Predicate<"Subtarget->hasFMA()">;
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
@@ -147,20 +158,30 @@ def brtarget : Operand<OtherVT>;
class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
(ops node:$src0),
(op $src0),
- [{ return N->hasOneUse(); }]
->;
+ [{ return N->hasOneUse(); }]> {
+
+ let GISelPredicateCode = [{
+ return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
+ }];
+}
class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
(ops node:$src0, node:$src1),
(op $src0, $src1),
- [{ return N->hasOneUse(); }]
->;
+ [{ return N->hasOneUse(); }]> {
+ let GISelPredicateCode = [{
+ return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
+ }];
+}
class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
(ops node:$src0, node:$src1, node:$src2),
(op $src0, $src1, $src2),
- [{ return N->hasOneUse(); }]
->;
+ [{ return N->hasOneUse(); }]> {
+ let GISelPredicateCode = [{
+ return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
+ }];
+}
let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
@@ -315,15 +336,10 @@ class Aligned<int Bytes> {
int MinAlignment = Bytes;
}
-class LoadFrag <SDPatternOperator op> : PatFrag<(ops node:$ptr), (op node:$ptr)>;
-
-class StoreFrag<SDPatternOperator op> : PatFrag <
- (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
->;
-
class StoreHi16<SDPatternOperator op> : PatFrag <
- (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)
->;
+ (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)> {
+ let IsStore = 1;
+}
def LoadAddress_constant : AddressSpaceList<[ AddrSpaces.Constant ]>;
def LoadAddress_global : AddressSpaceList<[ AddrSpaces.Global, AddrSpaces.Constant ]>;
@@ -345,48 +361,6 @@ def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
-class GlobalLoadAddress : CodePatPred<[{
- auto AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS;
-}]>;
-
-class FlatLoadAddress : CodePatPred<[{
- const auto AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUAS::FLAT_ADDRESS ||
- AS == AMDGPUAS::GLOBAL_ADDRESS ||
- AS == AMDGPUAS::CONSTANT_ADDRESS;
-}]>;
-
-class GlobalAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
-}]>;
-
-class PrivateAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
-}]>;
-
-class LocalAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
-}]>;
-
-class RegionAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
-}]>;
-
-class FlatStoreAddress : CodePatPred<[{
- const auto AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUAS::FLAT_ADDRESS ||
- AS == AMDGPUAS::GLOBAL_ADDRESS;
-}]>;
-
-// TODO: Remove these when stores to new PatFrag format.
-class PrivateStore <SDPatternOperator op> : StoreFrag <op>, PrivateAddress;
-class LocalStore <SDPatternOperator op> : StoreFrag <op>, LocalAddress;
-class RegionStore <SDPatternOperator op> : StoreFrag <op>, RegionAddress;
-class GlobalStore <SDPatternOperator op> : StoreFrag<op>, GlobalAddress;
-class FlatStore <SDPatternOperator op> : StoreFrag <op>, FlatStoreAddress;
-
-
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
@@ -464,6 +438,10 @@ def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
let MemoryVT = i16;
}
+def store_hi16_#as : StoreHi16 <truncstorei16>;
+def truncstorei8_hi16_#as : StoreHi16<truncstorei8>;
+def truncstorei16_hi16_#as : StoreHi16<truncstorei16>;
+
defm atomic_store_#as : binary_atomic_op<atomic_store>;
} // End let AddressSpaces = ...
@@ -497,18 +475,7 @@ defm atomic_load_umax : ret_noret_binary_atomic_op<atomic_load_umax>;
defm atomic_load_umin : ret_noret_binary_atomic_op<atomic_load_umin>;
defm atomic_load_xor : ret_noret_binary_atomic_op<atomic_load_xor>;
defm atomic_load_fadd : ret_noret_binary_atomic_op<atomic_load_fadd, 0>;
-
-
-def store_hi16_private : StoreHi16 <truncstorei16>, PrivateAddress;
-def truncstorei8_hi16_private : StoreHi16<truncstorei8>, PrivateAddress;
-
-def store_atomic_global : GlobalStore<atomic_store>;
-def truncstorei8_hi16_global : StoreHi16 <truncstorei8>, GlobalAddress;
-def truncstorei16_hi16_global : StoreHi16 <truncstorei16>, GlobalAddress;
-
-def store_local_hi16 : StoreHi16 <truncstorei16>, LocalAddress;
-def truncstorei8_local_hi16 : StoreHi16<truncstorei8>, LocalAddress;
-def atomic_store_local : LocalStore <atomic_store>;
+defm AMDGPUatomic_cmp_swap : ret_noret_binary_atomic_op<AMDGPUatomic_cmp_swap>;
def load_align8_local : PatFrag <(ops node:$ptr), (load_local node:$ptr)> {
@@ -535,30 +502,6 @@ def store_align16_local: PatFrag<(ops node:$val, node:$ptr),
let IsTruncStore = 0;
}
-
-def atomic_store_flat : FlatStore <atomic_store>;
-def truncstorei8_hi16_flat : StoreHi16<truncstorei8>, FlatStoreAddress;
-def truncstorei16_hi16_flat : StoreHi16<truncstorei16>, FlatStoreAddress;
-
-
-class local_binary_atomic_op<SDNode atomic_op> :
- PatFrag<(ops node:$ptr, node:$value),
- (atomic_op node:$ptr, node:$value), [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
-}]>;
-
-class region_binary_atomic_op<SDNode atomic_op> :
- PatFrag<(ops node:$ptr, node:$value),
- (atomic_op node:$ptr, node:$value), [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
-}]>;
-
-
-def mskor_global : PatFrag<(ops node:$val, node:$ptr),
- (AMDGPUstore_mskor node:$val, node:$ptr), [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
-}]>;
-
let AddressSpaces = StoreAddress_local.AddrSpaces in {
defm atomic_cmp_swap_local : ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
@@ -569,31 +512,6 @@ defm atomic_cmp_swap_region : ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}
-class global_binary_atomic_op_frag<SDNode atomic_op> : PatFrag<
- (ops node:$ptr, node:$value),
- (atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
-
-// Legacy.
-def AMDGPUatomic_cmp_swap_global : PatFrag<
- (ops node:$ptr, node:$value),
- (AMDGPUatomic_cmp_swap node:$ptr, node:$value)>, GlobalAddress;
-
-def atomic_cmp_swap_global : PatFrag<
- (ops node:$ptr, node:$cmp, node:$value),
- (atomic_cmp_swap node:$ptr, node:$cmp, node:$value)>, GlobalAddress;
-
-
-def atomic_cmp_swap_global_noret : PatFrag<
- (ops node:$ptr, node:$cmp, node:$value),
- (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
-
-def atomic_cmp_swap_global_ret : PatFrag<
- (ops node:$ptr, node:$cmp, node:$value),
- (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
-
//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//
@@ -686,12 +604,12 @@ multiclass BFIPatterns <Instruction BFI_INT,
def : AMDGPUPat <
(or (and i64:$y, i64:$x), (and i64:$z, (not i64:$x))),
(REG_SEQUENCE RC64,
- (BFI_INT (i32 (EXTRACT_SUBREG $x, sub0)),
- (i32 (EXTRACT_SUBREG $y, sub0)),
- (i32 (EXTRACT_SUBREG $z, sub0))), sub0,
- (BFI_INT (i32 (EXTRACT_SUBREG $x, sub1)),
- (i32 (EXTRACT_SUBREG $y, sub1)),
- (i32 (EXTRACT_SUBREG $z, sub1))), sub1)
+ (BFI_INT (i32 (EXTRACT_SUBREG RC64:$x, sub0)),
+ (i32 (EXTRACT_SUBREG RC64:$y, sub0)),
+ (i32 (EXTRACT_SUBREG RC64:$z, sub0))), sub0,
+ (BFI_INT (i32 (EXTRACT_SUBREG RC64:$x, sub1)),
+ (i32 (EXTRACT_SUBREG RC64:$y, sub1)),
+ (i32 (EXTRACT_SUBREG RC64:$z, sub1))), sub1)
>;
// SHA-256 Ch function
@@ -705,12 +623,12 @@ multiclass BFIPatterns <Instruction BFI_INT,
def : AMDGPUPat <
(xor i64:$z, (and i64:$x, (xor i64:$y, i64:$z))),
(REG_SEQUENCE RC64,
- (BFI_INT (i32 (EXTRACT_SUBREG $x, sub0)),
- (i32 (EXTRACT_SUBREG $y, sub0)),
- (i32 (EXTRACT_SUBREG $z, sub0))), sub0,
- (BFI_INT (i32 (EXTRACT_SUBREG $x, sub1)),
- (i32 (EXTRACT_SUBREG $y, sub1)),
- (i32 (EXTRACT_SUBREG $z, sub1))), sub1)
+ (BFI_INT (i32 (EXTRACT_SUBREG RC64:$x, sub0)),
+ (i32 (EXTRACT_SUBREG RC64:$y, sub0)),
+ (i32 (EXTRACT_SUBREG RC64:$z, sub0))), sub0,
+ (BFI_INT (i32 (EXTRACT_SUBREG RC64:$x, sub1)),
+ (i32 (EXTRACT_SUBREG RC64:$y, sub1)),
+ (i32 (EXTRACT_SUBREG RC64:$z, sub1))), sub1)
>;
def : AMDGPUPat <
@@ -721,7 +639,7 @@ multiclass BFIPatterns <Instruction BFI_INT,
def : AMDGPUPat <
(f32 (fcopysign f32:$src0, f64:$src1)),
(BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0,
- (i32 (EXTRACT_SUBREG $src1, sub1)))
+ (i32 (EXTRACT_SUBREG RC64:$src1, sub1)))
>;
def : AMDGPUPat <
@@ -729,8 +647,8 @@ multiclass BFIPatterns <Instruction BFI_INT,
(REG_SEQUENCE RC64,
(i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
(BFI_INT (LoadImm32 (i32 0x7fffffff)),
- (i32 (EXTRACT_SUBREG $src0, sub1)),
- (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
+ (i32 (EXTRACT_SUBREG RC64:$src0, sub1)),
+ (i32 (EXTRACT_SUBREG RC64:$src1, sub1))), sub1)
>;
def : AMDGPUPat <
@@ -738,7 +656,7 @@ multiclass BFIPatterns <Instruction BFI_INT,
(REG_SEQUENCE RC64,
(i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
(BFI_INT (LoadImm32 (i32 0x7fffffff)),
- (i32 (EXTRACT_SUBREG $src0, sub1)),
+ (i32 (EXTRACT_SUBREG RC64:$src0, sub1)),
$src1), sub1)
>;
}
@@ -755,21 +673,21 @@ multiclass SHA256MaPattern <Instruction BFI_INT, Instruction XOR, RegisterClass
def : AMDGPUPat <
(or (and i64:$x, i64:$z), (and i64:$y, (or i64:$x, i64:$z))),
(REG_SEQUENCE RC64,
- (BFI_INT (XOR (i32 (EXTRACT_SUBREG $x, sub0)),
- (i32 (EXTRACT_SUBREG $y, sub0))),
- (i32 (EXTRACT_SUBREG $z, sub0)),
- (i32 (EXTRACT_SUBREG $y, sub0))), sub0,
- (BFI_INT (XOR (i32 (EXTRACT_SUBREG $x, sub1)),
- (i32 (EXTRACT_SUBREG $y, sub1))),
- (i32 (EXTRACT_SUBREG $z, sub1)),
- (i32 (EXTRACT_SUBREG $y, sub1))), sub1)
+ (BFI_INT (XOR (i32 (EXTRACT_SUBREG RC64:$x, sub0)),
+ (i32 (EXTRACT_SUBREG RC64:$y, sub0))),
+ (i32 (EXTRACT_SUBREG RC64:$z, sub0)),
+ (i32 (EXTRACT_SUBREG RC64:$y, sub0))), sub0,
+ (BFI_INT (XOR (i32 (EXTRACT_SUBREG RC64:$x, sub1)),
+ (i32 (EXTRACT_SUBREG RC64:$y, sub1))),
+ (i32 (EXTRACT_SUBREG RC64:$z, sub1)),
+ (i32 (EXTRACT_SUBREG RC64:$y, sub1))), sub1)
>;
}
// Bitfield extract patterns
-def IMMZeroBasedBitfieldMask : PatLeaf <(imm), [{
- return isMask_32(N->getZExtValue());
+def IMMZeroBasedBitfieldMask : ImmLeaf <i32, [{
+ return isMask_32(Imm);
}]>;
def IMMPopCount : SDNodeXForm<imm, [{
@@ -819,30 +737,6 @@ class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
(BIT_ALIGN $src0, $src0, $src1)
>;
-multiclass IntMed3Pat<Instruction med3Inst,
- SDPatternOperator min,
- SDPatternOperator max,
- SDPatternOperator min_oneuse,
- SDPatternOperator max_oneuse,
- ValueType vt = i32> {
-
- // This matches 16 permutations of
- // min(max(a, b), max(min(a, b), c))
- def : AMDGPUPat <
- (min (max_oneuse vt:$src0, vt:$src1),
- (max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)),
- (med3Inst vt:$src0, vt:$src1, vt:$src2)
->;
-
- // This matches 16 permutations of
- // max(min(x, y), min(max(x, y), z))
- def : AMDGPUPat <
- (max (min_oneuse vt:$src0, vt:$src1),
- (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
- (med3Inst $src0, $src1, $src2)
->;
-}
-
// Special conversion patterns
def cvt_rpi_i32_f32 : PatFrag <