diff options
Diffstat (limited to 'llvm/lib/Target/NVPTX/NVPTXInstrInfo.td')
-rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 53 |
1 files changed, 36 insertions, 17 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 22084cddc092..6f9c40feb10e 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -145,6 +145,8 @@ def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">; def True : Predicate<"true">; def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">; +def hasPTX42 : Predicate<"Subtarget->getPTXVersion() >= 42">; +def hasPTX43 : Predicate<"Subtarget->getPTXVersion() >= 43">; def hasPTX60 : Predicate<"Subtarget->getPTXVersion() >= 60">; def hasPTX61 : Predicate<"Subtarget->getPTXVersion() >= 61">; def hasPTX63 : Predicate<"Subtarget->getPTXVersion() >= 63">; @@ -152,12 +154,16 @@ def hasPTX64 : Predicate<"Subtarget->getPTXVersion() >= 64">; def hasPTX65 : Predicate<"Subtarget->getPTXVersion() >= 65">; def hasPTX70 : Predicate<"Subtarget->getPTXVersion() >= 70">; def hasPTX71 : Predicate<"Subtarget->getPTXVersion() >= 71">; +def hasPTX72 : Predicate<"Subtarget->getPTXVersion() >= 72">; def hasSM30 : Predicate<"Subtarget->getSmVersion() >= 30">; +def hasSM32 : Predicate<"Subtarget->getSmVersion() >= 32">; +def hasSM53 : Predicate<"Subtarget->getSmVersion() >= 53">; def hasSM70 : Predicate<"Subtarget->getSmVersion() >= 70">; def hasSM72 : Predicate<"Subtarget->getSmVersion() >= 72">; def hasSM75 : Predicate<"Subtarget->getSmVersion() >= 75">; def hasSM80 : Predicate<"Subtarget->getSmVersion() >= 80">; +def hasSM86 : Predicate<"Subtarget->getSmVersion() >= 86">; // non-sync shfl instructions are not available on sm_70+ in PTX6.4+ def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70" @@ -199,17 +205,29 @@ multiclass I3<string OpcStr, SDNode OpNode> { [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>; } -// Template for instructions which take 3 int32 args. The instructions are +// Template for instructions which take 3 int args. The instructions are // named "<OpcStr>.s32" (e.g. "addc.cc.s32"). 
-multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> { - def i32rr : - NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), - !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; - def i32ri : - NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; +multiclass ADD_SUB_INT_CARRY<string OpcStr, SDNode OpNode> { + let hasSideEffects = 1 in { + def i32rr : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; + def i32ri : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; + def i64rr : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), + !strconcat(OpcStr, ".s64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>, + Requires<[hasPTX43]>; + def i64ri : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), + !strconcat(OpcStr, ".s64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>, + Requires<[hasPTX43]>; + } } // Template for instructions which take three fp64 or fp32 args. The @@ -579,14 +597,13 @@ defm SUB_i1 : ADD_SUB_i1<sub>; defm ADD : I3<"add.s", add>; defm SUB : I3<"sub.s", sub>; -// int32 addition and subtraction with carry-out. -// FIXME: PTX 4.3 adds a 64-bit add.cc (and maybe also 64-bit addc.cc?). -defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>; -defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>; +// int32 and int64 addition and subtraction with carry-out. +defm ADDCC : ADD_SUB_INT_CARRY<"add.cc", addc>; +defm SUBCC : ADD_SUB_INT_CARRY<"sub.cc", subc>; -// int32 addition and subtraction with carry-in and carry-out. 
-defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>; -defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>; +// int32 and int64 addition and subtraction with carry-in and carry-out. +defm ADDCCC : ADD_SUB_INT_CARRY<"addc.cc", adde>; +defm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube>; defm MULT : I3<"mul.lo.s", mul>; @@ -2653,6 +2670,8 @@ def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>; def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>; def BITCONVERT_32_I2F16x2 : F_BITCONVERT<"32", Int32Regs, Float16x2Regs>; def BITCONVERT_32_F16x22I : F_BITCONVERT<"32", Float16x2Regs, Int32Regs>; +def BITCONVERT_32_F2F16x2 : F_BITCONVERT<"32", Float32Regs, Float16x2Regs>; +def BITCONVERT_32_F16x22F : F_BITCONVERT<"32", Float16x2Regs, Float32Regs>; // NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where // we cannot specify floating-point literals in isel patterns. Therefore, we |