aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp37
1 files changed, 30 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 9d7443012e3d..541a5b62450d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -169,11 +169,17 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
@@ -185,10 +191,15 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16bf16, Expand);
setOperationAction(ISD::STORE, MVT::f32, Promote);
AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
@@ -506,9 +517,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::v12f32, Promote);
AddPromotedToType(ISD::SELECT, MVT::v12f32, MVT::v12i32);
- // There are no libcalls of any kind.
- for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
- setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
+ // Disable most libcalls.
+ for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I) {
+ if (I < RTLIB::ATOMIC_LOAD || I > RTLIB::ATOMIC_FETCH_NAND_16)
+ setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
+ }
setSchedulingPreference(Sched::RegPressure);
setJumpIsExpensive(true);
@@ -556,6 +569,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
ISD::FSUB, ISD::FNEG,
ISD::FABS, ISD::AssertZext,
ISD::AssertSext, ISD::INTRINSIC_WO_CHAIN});
+
+ setMaxAtomicSizeInBitsSupported(64);
}
bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const {
@@ -3055,18 +3070,26 @@ SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) cons
bool ZeroUndef = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ||
Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF;
+ bool Is64BitScalar = !Src->isDivergent() && Src.getValueType() == MVT::i64;
- if (Src.getValueType() == MVT::i32) {
+ if (Src.getValueType() == MVT::i32 || Is64BitScalar) {
// (ctlz hi:lo) -> (umin (ffbh src), 32)
// (cttz hi:lo) -> (umin (ffbl src), 32)
// (ctlz_zero_undef src) -> (ffbh src)
// (cttz_zero_undef src) -> (ffbl src)
+
+ // 64-bit scalar version produce 32-bit result
+ // (ctlz hi:lo) -> (umin (S_FLBIT_I32_B64 src), 64)
+ // (cttz hi:lo) -> (umin (S_FF1_I32_B64 src), 64)
+ // (ctlz_zero_undef src) -> (S_FLBIT_I32_B64 src)
+ // (cttz_zero_undef src) -> (S_FF1_I32_B64 src)
SDValue NewOpr = DAG.getNode(NewOpc, SL, MVT::i32, Src);
if (!ZeroUndef) {
- const SDValue Const32 = DAG.getConstant(32, SL, MVT::i32);
- NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Const32);
+ const SDValue ConstVal = DAG.getConstant(
+ Op.getValueType().getScalarSizeInBits(), SL, MVT::i32);
+ NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, ConstVal);
}
- return NewOpr;
+ return DAG.getNode(ISD::ZERO_EXTEND, SL, Src.getValueType(), NewOpr);
}
SDValue Lo, Hi;