Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 39
1 file changed, 16 insertions(+), 23 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index a79549301740..2556996df97f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -13,40 +13,19 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
-#include "llvm/ADT/FloatingPointMode.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
-#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Value.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/IntegerDivision.h"
-#include <cassert>
-#include <iterator>
#define DEBUG_TYPE "amdgpu-codegenprepare"
@@ -60,6 +39,12 @@ static cl::opt<bool> WidenLoads(
cl::ReallyHidden,
cl::init(false));
+static cl::opt<bool> Widen16BitOps(
+ "amdgpu-codegenprepare-widen-16-bit-ops",
+ cl::desc("Widen uniform 16-bit instructions to 32-bit in AMDGPUCodeGenPrepare"),
+ cl::ReallyHidden,
+ cl::init(true));
+
static cl::opt<bool> UseMul24Intrin(
"amdgpu-codegenprepare-mul24",
cl::desc("Introduce mul24 intrinsics in AMDGPUCodeGenPrepare"),
@@ -269,6 +254,9 @@ bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const {
}
bool AMDGPUCodeGenPrepare::needsPromotionToI32(const Type *T) const {
+ if (!Widen16BitOps)
+ return false;
+
const IntegerType *IntTy = dyn_cast<IntegerType>(T);
if (IntTy && IntTy->getBitWidth() > 1 && IntTy->getBitWidth() <= 16)
return true;
@@ -751,6 +739,11 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
Type *Ty = FDiv.getType()->getScalarType();
+ // The f64 rcp/rsq approximations are pretty inaccurate. We can do an
+ // expansion around them in codegen.
+ if (Ty->isDoubleTy())
+ return false;
+
// No intrinsic for fdiv16 if target does not support f16.
if (Ty->isHalfTy() && !ST->has16BitInsts())
return false;
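
For reference, a minimal standalone sketch of the gating logic this patch adds to needsPromotionToI32 via the new Widen16BitOps option. The free-standing function below and its integer-bit-width parameter are simplifications for illustration only; the real pass takes an llvm::Type and also handles vector element types.

// Simplified, standalone model of the widening heuristic touched by this
// diff. Not the actual pass: names and the signature are illustrative.
#include <cassert>

// Mirrors the new Widen16BitOps cl::opt (defaults to true in the patch).
static bool Widen16BitOps = true;

// Returns true when a uniform integer of the given bit width would be
// promoted to i32 under this heuristic.
static bool needsPromotionToI32(unsigned BitWidth) {
  if (!Widen16BitOps)                     // early-out introduced by the patch
    return false;
  return BitWidth > 1 && BitWidth <= 16;  // i2..i16 are widened; i1 and i32+ are not
}

int main() {
  assert(needsPromotionToI32(16));   // i16 is widened by default
  assert(!needsPromotionToI32(1));   // i1 is never widened
  assert(!needsPromotionToI32(32));  // already 32-bit
  Widen16BitOps = false;             // the flag disables widening entirely
  assert(!needsPromotionToI32(16));
  return 0;
}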