summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp 93
1 files changed, 59 insertions, 34 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 60e79c2c6c2f..a55729586b8d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -148,11 +148,15 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
/// \returns True.
bool promoteUniformBitreverseToI32(IntrinsicInst &I) const;
-
+ /// \returns The minimum number of bits needed to store the value of \p Op as
+ /// an unsigned integer. Truncating to this size and then zero-extending to
+ /// \p ScalarSize will not change the value.
unsigned numBitsUnsigned(Value *Op, unsigned ScalarSize) const;
+
+ /// \returns The minimum number of bits needed to store the value of \p Op as
+ /// a signed integer. Truncating to this size and then sign-extending to
+ /// \p ScalarSize will not change the value.
unsigned numBitsSigned(Value *Op, unsigned ScalarSize) const;
- bool isI24(Value *V, unsigned ScalarSize) const;
- bool isU24(Value *V, unsigned ScalarSize) const;
/// Replace mul instructions with llvm.amdgcn.mul.u24 or llvm.amdgcn.mul.s24.
/// SelectionDAG has an issue where an and asserting the bits are known
@@ -451,17 +455,7 @@ unsigned AMDGPUCodeGenPrepare::numBitsSigned(Value *Op,
unsigned ScalarSize) const {
// In order for this to be a signed 24-bit value, bit 23, must
// be a sign bit.
- return ScalarSize - ComputeNumSignBits(Op, *DL, 0, AC);
-}
-
-bool AMDGPUCodeGenPrepare::isI24(Value *V, unsigned ScalarSize) const {
- return ScalarSize >= 24 && // Types less than 24-bit should be treated
- // as unsigned 24-bit values.
- numBitsSigned(V, ScalarSize) < 24;
-}
-
-bool AMDGPUCodeGenPrepare::isU24(Value *V, unsigned ScalarSize) const {
- return numBitsUnsigned(V, ScalarSize) <= 24;
+ return ScalarSize - ComputeNumSignBits(Op, *DL, 0, AC) + 1;
}
static void extractValues(IRBuilder<> &Builder,
@@ -489,6 +483,34 @@ static Value *insertValues(IRBuilder<> &Builder,
return NewVal;
}
+// Returns the product of `LHS` and `RHS` built from the 24-bit mul intrinsics:
+// a lone mul24 if `Size` (original destination width) or `NumBits` (bound on
+// the product's significant bits) is <= 32, else an i64 from mul24/mulhi24.
+static Value *getMul24(IRBuilder<> &Builder, Value *LHS, Value *RHS,
+ unsigned Size, unsigned NumBits, bool IsSigned) {
+ if (Size <= 32 || NumBits <= 32) {
+ Intrinsic::ID ID =
+ IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;
+ return Builder.CreateIntrinsic(ID, {}, {LHS, RHS});
+ }
+
+ assert(NumBits <= 48);
+
+ Intrinsic::ID LoID =
+ IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;
+ Intrinsic::ID HiID =
+ IsSigned ? Intrinsic::amdgcn_mulhi_i24 : Intrinsic::amdgcn_mulhi_u24;
+
+ Value *Lo = Builder.CreateIntrinsic(LoID, {}, {LHS, RHS});
+ Value *Hi = Builder.CreateIntrinsic(HiID, {}, {LHS, RHS});
+
+ IntegerType *I64Ty = Builder.getInt64Ty();
+ Lo = Builder.CreateZExtOrTrunc(Lo, I64Ty);
+ Hi = Builder.CreateZExtOrTrunc(Hi, I64Ty);
+
+ return Builder.CreateOr(Lo, Builder.CreateShl(Hi, 32));
+}
+
bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
if (I.getOpcode() != Instruction::Mul)
return false;
@@ -507,13 +529,17 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
IRBuilder<> Builder(&I);
Builder.SetCurrentDebugLocation(I.getDebugLoc());
- Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
+ unsigned LHSBits = 0, RHSBits = 0;
+ bool IsSigned = false;
+
+ if (ST->hasMulU24() && (LHSBits = numBitsUnsigned(LHS, Size)) <= 24 &&
+ (RHSBits = numBitsUnsigned(RHS, Size)) <= 24) {
+ IsSigned = false;
+
+ } else if (ST->hasMulI24() && (LHSBits = numBitsSigned(LHS, Size)) <= 24 &&
+ (RHSBits = numBitsSigned(RHS, Size)) <= 24) {
+ IsSigned = true;
- // TODO: Should this try to match mulhi24?
- if (ST->hasMulU24() && isU24(LHS, Size) && isU24(RHS, Size)) {
- IntrID = Intrinsic::amdgcn_mul_u24;
- } else if (ST->hasMulI24() && isI24(LHS, Size) && isI24(RHS, Size)) {
- IntrID = Intrinsic::amdgcn_mul_i24;
} else
return false;
@@ -523,27 +549,26 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
extractValues(Builder, LHSVals, LHS);
extractValues(Builder, RHSVals, RHS);
-
IntegerType *I32Ty = Builder.getInt32Ty();
- FunctionCallee Intrin = Intrinsic::getDeclaration(Mod, IntrID);
for (int I = 0, E = LHSVals.size(); I != E; ++I) {
Value *LHS, *RHS;
- if (IntrID == Intrinsic::amdgcn_mul_u24) {
- LHS = Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
- RHS = Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
- } else {
+ if (IsSigned) {
LHS = Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty);
RHS = Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty);
+ } else {
+ LHS = Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
+ RHS = Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
}
- Value *Result = Builder.CreateCall(Intrin, {LHS, RHS});
+ Value *Result =
+ getMul24(Builder, LHS, RHS, Size, LHSBits + RHSBits, IsSigned);
- if (IntrID == Intrinsic::amdgcn_mul_u24) {
- ResultVals.push_back(Builder.CreateZExtOrTrunc(Result,
- LHSVals[I]->getType()));
+ if (IsSigned) {
+ ResultVals.push_back(
+ Builder.CreateSExtOrTrunc(Result, LHSVals[I]->getType()));
} else {
- ResultVals.push_back(Builder.CreateSExtOrTrunc(Result,
- LHSVals[I]->getType()));
+ ResultVals.push_back(
+ Builder.CreateZExtOrTrunc(Result, LHSVals[I]->getType()));
}
}
@@ -816,7 +841,7 @@ bool AMDGPUCodeGenPrepare::visitXor(BinaryOperator &I) {
if (!RHS || !IntrinsicCall || RHS->getSExtValue() != -1)
return visitBinaryOperator(I);
- // Check if the Call is an intrinsic intruction to amdgcn_class intrinsic
+ // Check that the call is to the amdgcn_class intrinsic and
// has only one use
if (IntrinsicCall->getIntrinsicID() != Intrinsic::amdgcn_class ||
!IntrinsicCall->hasOneUse())
@@ -1314,7 +1339,7 @@ bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
ConstantInt *Lower =
mdconst::extract<ConstantInt>(Range->getOperand(0));
- if (Lower->getValue().isNullValue()) {
+ if (Lower->isNullValue()) {
WidenLoad->setMetadata(LLVMContext::MD_range, nullptr);
} else {
Metadata *LowAndHigh[] = {