diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 93 |
1 files changed, 59 insertions, 34 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index 60e79c2c6c2f..a55729586b8d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -148,11 +148,15 @@ class AMDGPUCodeGenPrepare : public FunctionPass, /// \returns True. bool promoteUniformBitreverseToI32(IntrinsicInst &I) const; - + /// \returns The minimum number of bits needed to store the value of \p Op as + /// an unsigned integer. Truncating to this size and then zero-extending to + /// ScalarSize will not change the value. unsigned numBitsUnsigned(Value *Op, unsigned ScalarSize) const; + + /// \returns The minimum number of bits needed to store the value of \p Op as + /// a signed integer. Truncating to this size and then sign-extending to + /// ScalarSize will not change the value. unsigned numBitsSigned(Value *Op, unsigned ScalarSize) const; - bool isI24(Value *V, unsigned ScalarSize) const; - bool isU24(Value *V, unsigned ScalarSize) const; /// Replace mul instructions with llvm.amdgcn.mul.u24 or llvm.amdgcn.mul.s24. /// SelectionDAG has an issue where an and asserting the bits are known @@ -451,17 +455,7 @@ unsigned AMDGPUCodeGenPrepare::numBitsSigned(Value *Op, unsigned ScalarSize) const { // In order for this to be a signed 24-bit value, bit 23, must // be a sign bit. - return ScalarSize - ComputeNumSignBits(Op, *DL, 0, AC); -} - -bool AMDGPUCodeGenPrepare::isI24(Value *V, unsigned ScalarSize) const { - return ScalarSize >= 24 && // Types less than 24-bit should be treated - // as unsigned 24-bit values.
- numBitsSigned(V, ScalarSize) < 24; -} - -bool AMDGPUCodeGenPrepare::isU24(Value *V, unsigned ScalarSize) const { - return numBitsUnsigned(V, ScalarSize) <= 24; + return ScalarSize - ComputeNumSignBits(Op, *DL, 0, AC) + 1; } static void extractValues(IRBuilder<> &Builder, @@ -489,6 +483,34 @@ static Value *insertValues(IRBuilder<> &Builder, return NewVal; } +// Returns 24-bit or 48-bit (as per `NumBits` and `Size`) mul of `LHS` and +// `RHS`. `NumBits` is the number of KnownBits of the result and `Size` is the +// width of the original destination. +static Value *getMul24(IRBuilder<> &Builder, Value *LHS, Value *RHS, + unsigned Size, unsigned NumBits, bool IsSigned) { + if (Size <= 32 || NumBits <= 32) { + Intrinsic::ID ID = + IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24; + return Builder.CreateIntrinsic(ID, {}, {LHS, RHS}); + } + + assert(NumBits <= 48); + + Intrinsic::ID LoID = + IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24; + Intrinsic::ID HiID = + IsSigned ? 
Intrinsic::amdgcn_mulhi_i24 : Intrinsic::amdgcn_mulhi_u24; + + Value *Lo = Builder.CreateIntrinsic(LoID, {}, {LHS, RHS}); + Value *Hi = Builder.CreateIntrinsic(HiID, {}, {LHS, RHS}); + + IntegerType *I64Ty = Builder.getInt64Ty(); + Lo = Builder.CreateZExtOrTrunc(Lo, I64Ty); + Hi = Builder.CreateZExtOrTrunc(Hi, I64Ty); + + return Builder.CreateOr(Lo, Builder.CreateShl(Hi, 32)); +} + bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const { if (I.getOpcode() != Instruction::Mul) return false; @@ -507,13 +529,17 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const { IRBuilder<> Builder(&I); Builder.SetCurrentDebugLocation(I.getDebugLoc()); - Intrinsic::ID IntrID = Intrinsic::not_intrinsic; + unsigned LHSBits = 0, RHSBits = 0; + bool IsSigned = false; + + if (ST->hasMulU24() && (LHSBits = numBitsUnsigned(LHS, Size)) <= 24 && + (RHSBits = numBitsUnsigned(RHS, Size)) <= 24) { + IsSigned = false; + + } else if (ST->hasMulI24() && (LHSBits = numBitsSigned(LHS, Size)) <= 24 && + (RHSBits = numBitsSigned(RHS, Size)) <= 24) { + IsSigned = true; - // TODO: Should this try to match mulhi24? 
- if (ST->hasMulU24() && isU24(LHS, Size) && isU24(RHS, Size)) { - IntrID = Intrinsic::amdgcn_mul_u24; - } else if (ST->hasMulI24() && isI24(LHS, Size) && isI24(RHS, Size)) { - IntrID = Intrinsic::amdgcn_mul_i24; } else return false; @@ -523,27 +549,26 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const { extractValues(Builder, LHSVals, LHS); extractValues(Builder, RHSVals, RHS); - IntegerType *I32Ty = Builder.getInt32Ty(); - FunctionCallee Intrin = Intrinsic::getDeclaration(Mod, IntrID); for (int I = 0, E = LHSVals.size(); I != E; ++I) { Value *LHS, *RHS; - if (IntrID == Intrinsic::amdgcn_mul_u24) { - LHS = Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty); - RHS = Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty); - } else { + if (IsSigned) { LHS = Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty); RHS = Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty); + } else { + LHS = Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty); + RHS = Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty); } - Value *Result = Builder.CreateCall(Intrin, {LHS, RHS}); + Value *Result = + getMul24(Builder, LHS, RHS, Size, LHSBits + RHSBits, IsSigned); - if (IntrID == Intrinsic::amdgcn_mul_u24) { - ResultVals.push_back(Builder.CreateZExtOrTrunc(Result, - LHSVals[I]->getType())); + if (IsSigned) { + ResultVals.push_back( + Builder.CreateSExtOrTrunc(Result, LHSVals[I]->getType())); } else { - ResultVals.push_back(Builder.CreateSExtOrTrunc(Result, - LHSVals[I]->getType())); + ResultVals.push_back( + Builder.CreateZExtOrTrunc(Result, LHSVals[I]->getType())); } } @@ -816,7 +841,7 @@ bool AMDGPUCodeGenPrepare::visitXor(BinaryOperator &I) { if (!RHS || !IntrinsicCall || RHS->getSExtValue() != -1) return visitBinaryOperator(I); - // Check if the Call is an intrinsic intruction to amdgcn_class intrinsic + // Check if the Call is an intrinsic instruction to amdgcn_class intrinsic // has only one use if (IntrinsicCall->getIntrinsicID() != Intrinsic::amdgcn_class || !IntrinsicCall->hasOneUse()) @@ -1314,7 
+1339,7 @@ bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) { ConstantInt *Lower = mdconst::extract<ConstantInt>(Range->getOperand(0)); - if (Lower->getValue().isNullValue()) { + if (Lower->isNullValue()) { WidenLoad->setMetadata(LLVMContext::MD_range, nullptr); } else { Metadata *LowAndHigh[] = { |
