diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp | 34 | 
1 files changed, 34 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index 7b18e1f805d8..21bfab52c6c4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -104,6 +104,14 @@ public:
   void applyCombineSignExtendInReg(MachineInstr &MI,
                                    MachineInstr *&MatchInfo) const;
 
+  // Match an s_mul_u64 whose two 64-bit operands both have their upper 32
+  // bits known zero, or both have at least 33 sign bits; sets NewOpcode.
+  bool matchCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const;
+  // Rewrite the matched s_mul_u64 to the opcode chosen by the matcher:
+  // G_AMDGPU_S_MUL_I64_I32 when both operands have at least 33 sign bits,
+  // G_AMDGPU_S_MUL_U64_U32 when both have their upper 32 bits known zero.
+  void applyCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const;
+
 private:
 #define GET_GICOMBINER_CLASS_MEMBERS
 #define AMDGPUSubtarget GCNSubtarget
@@ -419,6 +427,32 @@ void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg(
   MI.eraseFromParent();
 }
 
+bool AMDGPUPostLegalizerCombinerImpl::matchCombine_s_mul_u64(
+    MachineInstr &MI, unsigned &NewOpcode) const {
+  Register Src0 = MI.getOperand(1).getReg();
+  Register Src1 = MI.getOperand(2).getReg();
+  if (MRI.getType(Src0) != LLT::scalar(64))
+    return false; // Only a 64-bit scalar first source is considered.
+
+  if (KB->getKnownBits(Src1).countMinLeadingZeros() >= 32 &&
+      KB->getKnownBits(Src0).countMinLeadingZeros() >= 32) {
+    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_U64_U32; // >= 32 known leading zeros.
+    return true;
+  }
+
+  if (KB->computeNumSignBits(Src1) >= 33 &&
+      KB->computeNumSignBits(Src0) >= 33) {
+    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_I64_I32; // >= 33 sign bits each.
+    return true;
+  }
+  return false; // Neither operand pattern holds; NewOpcode is not written.
+}
+
+void AMDGPUPostLegalizerCombinerImpl::applyCombine_s_mul_u64(
+    MachineInstr &MI, unsigned &NewOpcode) const {
+  Helper.replaceOpcodeWith(MI, NewOpcode); // NewOpcode comes from the matcher.
+}
+
 
// Pass boilerplate  // ================  | 
