| field | value | date |
|---|---|---|
| author | Dimitry Andric <dim@FreeBSD.org> | 2017-01-02 21:25:48 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-01-02 21:25:48 +0000 |
| commit | d88c1a5a572cdb661c111098831fa526e933756f | |
| tree | 97b32c3372106ac47ded3d1a99f9c023a8530073 /contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | |
| parent | 715652a404ee99f10c09c0a5edbb5883961b8c25 | |
| parent | b915e9e0fc85ba6f398b3fab0db6a81a8913af94 | |
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 301 |
1 files changed, 294 insertions, 7 deletions
```diff
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index b955e231699a..e6230547a9b3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -39,6 +39,78 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
   Module *Mod;
   bool HasUnsafeFPMath;
 
+  /// \brief Copies exact/nsw/nuw flags (if any) from binary operation \p I to
+  /// binary operation \p V.
+  ///
+  /// \returns Binary operation \p V.
+  Value *copyFlags(const BinaryOperator &I, Value *V) const;
+
+  /// \returns \p T's base element bit width.
+  unsigned getBaseElementBitWidth(const Type *T) const;
+
+  /// \returns Equivalent 32 bit integer type for given type \p T. For example,
+  /// if \p T is i7, then i32 is returned; if \p T is <3 x i12>, then <3 x i32>
+  /// is returned.
+  Type *getI32Ty(IRBuilder<> &B, const Type *T) const;
+
+  /// \returns True if binary operation \p I is a signed binary operation, false
+  /// otherwise.
+  bool isSigned(const BinaryOperator &I) const;
+
+  /// \returns True if the condition of 'select' operation \p I comes from a
+  /// signed 'icmp' operation, false otherwise.
+  bool isSigned(const SelectInst &I) const;
+
+  /// \returns True if type \p T needs to be promoted to 32 bit integer type,
+  /// false otherwise.
+  bool needsPromotionToI32(const Type *T) const;
+
+  /// \brief Promotes uniform binary operation \p I to equivalent 32 bit binary
+  /// operation.
+  ///
+  /// \details \p I's base element bit width must be greater than 1 and less
+  /// than or equal 16. Promotion is done by sign or zero extending operands to
+  /// 32 bits, replacing \p I with equivalent 32 bit binary operation, and
+  /// truncating the result of 32 bit binary operation back to \p I's original
+  /// type. Division operation is not promoted.
+  ///
+  /// \returns True if \p I is promoted to equivalent 32 bit binary operation,
+  /// false otherwise.
+  bool promoteUniformOpToI32(BinaryOperator &I) const;
+
+  /// \brief Promotes uniform 'icmp' operation \p I to 32 bit 'icmp' operation.
+  ///
+  /// \details \p I's base element bit width must be greater than 1 and less
+  /// than or equal 16. Promotion is done by sign or zero extending operands to
+  /// 32 bits, and replacing \p I with 32 bit 'icmp' operation.
+  ///
+  /// \returns True.
+  bool promoteUniformOpToI32(ICmpInst &I) const;
+
+  /// \brief Promotes uniform 'select' operation \p I to 32 bit 'select'
+  /// operation.
+  ///
+  /// \details \p I's base element bit width must be greater than 1 and less
+  /// than or equal 16. Promotion is done by sign or zero extending operands to
+  /// 32 bits, replacing \p I with 32 bit 'select' operation, and truncating the
+  /// result of 32 bit 'select' operation back to \p I's original type.
+  ///
+  /// \returns True.
+  bool promoteUniformOpToI32(SelectInst &I) const;
+
+  /// \brief Promotes uniform 'bitreverse' intrinsic \p I to 32 bit 'bitreverse'
+  /// intrinsic.
+  ///
+  /// \details \p I's base element bit width must be greater than 1 and less
+  /// than or equal 16. Promotion is done by zero extending the operand to 32
+  /// bits, replacing \p I with 32 bit 'bitreverse' intrinsic, shifting the
+  /// result of 32 bit 'bitreverse' intrinsic to the right with zero fill (the
+  /// shift amount is 32 minus \p I's base element bit width), and truncating
+  /// the result of the shift operation back to \p I's original type.
+  ///
+  /// \returns True.
+  bool promoteUniformBitreverseToI32(IntrinsicInst &I) const;
+
 public:
   static char ID;
   AMDGPUCodeGenPrepare(const TargetMachine *TM = nullptr) :
@@ -51,16 +123,18 @@ public:
 
   bool visitFDiv(BinaryOperator &I);
 
-  bool visitInstruction(Instruction &I) {
-    return false;
-  }
+  bool visitInstruction(Instruction &I) { return false; }
+  bool visitBinaryOperator(BinaryOperator &I);
+  bool visitICmpInst(ICmpInst &I);
+  bool visitSelectInst(SelectInst &I);
+
+  bool visitIntrinsicInst(IntrinsicInst &I);
+  bool visitBitreverseIntrinsicInst(IntrinsicInst &I);
 
   bool doInitialization(Module &M) override;
   bool runOnFunction(Function &F) override;
 
-  const char *getPassName() const override {
-    return "AMDGPU IR optimizations";
-  }
+  StringRef getPassName() const override { return "AMDGPU IR optimizations"; }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<DivergenceAnalysis>();
@@ -70,6 +144,171 @@ public:
 
 } // End anonymous namespace
 
+Value *AMDGPUCodeGenPrepare::copyFlags(
+    const BinaryOperator &I, Value *V) const {
+  BinaryOperator *BinOp = dyn_cast<BinaryOperator>(V);
+  if (!BinOp) // Possibly constant expression.
+    return V;
+
+  if (isa<OverflowingBinaryOperator>(BinOp)) {
+    BinOp->setHasNoSignedWrap(I.hasNoSignedWrap());
+    BinOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+  } else if (isa<PossiblyExactOperator>(BinOp))
+    BinOp->setIsExact(I.isExact());
+
+  return V;
+}
+
+unsigned AMDGPUCodeGenPrepare::getBaseElementBitWidth(const Type *T) const {
+  assert(needsPromotionToI32(T) && "T does not need promotion to i32");
+
+  if (T->isIntegerTy())
+    return T->getIntegerBitWidth();
+  return cast<VectorType>(T)->getElementType()->getIntegerBitWidth();
+}
+
+Type *AMDGPUCodeGenPrepare::getI32Ty(IRBuilder<> &B, const Type *T) const {
+  assert(needsPromotionToI32(T) && "T does not need promotion to i32");
+
+  if (T->isIntegerTy())
+    return B.getInt32Ty();
+  return VectorType::get(B.getInt32Ty(), cast<VectorType>(T)->getNumElements());
+}
+
+bool AMDGPUCodeGenPrepare::isSigned(const BinaryOperator &I) const {
+  return I.getOpcode() == Instruction::AShr ||
+      I.getOpcode() == Instruction::SDiv || I.getOpcode() == Instruction::SRem;
+}
+
+bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const {
+  return isa<ICmpInst>(I.getOperand(0)) ?
+      cast<ICmpInst>(I.getOperand(0))->isSigned() : false;
+}
+
+bool AMDGPUCodeGenPrepare::needsPromotionToI32(const Type *T) const {
+  if (T->isIntegerTy() && T->getIntegerBitWidth() > 1 &&
+      T->getIntegerBitWidth() <= 16)
+    return true;
+  if (!T->isVectorTy())
+    return false;
+  return needsPromotionToI32(cast<VectorType>(T)->getElementType());
+}
+
+bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
+  assert(needsPromotionToI32(I.getType()) &&
+         "I does not need promotion to i32");
+
+  if (I.getOpcode() == Instruction::SDiv ||
+      I.getOpcode() == Instruction::UDiv)
+    return false;
+
+  IRBuilder<> Builder(&I);
+  Builder.SetCurrentDebugLocation(I.getDebugLoc());
+
+  Type *I32Ty = getI32Ty(Builder, I.getType());
+  Value *ExtOp0 = nullptr;
+  Value *ExtOp1 = nullptr;
+  Value *ExtRes = nullptr;
+  Value *TruncRes = nullptr;
+
+  if (isSigned(I)) {
+    ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
+    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
+  } else {
+    ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
+    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
+  }
+  ExtRes = copyFlags(I, Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1));
+  TruncRes = Builder.CreateTrunc(ExtRes, I.getType());
+
+  I.replaceAllUsesWith(TruncRes);
+  I.eraseFromParent();
+
+  return true;
+}
+
+bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(ICmpInst &I) const {
+  assert(needsPromotionToI32(I.getOperand(0)->getType()) &&
+         "I does not need promotion to i32");
+
+  IRBuilder<> Builder(&I);
+  Builder.SetCurrentDebugLocation(I.getDebugLoc());
+
+  Type *I32Ty = getI32Ty(Builder, I.getOperand(0)->getType());
+  Value *ExtOp0 = nullptr;
+  Value *ExtOp1 = nullptr;
+  Value *NewICmp  = nullptr;
+
+  if (I.isSigned()) {
+    ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
+    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
+  } else {
+    ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
+    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
+  }
+  NewICmp = Builder.CreateICmp(I.getPredicate(), ExtOp0, ExtOp1);
+
+  I.replaceAllUsesWith(NewICmp);
+  I.eraseFromParent();
+
+  return true;
+}
+
+bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(SelectInst &I) const {
+  assert(needsPromotionToI32(I.getType()) &&
+         "I does not need promotion to i32");
+
+  IRBuilder<> Builder(&I);
+  Builder.SetCurrentDebugLocation(I.getDebugLoc());
+
+  Type *I32Ty = getI32Ty(Builder, I.getType());
+  Value *ExtOp1 = nullptr;
+  Value *ExtOp2 = nullptr;
+  Value *ExtRes = nullptr;
+  Value *TruncRes = nullptr;
+
+  if (isSigned(I)) {
+    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
+    ExtOp2 = Builder.CreateSExt(I.getOperand(2), I32Ty);
+  } else {
+    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
+    ExtOp2 = Builder.CreateZExt(I.getOperand(2), I32Ty);
+  }
+  ExtRes = Builder.CreateSelect(I.getOperand(0), ExtOp1, ExtOp2);
+  TruncRes = Builder.CreateTrunc(ExtRes, I.getType());
+
+  I.replaceAllUsesWith(TruncRes);
+  I.eraseFromParent();
+
+  return true;
+}
+
+bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(
+    IntrinsicInst &I) const {
+  assert(I.getIntrinsicID() == Intrinsic::bitreverse &&
+         "I must be bitreverse intrinsic");
+  assert(needsPromotionToI32(I.getType()) &&
+         "I does not need promotion to i32");
+
+  IRBuilder<> Builder(&I);
+  Builder.SetCurrentDebugLocation(I.getDebugLoc());
+
+  Type *I32Ty = getI32Ty(Builder, I.getType());
+  Function *I32 =
+      Intrinsic::getDeclaration(Mod, Intrinsic::bitreverse, { I32Ty });
+  Value *ExtOp = Builder.CreateZExt(I.getOperand(0), I32Ty);
+  Value *ExtRes = Builder.CreateCall(I32, { ExtOp });
+  Value *LShrOp =
+      Builder.CreateLShr(ExtRes, 32 - getBaseElementBitWidth(I.getType()));
+  Value *TruncRes =
+      Builder.CreateTrunc(LShrOp, I.getType());
+
+  I.replaceAllUsesWith(TruncRes);
+  I.eraseFromParent();
+
+  return true;
+}
+
 static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv) {
   const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
   if (!CNum)
@@ -85,7 +324,6 @@ static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv) {
 
 bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
   Type *Ty = FDiv.getType();
-  // TODO: Handle half
   if (!Ty->getScalarType()->isFloatTy())
     return false;
 
@@ -154,6 +392,55 @@ static bool hasUnsafeFPMath(const Function &F) {
   return Attr.getValueAsString() == "true";
 }
 
+bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
+  bool Changed = false;
+
+  if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
+      DA->isUniform(&I))
+    Changed |= promoteUniformOpToI32(I);
+
+  return Changed;
+}
+
+bool AMDGPUCodeGenPrepare::visitICmpInst(ICmpInst &I) {
+  bool Changed = false;
+
+  if (ST->has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&
+      DA->isUniform(&I))
+    Changed |= promoteUniformOpToI32(I);
+
+  return Changed;
+}
+
+bool AMDGPUCodeGenPrepare::visitSelectInst(SelectInst &I) {
+  bool Changed = false;
+
+  if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
+      DA->isUniform(&I))
+    Changed |= promoteUniformOpToI32(I);
+
+  return Changed;
+}
+
+bool AMDGPUCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
+  switch (I.getIntrinsicID()) {
+  case Intrinsic::bitreverse:
+    return visitBitreverseIntrinsicInst(I);
+  default:
+    return false;
+  }
+}
+
+bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
+  bool Changed = false;
+
+  if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
+      DA->isUniform(&I))
+    Changed |= promoteUniformBitreverseToI32(I);
+
+  return Changed;
+}
+
 bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
   Mod = &M;
   return false;
```
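The promotion helpers in this change rely on a simple identity: sign- or zero-extending the narrow operands to 32 bits, performing the operation there, and truncating the result reproduces the wrapping result of the original narrow operation. As a standalone illustration (not part of the commit; hypothetical helper names, plain integers instead of LLVM IR), the zext/op/trunc pattern that promoteUniformOpToI32 emits for an unsigned i16 multiply behaves like this:

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical illustration (not part of the commit): emulate the
// zext / 32-bit op / trunc sequence promoteUniformOpToI32 emits for an
// unsigned 16-bit multiply.
static uint16_t promotedMulU16(uint16_t A, uint16_t B) {
  uint32_t ExtA = A;                     // CreateZExt to i32
  uint32_t ExtB = B;                     // CreateZExt to i32
  uint32_t ExtRes = ExtA * ExtB;         // 32-bit binary operation
  return static_cast<uint16_t>(ExtRes);  // CreateTrunc back to i16
}

int main() {
  // Truncating the 32-bit product gives exactly the wrapping 16-bit product.
  assert(promotedMulU16(300, 200) == static_cast<uint16_t>(60000));
  // 4096 * 17 = 69632, which wraps to 4096 in 16 bits.
  assert(promotedMulU16(4096, 17) == static_cast<uint16_t>(4096u * 17u % 65536u));
  return 0;
}
```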
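The bitreverse promotion needs one extra step: reversing the zero-extended value within 32 bits leaves the original bits at the top of the word, so promoteUniformBitreverseToI32 shifts right by 32 minus the original bit width before truncating. A small standalone sketch of that arithmetic (again with hypothetical helper names, not part of the commit):

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical illustration (not part of the commit): a plain 32-bit bit
// reversal standing in for the llvm.bitreverse.i32 intrinsic.
static uint32_t bitreverse32(uint32_t V) {
  uint32_t R = 0;
  for (int i = 0; i < 32; ++i)
    R |= ((V >> i) & 1u) << (31 - i);
  return R;
}

// Emulate the zext / bitreverse / lshr / trunc sequence for a 16-bit value.
static uint16_t promotedBitreverseU16(uint16_t A) {
  uint32_t Ext = A;                       // CreateZExt to i32
  uint32_t Rev = bitreverse32(Ext);       // 32-bit llvm.bitreverse
  uint32_t Shifted = Rev >> (32 - 16);    // CreateLShr by 32 minus bit width
  return static_cast<uint16_t>(Shifted);  // CreateTrunc back to i16
}

int main() {
  // 0x0001 reversed within 16 bits is 0x8000; the 32-bit reverse alone would
  // leave it at 0x80000000, which the shift moves back into the low half.
  assert(promotedBitreverseU16(0x0001) == 0x8000);
  assert(promotedBitreverseU16(0x00FF) == 0xFF00);
  return 0;
}
```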
