1 files changed, 118 insertions, 119 deletions
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 8c48597fc2e4..15e035874002 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -890,7 +890,7 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,
     return nullptr;
 
   // Replace the malloc with a calloc. We need the data layout to know what the
-  // actual size of a 'size_t' parameter is. 
+  // actual size of a 'size_t' parameter is.
   B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator());
   const DataLayout &DL = Malloc->getModule()->getDataLayout();
   IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext());
@@ -970,7 +970,7 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
   Value *V = valueHasFloatPrecision(CI->getArgOperand(0));
   if (V == nullptr)
     return nullptr;
-  
+
   // If call isn't an intrinsic, check that it isn't within a function with the
   // same name as the float version of this call.
   //
@@ -1126,165 +1126,164 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
   if (!Pow->isFast())
     return nullptr;
 
-  const APFloat *Arg1C;
-  if (!match(Pow->getArgOperand(1), m_APFloat(Arg1C)))
-    return nullptr;
-  if (!Arg1C->isExactlyValue(0.5) && !Arg1C->isExactlyValue(-0.5))
+  Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+  Type *Ty = Pow->getType();
+
+  const APFloat *ExpoF;
+  if (!match(Expo, m_APFloat(ExpoF)) ||
+      (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)))
     return nullptr;
 
-  // Fast-math flags from the pow() are propagated to all replacement ops.
-  IRBuilder<>::FastMathFlagGuard Guard(B);
-  B.setFastMathFlags(Pow->getFastMathFlags());
-  Type *Ty = Pow->getType();
-  Value *Sqrt;
+  // If errno is never set, then use the intrinsic for sqrt().
   if (Pow->hasFnAttr(Attribute::ReadNone)) {
-    // We know that errno is never set, so replace with an intrinsic:
-    // pow(x, 0.5) --> llvm.sqrt(x)
-    // llvm.pow(x, 0.5) --> llvm.sqrt(x)
-    auto *F = Intrinsic::getDeclaration(Pow->getModule(), Intrinsic::sqrt, Ty);
-    Sqrt = B.CreateCall(F, Pow->getArgOperand(0));
-  } else if (hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf,
-                             LibFunc_sqrtl)) {
-    // Errno could be set, so we must use a sqrt libcall.
-    // TODO: We also should check that the target can in fact lower the sqrt
-    // libcall. We currently have no way to ask this question, so we ask
-    // whether the target has a sqrt libcall which is not exactly the same.
-    Sqrt = emitUnaryFloatFnCall(Pow->getArgOperand(0),
-                                TLI->getName(LibFunc_sqrt), B,
+    Function *SqrtFn = Intrinsic::getDeclaration(Pow->getModule(),
+                                                 Intrinsic::sqrt, Ty);
+    Sqrt = B.CreateCall(SqrtFn, Base);
+  }
+  // Otherwise, use the libcall for sqrt().
+  else if (hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl))
+    // TODO: We also should check that the target can in fact lower the sqrt()
+    // libcall. We currently have no way to ask this question, so we ask if
+    // the target has a sqrt() libcall, which is not exactly the same.
+    Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt), B,
                                 Pow->getCalledFunction()->getAttributes());
-  } else {
-    // We can't replace with an intrinsic or a libcall.
+  else
     return nullptr;
-  }
 
-  // If this is pow(x, -0.5), get the reciprocal.
-  if (Arg1C->isExactlyValue(-0.5))
-    Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt);
+  // If the exponent is negative, then get the reciprocal.
+  if (ExpoF->isNegative())
+    Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt, "reciprocal");
 
   return Sqrt;
 }
 
-Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
-  Function *Callee = CI->getCalledFunction();
-  Value *Ret = nullptr;
+Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
+  Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+  Function *Callee = Pow->getCalledFunction();
+  AttributeList Attrs = Callee->getAttributes();
   StringRef Name = Callee->getName();
-  if (UnsafeFPShrink && Name == "pow" && hasFloatVersion(Name))
-    Ret = optimizeUnaryDoubleFP(CI, B, true);
+  Module *Module = Pow->getModule();
+  Type *Ty = Pow->getType();
+  Value *Shrunk = nullptr;
+  bool Ignored;
+
+  if (UnsafeFPShrink &&
+      Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name))
+    Shrunk = optimizeUnaryDoubleFP(Pow, B, true);
+
+  // Propagate the math semantics from the call to any created instructions.
+  IRBuilder<>::FastMathFlagGuard Guard(B);
+  B.setFastMathFlags(Pow->getFastMathFlags());
 
-  Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
+  // Evaluate special cases related to the base.
 
   // pow(1.0, x) -> 1.0
-  if (match(Op1, m_SpecificFP(1.0)))
-    return Op1;
-  // pow(2.0, x) -> llvm.exp2(x)
-  if (match(Op1, m_SpecificFP(2.0))) {
-    Value *Exp2 = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::exp2,
-                                            CI->getType());
-    return B.CreateCall(Exp2, Op2, "exp2");
-  }
-
-  // There's no llvm.exp10 intrinsic yet, but, maybe, some day there will
-  // be one.
-  if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
-    // pow(10.0, x) -> exp10(x)
-    if (Op1C->isExactlyValue(10.0) &&
-        hasUnaryFloatFn(TLI, Op1->getType(), LibFunc_exp10, LibFunc_exp10f,
-                        LibFunc_exp10l))
-      return emitUnaryFloatFnCall(Op2, TLI->getName(LibFunc_exp10), B,
-                                  Callee->getAttributes());
+  if (match(Base, m_SpecificFP(1.0)))
+    return Base;
+
+  // pow(2.0, x) -> exp2(x)
+  if (match(Base, m_SpecificFP(2.0))) {
+    Value *Exp2 = Intrinsic::getDeclaration(Module, Intrinsic::exp2, Ty);
+    return B.CreateCall(Exp2, Expo, "exp2");
   }
 
+  // pow(10.0, x) -> exp10(x)
+  if (ConstantFP *BaseC = dyn_cast<ConstantFP>(Base))
+    // There is no exp10 intrinsic yet, but maybe some day there will be one.
+    if (BaseC->isExactlyValue(10.0) &&
+        hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
+      return emitUnaryFloatFnCall(Expo, TLI->getName(LibFunc_exp10), B, Attrs);
+
   // pow(exp(x), y) -> exp(x * y)
   // pow(exp2(x), y) -> exp2(x * y)
   // We enable these only with fast-math. Besides rounding differences, the
   // transformation changes overflow and underflow behavior quite dramatically.
   // Example: x = 1000, y = 0.001.
   // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1).
-  auto *OpC = dyn_cast<CallInst>(Op1);
-  if (OpC && OpC->isFast() && CI->isFast()) {
-    LibFunc Func;
-    Function *OpCCallee = OpC->getCalledFunction();
-    if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) &&
-        TLI->has(Func) && (Func == LibFunc_exp || Func == LibFunc_exp2)) {
+  auto *BaseFn = dyn_cast<CallInst>(Base);
+  if (BaseFn && BaseFn->isFast() && Pow->isFast()) {
+    LibFunc LibFn;
+    Function *CalleeFn = BaseFn->getCalledFunction();
+    if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) &&
+        (LibFn == LibFunc_exp || LibFn == LibFunc_exp2) && TLI->has(LibFn)) {
       IRBuilder<>::FastMathFlagGuard Guard(B);
-      B.setFastMathFlags(CI->getFastMathFlags());
-      Value *FMul = B.CreateFMul(OpC->getArgOperand(0), Op2, "mul");
-      return emitUnaryFloatFnCall(FMul, OpCCallee->getName(), B,
-                                  OpCCallee->getAttributes());
+      B.setFastMathFlags(Pow->getFastMathFlags());
+
+      Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul");
+      return emitUnaryFloatFnCall(FMul, CalleeFn->getName(), B,
+                                  CalleeFn->getAttributes());
     }
   }
 
-  if (Value *Sqrt = replacePowWithSqrt(CI, B))
+  // Evaluate special cases related to the exponent.
+
+  if (Value *Sqrt = replacePowWithSqrt(Pow, B))
     return Sqrt;
 
-  ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
-  if (!Op2C)
-    return Ret;
+  ConstantFP *ExpoC = dyn_cast<ConstantFP>(Expo);
+  if (!ExpoC)
+    return Shrunk;
 
-  if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
-    return ConstantFP::get(CI->getType(), 1.0);
+  // pow(x, -1.0) -> 1.0 / x
+  if (ExpoC->isExactlyValue(-1.0))
+    return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal");
 
-  // FIXME: Correct the transforms and pull this into replacePowWithSqrt().
-  if (Op2C->isExactlyValue(0.5) &&
-      hasUnaryFloatFn(TLI, Op2->getType(), LibFunc_sqrt, LibFunc_sqrtf,
-                      LibFunc_sqrtl)) {
-    // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
-    // This is faster than calling pow, and still handles negative zero
-    // and negative infinity correctly.
-    // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
-    Value *Inf = ConstantFP::getInfinity(CI->getType());
-    Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
+  // pow(x, 0.0) -> 1.0
+  if (ExpoC->getValueAPF().isZero())
+    return ConstantFP::get(Ty, 1.0);
 
-    // TODO: As above, we should lower to the sqrt intrinsic if the pow is an
-    // intrinsic, to match errno semantics.
-    Value *Sqrt = emitUnaryFloatFnCall(Op1, "sqrt", B, Callee->getAttributes());
+  // pow(x, 1.0) -> x
+  if (ExpoC->isExactlyValue(1.0))
+    return Base;
 
-    Module *M = Callee->getParent();
-    Function *FabsF = Intrinsic::getDeclaration(M, Intrinsic::fabs,
-                                                CI->getType());
-    Value *FAbs = B.CreateCall(FabsF, Sqrt);
+  // pow(x, 2.0) -> x * x
+  if (ExpoC->isExactlyValue(2.0))
+    return B.CreateFMul(Base, Base, "square");
 
-    Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf);
-    Value *Sel = B.CreateSelect(FCmp, Inf, FAbs);
-    return Sel;
+  // FIXME: Correct the transforms and pull this into replacePowWithSqrt().
+  if (ExpoC->isExactlyValue(0.5) &&
+      hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) {
+    // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
+    // This is faster than calling pow(), and still handles -0.0 and
+    // negative infinity correctly.
+    // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
+    Value *PosInf = ConstantFP::getInfinity(Ty);
+    Value *NegInf = ConstantFP::getInfinity(Ty, true);
+
+    // TODO: As above, we should lower to the sqrt() intrinsic if the pow() is
+    // an intrinsic, to match errno semantics.
+    Value *Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt),
+                                       B, Attrs);
+    Function *FAbsFn = Intrinsic::getDeclaration(Module, Intrinsic::fabs, Ty);
+    Value *FAbs = B.CreateCall(FAbsFn, Sqrt, "abs");
+    Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf");
+    Sqrt = B.CreateSelect(FCmp, PosInf, FAbs);
+    return Sqrt;
   }
 
-  // Propagate fast-math-flags from the call to any created instructions.
-  IRBuilder<>::FastMathFlagGuard Guard(B);
-  B.setFastMathFlags(CI->getFastMathFlags());
-  // pow(x, 1.0) --> x
-  if (Op2C->isExactlyValue(1.0))
-    return Op1;
-  // pow(x, 2.0) --> x * x
-  if (Op2C->isExactlyValue(2.0))
-    return B.CreateFMul(Op1, Op1, "pow2");
-  // pow(x, -1.0) --> 1.0 / x
-  if (Op2C->isExactlyValue(-1.0))
-    return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip");
-
-  // In -ffast-math, generate repeated fmul instead of generating pow(x, n).
-  if (CI->isFast()) {
-    APFloat V = abs(Op2C->getValueAPF());
-    // We limit to a max of 7 fmul(s). Thus max exponent is 32.
+  // pow(x, n) -> x * x * x * ...
+  if (Pow->isFast()) {
+    APFloat ExpoA = abs(ExpoC->getValueAPF());
+    // We limit to a max of 7 fmul(s). Thus the maximum exponent is 32.
     // This transformation applies to integer exponents only.
-    if (V.compare(APFloat(V.getSemantics(), 32.0)) == APFloat::cmpGreaterThan ||
-        !V.isInteger())
+    if (!ExpoA.isInteger() ||
+        ExpoA.compare
+            (APFloat(ExpoA.getSemantics(), 32.0)) == APFloat::cmpGreaterThan)
       return nullptr;
 
     // We will memoize intermediate products of the Addition Chain.
     Value *InnerChain[33] = {nullptr};
-    InnerChain[1] = Op1;
-    InnerChain[2] = B.CreateFMul(Op1, Op1);
+    InnerChain[1] = Base;
+    InnerChain[2] = B.CreateFMul(Base, Base, "square");
 
     // We cannot readily convert a non-double type (like float) to a double.
-    // So we first convert V to something which could be converted to double.
-    bool Ignored;
-    V.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored);
-    
-    Value *FMul = getPow(InnerChain, V.convertToDouble(), B);
-    // For negative exponents simply compute the reciprocal.
-    if (Op2C->isNegative())
-      FMul = B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), FMul);
+    // So we first convert ExpoA to something which can be converted to double.
+    ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored);
+    Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B);
+
+    // If the exponent is negative, then get the reciprocal.
+    if (ExpoC->isNegative())
+      FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal");
     return FMul;
   }