diff options
Diffstat (limited to 'lib/Transforms/Utils/SimplifyLibCalls.cpp')
-rw-r--r-- | lib/Transforms/Utils/SimplifyLibCalls.cpp | 510 |
1 files changed, 383 insertions, 127 deletions
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 1bb26caa2af2..e0def81d5eee 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1,9 +1,8 @@ //===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -17,8 +16,10 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" @@ -35,6 +36,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/SizeOpts.h" using namespace llvm; using namespace PatternMatch; @@ -105,6 +107,12 @@ static bool callHasFloatingPointArgument(const CallInst *CI) { }); } +static bool callHasFP128Argument(const CallInst *CI) { + return any_of(CI->operands(), [](const Use &OI) { + return OI->getType()->isFP128Ty(); + }); +} + static Value *convertStrToNumber(CallInst *CI, StringRef &Str, int64_t Base) { if (Base < 2 || Base > 36) // handle special zero base @@ -334,11 +342,12 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) { return ConstantInt::get(CI->getType(), Str1.compare(Str2)); if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x - return B.CreateNeg( - B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType())); + return B.CreateNeg(B.CreateZExt( + B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType())); if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x - return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); + return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"), + CI->getType()); // strcmp(P, "x") -> memcmp(P, "x", 2) uint64_t Len1 = GetStringLength(Str1P); @@ -398,11 +407,12 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) { } if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x - return B.CreateNeg( - B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType())); + return B.CreateNeg(B.CreateZExt( + B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType())); if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x - return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); + return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"), + CI->getType()); uint64_t Len1 = GetStringLength(Str1P); uint64_t Len2 = GetStringLength(Str2P); @@ -591,7 +601,8 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B, // strlen(x) != 0 --> *x != 0 // strlen(x) == 0 --> *x == 0 if (isOnlyUsedInZeroEqualityComparison(CI)) - return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType()); + return B.CreateZExt(B.CreateLoad(B.getIntNTy(CharSize), Src, "strlenfirst"), + CI->getType()); return nullptr; } @@ -735,7 +746,8 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) { // strstr("abcd", "bc") -> gep((char*)"abcd", 1) Value *Result = castToCStr(CI->getArgOperand(0), B); - Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); + Result = + B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), Result, Offset, "strstr"); return B.CreateBitCast(Result, CI->getType()); } @@ -773,7 +785,8 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) { // It would be really nice to reuse switch lowering here but we can't change // the CFG at this point. // - // memchr("\r\n", C, 2) != nullptr -> (C & ((1 << '\r') | (1 << '\n'))) != 0 + // memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n'))) + // != 0 // after bounds check. if (!CharC && !Str.empty() && isOnlyUsedInZeroEqualityComparison(CI)) { unsigned char Max = @@ -828,27 +841,20 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) { return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "memchr"); } -Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { - Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); - - if (LHS == RHS) // memcmp(s,s,x) -> 0 - return Constant::getNullValue(CI->getType()); - - // Make sure we have a constant length. - ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); - if (!LenC) - return nullptr; - - uint64_t Len = LenC->getZExtValue(); +static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS, + uint64_t Len, IRBuilder<> &B, + const DataLayout &DL) { if (Len == 0) // memcmp(s1,s2,0) -> 0 return Constant::getNullValue(CI->getType()); // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS if (Len == 1) { - Value *LHSV = B.CreateZExt(B.CreateLoad(castToCStr(LHS, B), "lhsc"), - CI->getType(), "lhsv"); - Value *RHSV = B.CreateZExt(B.CreateLoad(castToCStr(RHS, B), "rhsc"), - CI->getType(), "rhsv"); + Value *LHSV = + B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(LHS, B), "lhsc"), + CI->getType(), "lhsv"); + Value *RHSV = + B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(RHS, B), "rhsc"), + CI->getType(), "rhsv"); return B.CreateSub(LHSV, RHSV, "chardiff"); } @@ -878,12 +884,12 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { if (!LHSV) { Type *LHSPtrTy = IntType->getPointerTo(LHS->getType()->getPointerAddressSpace()); - LHSV = B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy), "lhsv"); + LHSV = B.CreateLoad(IntType, B.CreateBitCast(LHS, LHSPtrTy), "lhsv"); } if (!RHSV) { Type *RHSPtrTy = IntType->getPointerTo(RHS->getType()->getPointerAddressSpace()); - RHSV = B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy), "rhsv"); + RHSV = B.CreateLoad(IntType, B.CreateBitCast(RHS, RHSPtrTy), "rhsv"); } return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp"); } @@ -907,10 +913,48 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { Ret = 1; return ConstantInt::get(CI->getType(), Ret); } + return nullptr; +} + +// Most simplifications for memcmp also apply to bcmp. +Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI, + IRBuilder<> &B) { + Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); + Value *Size = CI->getArgOperand(2); + + if (LHS == RHS) // memcmp(s,s,x) -> 0 + return Constant::getNullValue(CI->getType()); + + // Handle constant lengths. + if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size)) + if (Value *Res = optimizeMemCmpConstantSize(CI, LHS, RHS, + LenC->getZExtValue(), B, DL)) + return Res; + + return nullptr; +} + +Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { + if (Value *V = optimizeMemCmpBCmpCommon(CI, B)) + return V; + + // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0 + // `bcmp` can be more efficient than memcmp because it only has to know that + // there is a difference, not where it is. + if (isOnlyUsedInZeroEqualityComparison(CI) && TLI->has(LibFunc_bcmp)) { + Value *LHS = CI->getArgOperand(0); + Value *RHS = CI->getArgOperand(1); + Value *Size = CI->getArgOperand(2); + return emitBCmp(LHS, RHS, Size, B, DL, TLI); + } return nullptr; } +Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilder<> &B) { + return optimizeMemCmpBCmpCommon(CI, B); +} + Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) { // memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n) B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, @@ -1031,7 +1075,8 @@ static Value *valueHasFloatPrecision(Value *Val) { /// Shrink double -> float functions. static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B, bool isBinary, bool isPrecise = false) { - if (!CI->getType()->isDoubleTy()) + Function *CalleeFn = CI->getCalledFunction(); + if (!CI->getType()->isDoubleTy() || !CalleeFn) return nullptr; // If not all the uses of the function are converted to float, then bail out. @@ -1051,15 +1096,16 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B, if (!V[0] || (isBinary && !V[1])) return nullptr; + StringRef CalleeNm = CalleeFn->getName(); + AttributeList CalleeAt = CalleeFn->getAttributes(); + bool CalleeIn = CalleeFn->isIntrinsic(); + // If call isn't an intrinsic, check that it isn't within a function with the // same name as the float version of this call, otherwise the result is an // infinite loop. For example, from MinGW-w64: // // float expf(float val) { return (float) exp((double) val); } - Function *CalleeFn = CI->getCalledFunction(); - StringRef CalleeNm = CalleeFn->getName(); - AttributeList CalleeAt = CalleeFn->getAttributes(); - if (CalleeFn && !CalleeFn->isIntrinsic()) { + if (!CalleeIn) { const Function *Fn = CI->getFunction(); StringRef FnName = Fn->getName(); if (FnName.back() == 'f' && @@ -1074,7 +1120,7 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B, // g((double) float) -> (double) gf(float) Value *R; - if (CalleeFn->isIntrinsic()) { + if (CalleeIn) { Module *M = CI->getModule(); Intrinsic::ID IID = CalleeFn->getIntrinsicID(); Function *Fn = Intrinsic::getDeclaration(M, IID, B.getFloatTy()); @@ -1132,10 +1178,10 @@ static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func, IRBuilder<> &B) { if (!isa<FPMathOperator>(Call)) return nullptr; - + IRBuilder<>::FastMathFlagGuard Guard(B); B.setFastMathFlags(Call->getFastMathFlags()); - + // TODO: Can this be shared to also handle LLVM intrinsics? Value *X; switch (Func) { @@ -1189,7 +1235,8 @@ static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) { } /// Use exp{,2}(x * y) for pow(exp{,2}(x), y); -/// exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x). +/// exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x); +/// exp2(log2(n) * x) for pow(n, x). Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) { Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); AttributeList Attrs = Pow->getCalledFunction()->getAttributes(); @@ -1276,12 +1323,12 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) { APFloat BaseR = APFloat(1.0); BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored); BaseR = BaseR / *BaseF; - bool IsInteger = BaseF->isInteger(), - IsReciprocal = BaseR.isInteger(); + bool IsInteger = BaseF->isInteger(), IsReciprocal = BaseR.isInteger(); const APFloat *NF = IsReciprocal ? &BaseR : BaseF; APSInt NI(64, false); if ((IsInteger || IsReciprocal) && - !NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) && + NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) == + APFloat::opOK && NI > 1 && NI.isPowerOf2()) { double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0); Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul"); @@ -1301,6 +1348,28 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) { return emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l, B, Attrs); + // pow(n, x) -> exp2(log2(n) * x) + if (Pow->hasOneUse() && Pow->hasApproxFunc() && Pow->hasNoNaNs() && + Pow->hasNoInfs() && BaseF->isNormal() && !BaseF->isNegative()) { + Value *Log = nullptr; + if (Ty->isFloatTy()) + Log = ConstantFP::get(Ty, std::log2(BaseF->convertToFloat())); + else if (Ty->isDoubleTy()) + Log = ConstantFP::get(Ty, std::log2(BaseF->convertToDouble())); + + if (Log) { + Value *FMul = B.CreateFMul(Log, Expo, "mul"); + if (Pow->doesNotAccessMemory()) { + return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty), + FMul, "exp2"); + } else { + if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, + LibFunc_exp2l)) + return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f, + LibFunc_exp2l, B, Attrs); + } + } + } return nullptr; } @@ -1364,12 +1433,22 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) { return Sqrt; } +static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M, + IRBuilder<> &B) { + Value *Args[] = {Base, Expo}; + Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Base->getType()); + return B.CreateCall(F, Args); +} + Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { - Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); + Value *Base = Pow->getArgOperand(0); + Value *Expo = Pow->getArgOperand(1); Function *Callee = Pow->getCalledFunction(); StringRef Name = Callee->getName(); Type *Ty = Pow->getType(); + Module *M = Pow->getModule(); Value *Shrunk = nullptr; + bool AllowApprox = Pow->hasApproxFunc(); bool Ignored; // Bail out if simplifying libcalls to pow() is disabled. @@ -1382,8 +1461,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { // Shrink pow() to powf() if the arguments are single precision, // unless the result is expected to be double precision. - if (UnsafeFPShrink && - Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name)) + if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) && + hasFloatVersion(Name)) Shrunk = optimizeBinaryDoubleFP(Pow, B, true); // Evaluate special cases related to the base. @@ -1403,7 +1482,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { // pow(x, 0.0) -> 1.0 if (match(Expo, m_SpecificFP(0.0))) - return ConstantFP::get(Ty, 1.0); + return ConstantFP::get(Ty, 1.0); // pow(x, 1.0) -> x if (match(Expo, m_FPOne())) @@ -1418,7 +1497,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { // pow(x, n) -> x * x * x * ... const APFloat *ExpoF; - if (Pow->isFast() && match(Expo, m_APFloat(ExpoF))) { + if (AllowApprox && match(Expo, m_APFloat(ExpoF))) { // We limit to a max of 7 multiplications, thus the maximum exponent is 32. // If the exponent is an integer+0.5 we generate a call to sqrt and an // additional fmul. @@ -1442,9 +1521,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { if (!Expo2.isInteger()) return nullptr; - Sqrt = - getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(), - Pow->doesNotAccessMemory(), Pow->getModule(), B, TLI); + Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(), + Pow->doesNotAccessMemory(), M, B, TLI); } // We will memoize intermediate products of the Addition Chain. @@ -1467,6 +1545,29 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { return FMul; } + + APSInt IntExpo(32, /*isUnsigned=*/false); + // powf(x, n) -> powi(x, n) if n is a constant signed integer value + if (ExpoF->isInteger() && + ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) == + APFloat::opOK) { + return createPowWithIntegerExponent( + Base, ConstantInt::get(B.getInt32Ty(), IntExpo), M, B); + } + } + + // powf(x, itofp(y)) -> powi(x, y) + if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) { + Value *IntExpo = cast<Instruction>(Expo)->getOperand(0); + Value *NewExpo = nullptr; + unsigned BitWidth = IntExpo->getType()->getPrimitiveSizeInBits(); + if (isa<SIToFPInst>(Expo) && BitWidth == 32) + NewExpo = IntExpo; + else if (BitWidth < 32) + NewExpo = isa<SIToFPInst>(Expo) ? B.CreateSExt(IntExpo, B.getInt32Ty()) + : B.CreateZExt(IntExpo, B.getInt32Ty()); + if (NewExpo) + return createPowWithIntegerExponent(Base, NewExpo, M, B); } return Shrunk; @@ -1504,9 +1605,8 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { One = ConstantExpr::getFPExtend(One, Op->getType()); Module *M = CI->getModule(); - Value *NewCallee = - M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(), - Op->getType(), B.getInt32Ty()); + FunctionCallee NewCallee = M->getOrInsertFunction( + TLI->getName(LdExp), Op->getType(), Op->getType(), B.getInt32Ty()); CallInst *CI = B.CreateCall(NewCallee, {One, LdExpArg}); if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -1518,40 +1618,30 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); // If we can shrink the call to a float function rather than a double // function, do that first. + Function *Callee = CI->getCalledFunction(); StringRef Name = Callee->getName(); if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name)) if (Value *Ret = optimizeBinaryDoubleFP(CI, B)) return Ret; + // The LLVM intrinsics minnum/maxnum correspond to fmin/fmax. Canonicalize to + // the intrinsics for improved optimization (for example, vectorization). + // No-signed-zeros is implied by the definitions of fmax/fmin themselves. + // From the C standard draft WG14/N1256: + // "Ideally, fmax would be sensitive to the sign of zero, for example + // fmax(-0.0, +0.0) would return +0; however, implementation in software + // might be impractical." IRBuilder<>::FastMathFlagGuard Guard(B); - FastMathFlags FMF; - if (CI->isFast()) { - // If the call is 'fast', then anything we create here will also be 'fast'. - FMF.setFast(); - } else { - // At a minimum, no-nans-fp-math must be true. - if (!CI->hasNoNaNs()) - return nullptr; - // No-signed-zeros is implied by the definitions of fmax/fmin themselves: - // "Ideally, fmax would be sensitive to the sign of zero, for example - // fmax(-0. 0, +0. 0) would return +0; however, implementation in software - // might be impractical." - FMF.setNoSignedZeros(); - FMF.setNoNaNs(); - } + FastMathFlags FMF = CI->getFastMathFlags(); + FMF.setNoSignedZeros(); B.setFastMathFlags(FMF); - // We have a relaxed floating-point environment. We can ignore NaN-handling - // and transform to a compare and select. We do not have to consider errno or - // exceptions, because fmin/fmax do not have those. - Value *Op0 = CI->getArgOperand(0); - Value *Op1 = CI->getArgOperand(1); - Value *Cmp = Callee->getName().startswith("fmin") ? - B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1); - return B.CreateSelect(Cmp, Op0, Op1); + Intrinsic::ID IID = Callee->getName().startswith("fmin") ? Intrinsic::minnum + : Intrinsic::maxnum; + Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, CI->getType()); + return B.CreateCall(F, { CI->getArgOperand(0), CI->getArgOperand(1) }); } Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { @@ -1654,13 +1744,13 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { // replace it with the fabs of that factor. Module *M = Callee->getParent(); Type *ArgType = I->getType(); - Value *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType); + Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType); Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs"); if (OtherOp) { // If we found a non-repeated factor, we still need to get its square // root. We then multiply that by the value that was simplified out // of the square root calculation. - Value *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType); + Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType); Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt"); return B.CreateFMul(FabsCall, SqrtCall); } @@ -1728,8 +1818,8 @@ static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, } Module *M = OrigCallee->getParent(); - Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(), - ResTy, ArgTy); + FunctionCallee Callee = + M->getOrInsertFunction(Name, OrigCallee->getAttributes(), ResTy, ArgTy); if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) { // If the argument is an instruction, it must dominate all uses so put our @@ -1840,8 +1930,8 @@ Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) { // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0 Value *Op = CI->getArgOperand(0); Type *ArgType = Op->getType(); - Value *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(), - Intrinsic::cttz, ArgType); + Function *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(), + Intrinsic::cttz, ArgType); Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz"); V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1)); V = B.CreateIntCast(V, B.getInt32Ty(), false); @@ -1854,8 +1944,8 @@ Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilder<> &B) { // fls(x) -> (i32)(sizeInBits(x) - llvm.ctlz(x, false)) Value *Op = CI->getArgOperand(0); Type *ArgType = Op->getType(); - Value *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(), - Intrinsic::ctlz, ArgType); + Function *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(), + Intrinsic::ctlz, ArgType); Value *V = B.CreateCall(F, {Op, B.getFalse()}, "ctlz"); V = B.CreateSub(ConstantInt::get(V->getType(), ArgType->getIntegerBitWidth()), V); @@ -2026,13 +2116,27 @@ Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) { // arguments. if (TLI->has(LibFunc_iprintf) && !callHasFloatingPointArgument(CI)) { Module *M = B.GetInsertBlock()->getParent()->getParent(); - Constant *IPrintFFn = + FunctionCallee IPrintFFn = M->getOrInsertFunction("iprintf", FT, Callee->getAttributes()); CallInst *New = cast<CallInst>(CI->clone()); New->setCalledFunction(IPrintFFn); B.Insert(New); return New; } + + // printf(format, ...) -> __small_printf(format, ...) if no 128-bit floating point + // arguments. + if (TLI->has(LibFunc_small_printf) && !callHasFP128Argument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + auto SmallPrintFFn = + M->getOrInsertFunction(TLI->getName(LibFunc_small_printf), + FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(SmallPrintFFn); + B.Insert(New); + return New; + } + return nullptr; } @@ -2077,7 +2181,8 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) { } if (FormatStr[1] == 's') { - // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) + // sprintf(dest, "%s", str) -> llvm.memcpy(align 1 dest, align 1 str, + // strlen(str)+1) if (!CI->getArgOperand(2)->getType()->isPointerTy()) return nullptr; @@ -2105,13 +2210,27 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) { // point arguments. if (TLI->has(LibFunc_siprintf) && !callHasFloatingPointArgument(CI)) { Module *M = B.GetInsertBlock()->getParent()->getParent(); - Constant *SIPrintFFn = + FunctionCallee SIPrintFFn = M->getOrInsertFunction("siprintf", FT, Callee->getAttributes()); CallInst *New = cast<CallInst>(CI->clone()); New->setCalledFunction(SIPrintFFn); B.Insert(New); return New; } + + // sprintf(str, format, ...) -> __small_sprintf(str, format, ...) if no 128-bit + // floating point arguments. + if (TLI->has(LibFunc_small_sprintf) && !callHasFP128Argument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + auto SmallSPrintFFn = + M->getOrInsertFunction(TLI->getName(LibFunc_small_sprintf), + FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(SmallSPrintFFn); + B.Insert(New); + return New; + } + return nullptr; } @@ -2140,7 +2259,7 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) { else if (N < FormatStr.size() + 1) return nullptr; - // sprintf(str, size, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, + // snprintf(dst, size, fmt) -> llvm.memcpy(align 1 dst, align 1 fmt, // strlen(fmt)+1) B.CreateMemCpy( CI->getArgOperand(0), 1, CI->getArgOperand(2), 1, @@ -2262,13 +2381,27 @@ Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilder<> &B) { // floating point arguments. if (TLI->has(LibFunc_fiprintf) && !callHasFloatingPointArgument(CI)) { Module *M = B.GetInsertBlock()->getParent()->getParent(); - Constant *FIPrintFFn = + FunctionCallee FIPrintFFn = M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes()); CallInst *New = cast<CallInst>(CI->clone()); New->setCalledFunction(FIPrintFFn); B.Insert(New); return New; } + + // fprintf(stream, format, ...) -> __small_fprintf(stream, format, ...) if no + // 128-bit floating point arguments. + if (TLI->has(LibFunc_small_fprintf) && !callHasFP128Argument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + auto SmallFPrintFFn = + M->getOrInsertFunction(TLI->getName(LibFunc_small_fprintf), + FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(SmallFPrintFFn); + B.Insert(New); + return New; + } + return nullptr; } @@ -2288,7 +2421,8 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) { // If this is writing one byte, turn it into fputc. // This optimisation is only valid, if the return value is unused. if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) - Value *Char = B.CreateLoad(castToCStr(CI->getArgOperand(0), B), "char"); + Value *Char = B.CreateLoad(B.getInt8Ty(), + castToCStr(CI->getArgOperand(0), B), "char"); Value *NewCI = emitFPutC(Char, CI->getArgOperand(3), B, TLI); return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr; } @@ -2307,7 +2441,9 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) { // Don't rewrite fputs to fwrite when optimising for size because fwrite // requires more arguments and thus extra MOVs are required. - if (CI->getFunction()->optForSize()) + bool OptForSize = CI->getFunction()->hasOptSize() || + llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); + if (OptForSize) return nullptr; // Check if has any use @@ -2320,7 +2456,7 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) { return nullptr; } - // fputs(s,F) --> fwrite(s,1,strlen(s),F) + // fputs(s,F) --> fwrite(s,strlen(s),1,F) uint64_t Len = GetStringLength(CI->getArgOperand(0)); if (!Len) return nullptr; @@ -2367,18 +2503,14 @@ Value *LibCallSimplifier::optimizeFRead(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) { - // Check for a constant string. - StringRef Str; - if (!getConstantStringInfo(CI->getArgOperand(0), Str)) + if (!CI->use_empty()) return nullptr; - if (Str.empty() && CI->use_empty()) { - // puts("") -> putchar('\n') - Value *Res = emitPutChar(B.getInt32('\n'), B, TLI); - if (CI->use_empty() || !Res) - return Res; - return B.CreateIntCast(Res, CI->getType(), true); - } + // Check for a constant string. + // puts("") -> putchar('\n') + StringRef Str; + if (getConstantStringInfo(CI->getArgOperand(0), Str) && Str.empty()) + return emitPutChar(B.getInt32('\n'), B, TLI); return nullptr; } @@ -2441,6 +2573,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, return optimizeStrStr(CI, Builder); case LibFunc_memchr: return optimizeMemChr(CI, Builder); + case LibFunc_bcmp: + return optimizeBCmp(CI, Builder); case LibFunc_memcmp: return optimizeMemCmp(CI, Builder); case LibFunc_memcpy: @@ -2686,9 +2820,10 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { LibCallSimplifier::LibCallSimplifier( const DataLayout &DL, const TargetLibraryInfo *TLI, OptimizationRemarkEmitter &ORE, + BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, function_ref<void(Instruction *, Value *)> Replacer, function_ref<void(Instruction *)> Eraser) - : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), + : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), BFI(BFI), PSI(PSI), UnsafeFPShrink(false), Replacer(Replacer), Eraser(Eraser) {} void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) { @@ -2735,12 +2870,23 @@ void LibCallSimplifier::eraseFromParent(Instruction *I) { // Fortified Library Call Optimizations //===----------------------------------------------------------------------===// -bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, - unsigned ObjSizeOp, - unsigned SizeOp, - bool isString) { - if (CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(SizeOp)) +bool +FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, + unsigned ObjSizeOp, + Optional<unsigned> SizeOp, + Optional<unsigned> StrOp, + Optional<unsigned> FlagOp) { + // If this function takes a flag argument, the implementation may use it to + // perform extra checks. Don't fold into the non-checking variant. + if (FlagOp) { + ConstantInt *Flag = dyn_cast<ConstantInt>(CI->getArgOperand(*FlagOp)); + if (!Flag || !Flag->isZero()) + return false; + } + + if (SizeOp && CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(*SizeOp)) return true; + if (ConstantInt *ObjSizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) { if (ObjSizeCI->isMinusOne()) @@ -2748,23 +2894,27 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, // If the object size wasn't -1 (unknown), bail out if we were asked to. if (OnlyLowerUnknownSize) return false; - if (isString) { - uint64_t Len = GetStringLength(CI->getArgOperand(SizeOp)); + if (StrOp) { + uint64_t Len = GetStringLength(CI->getArgOperand(*StrOp)); // If the length is 0 we don't know how long it is and so we can't // remove the check. if (Len == 0) return false; return ObjSizeCI->getZExtValue() >= Len; } - if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeOp))) - return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue(); + + if (SizeOp) { + if (ConstantInt *SizeCI = + dyn_cast<ConstantInt>(CI->getArgOperand(*SizeOp))) + return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue(); + } } return false; } Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B) { - if (isFortifiedCallFoldable(CI, 3, 2, false)) { + if (isFortifiedCallFoldable(CI, 3, 2)) { B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, CI->getArgOperand(2)); return CI->getArgOperand(0); @@ -2774,7 +2924,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B) { - if (isFortifiedCallFoldable(CI, 3, 2, false)) { + if (isFortifiedCallFoldable(CI, 3, 2)) { B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, CI->getArgOperand(2)); return CI->getArgOperand(0); @@ -2786,7 +2936,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, IRBuilder<> &B) { // TODO: Try foldMallocMemset() here. - if (isFortifiedCallFoldable(CI, 3, 2, false)) { + if (isFortifiedCallFoldable(CI, 3, 2)) { Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); return CI->getArgOperand(0); @@ -2797,8 +2947,6 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, IRBuilder<> &B, LibFunc Func) { - Function *Callee = CI->getCalledFunction(); - StringRef Name = Callee->getName(); const DataLayout &DL = CI->getModule()->getDataLayout(); Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1), *ObjSize = CI->getArgOperand(2); @@ -2814,8 +2962,12 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, // st[rp]cpy_chk call which may fail at runtime if the size is too long. // TODO: It might be nice to get a maximum length out of the possible // string lengths for varying. - if (isFortifiedCallFoldable(CI, 2, 1, true)) - return emitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6)); + if (isFortifiedCallFoldable(CI, 2, None, 1)) { + if (Func == LibFunc_strcpy_chk) + return emitStrCpy(Dst, Src, B, TLI); + else + return emitStpCpy(Dst, Src, B, TLI); + } if (OnlyLowerUnknownSize) return nullptr; @@ -2838,13 +2990,99 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI, IRBuilder<> &B, LibFunc Func) { - Function *Callee = CI->getCalledFunction(); - StringRef Name = Callee->getName(); - if (isFortifiedCallFoldable(CI, 3, 2, false)) { - Value *Ret = emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TLI, Name.substr(2, 7)); - return Ret; + if (isFortifiedCallFoldable(CI, 3, 2)) { + if (Func == LibFunc_strncpy_chk) + return emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI); + else + return emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI); } + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI, + IRBuilder<> &B) { + if (isFortifiedCallFoldable(CI, 4, 3)) + return emitMemCCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), CI->getArgOperand(3), B, TLI); + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI, + IRBuilder<> &B) { + if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) { + SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 5, CI->arg_end()); + return emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(4), VariadicArgs, B, TLI); + } + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI, + IRBuilder<> &B) { + if (isFortifiedCallFoldable(CI, 2, None, None, 1)) { + SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 4, CI->arg_end()); + return emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), VariadicArgs, + B, TLI); + } + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeStrCatChk(CallInst *CI, + IRBuilder<> &B) { + if (isFortifiedCallFoldable(CI, 2)) + return emitStrCat(CI->getArgOperand(0), CI->getArgOperand(1), B, TLI); + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeStrLCat(CallInst *CI, + IRBuilder<> &B) { + if (isFortifiedCallFoldable(CI, 3)) + return emitStrLCat(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI); + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeStrNCatChk(CallInst *CI, + IRBuilder<> &B) { + if (isFortifiedCallFoldable(CI, 3)) + return emitStrNCat(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI); + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeStrLCpyChk(CallInst *CI, + IRBuilder<> &B) { + if (isFortifiedCallFoldable(CI, 3)) + return emitStrLCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI); + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst *CI, + IRBuilder<> &B) { + if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) + return emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(4), CI->getArgOperand(5), B, TLI); + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeVSPrintfChk(CallInst *CI, + IRBuilder<> &B) { + if (isFortifiedCallFoldable(CI, 2, None, None, 1)) + return emitVSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), + CI->getArgOperand(4), B, TLI); + return nullptr; } @@ -2892,6 +3130,24 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) { case LibFunc_stpncpy_chk: case LibFunc_strncpy_chk: return optimizeStrpNCpyChk(CI, Builder, Func); + case LibFunc_memccpy_chk: + return optimizeMemCCpyChk(CI, Builder); + case LibFunc_snprintf_chk: + return optimizeSNPrintfChk(CI, Builder); + case LibFunc_sprintf_chk: + return optimizeSPrintfChk(CI, Builder); + case LibFunc_strcat_chk: + return optimizeStrCatChk(CI, Builder); + case LibFunc_strlcat_chk: + return optimizeStrLCat(CI, Builder); + case LibFunc_strncat_chk: + return optimizeStrNCatChk(CI, Builder); + case LibFunc_strlcpy_chk: + return optimizeStrLCpyChk(CI, Builder); + case LibFunc_vsnprintf_chk: + return optimizeVSNPrintfChk(CI, Builder); + case LibFunc_vsprintf_chk: + return optimizeVSPrintfChk(CI, Builder); default: break; } |