diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2023-07-26 19:03:47 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2023-07-26 19:04:23 +0000 |
commit | 7fa27ce4a07f19b07799a767fc29416f3b625afb (patch) | |
tree | 27825c83636c4de341eb09a74f49f5d38a15d165 /llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | |
parent | e3b557809604d036af6e00c60f012c2025b59a5e (diff) |
Diffstat (limited to 'llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp')
-rw-r--r-- | llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 218 |
1 files changed, 170 insertions, 48 deletions
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 20f18322d43c..5b0951252c07 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -14,11 +14,12 @@ #include "llvm/Transforms/Utils/SimplifyLibCalls.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/Triple.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/AttributeMask.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -29,6 +30,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" +#include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SizeOpts.h" @@ -44,6 +46,45 @@ static cl::opt<bool> cl::desc("Enable unsafe double to float " "shrinking for math lib calls")); +// Enable conversion of operator new calls with a MemProf hot or cold hint +// to an operator new call that takes a hot/cold hint. Off by default since +// not all allocators currently support this extension. +static cl::opt<bool> + OptimizeHotColdNew("optimize-hot-cold-new", cl::Hidden, cl::init(false), + cl::desc("Enable hot/cold operator new library calls")); + +namespace { + +// Specialized parser to ensure the hint is an 8 bit value (we can't specify +// uint8_t to opt<> as that is interpreted to mean that we are passing a char +// option with a specific set of values). 
+struct HotColdHintParser : public cl::parser<unsigned> { + HotColdHintParser(cl::Option &O) : cl::parser<unsigned>(O) {} + + bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, unsigned &Value) { + if (Arg.getAsInteger(0, Value)) + return O.error("'" + Arg + "' value invalid for uint argument!"); + + if (Value > 255) + return O.error("'" + Arg + "' value must be in the range [0, 255]!"); + + return false; + } +}; + +} // end anonymous namespace + +// Hot/cold operator new takes an 8 bit hotness hint, where 0 is the coldest +// and 255 is the hottest. Default to 1 value away from the coldest and hottest +// hints, so that the compiler hinted allocations are slightly less strong than +// manually inserted hints at the two extremes. +static cl::opt<unsigned, false, HotColdHintParser> ColdNewHintValue( + "cold-new-hint-value", cl::Hidden, cl::init(1), + cl::desc("Value to pass to hot/cold operator new for cold allocation")); +static cl::opt<unsigned, false, HotColdHintParser> HotNewHintValue( + "hot-new-hint-value", cl::Hidden, cl::init(254), + cl::desc("Value to pass to hot/cold operator new for hot allocation")); + //===----------------------------------------------------------------------===// // Helper Functions //===----------------------------------------------------------------------===// @@ -186,21 +227,9 @@ static Value *convertStrToInt(CallInst *CI, StringRef &Str, Value *EndPtr, return ConstantInt::get(RetTy, Result); } -static bool isOnlyUsedInComparisonWithZero(Value *V) { - for (User *U : V->users()) { - if (ICmpInst *IC = dyn_cast<ICmpInst>(U)) - if (Constant *C = dyn_cast<Constant>(IC->getOperand(1))) - if (C->isNullValue()) - continue; - // Unknown instruction. 
- return false; - } - return true; -} - static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len, const DataLayout &DL) { - if (!isOnlyUsedInComparisonWithZero(CI)) + if (!isOnlyUsedInZeroComparison(CI)) return false; if (!isDereferenceableAndAlignedPointer(Str, Align(1), APInt(64, Len), DL)) @@ -1358,6 +1387,10 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) { return nullptr; } + bool OptForSize = CI->getFunction()->hasOptSize() || + llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI, + PGSOQueryType::IRPass); + // If the char is variable but the input str and length are not we can turn // this memchr call into a simple bit field test. Of course this only works // when the return value is only checked against null. @@ -1368,7 +1401,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) { // memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n'))) // != 0 // after bounds check. - if (Str.empty() || !isOnlyUsedInZeroEqualityComparison(CI)) + if (OptForSize || Str.empty() || !isOnlyUsedInZeroEqualityComparison(CI)) return nullptr; unsigned char Max = @@ -1380,8 +1413,34 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) { // FIXME: On a 64 bit architecture this prevents us from using the // interesting range of alpha ascii chars. We could do better by emitting // two bitfields or shifting the range by 64 if no lower chars are used. - if (!DL.fitsInLegalInteger(Max + 1)) - return nullptr; + if (!DL.fitsInLegalInteger(Max + 1)) { + // Build chain of ORs + // Transform: + // memchr("abcd", C, 4) != nullptr + // to: + // (C == 'a' || C == 'b' || C == 'c' || C == 'd') != 0 + std::string SortedStr = Str.str(); + llvm::sort(SortedStr); + // Compute the number of non-contiguous ranges. 
+ unsigned NonContRanges = 1; + for (size_t i = 1; i < SortedStr.size(); ++i) { + if (SortedStr[i] > SortedStr[i - 1] + 1) { + NonContRanges++; + } + } + + // Restrict this optimization to profitable cases with one or two range + // checks. + if (NonContRanges > 2) + return nullptr; + + SmallVector<Value *> CharCompares; + for (unsigned char C : SortedStr) + CharCompares.push_back( + B.CreateICmpEQ(CharVal, ConstantInt::get(CharVal->getType(), C))); + + return B.CreateIntToPtr(B.CreateOr(CharCompares), CI->getType()); + } // For the bit field use a power-of-2 type with at least 8 bits to avoid // creating unnecessary illegal types. @@ -1481,30 +1540,21 @@ static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS, // First, see if we can fold either argument to a constant. Value *LHSV = nullptr; - if (auto *LHSC = dyn_cast<Constant>(LHS)) { - LHSC = ConstantExpr::getBitCast(LHSC, IntType->getPointerTo()); + if (auto *LHSC = dyn_cast<Constant>(LHS)) LHSV = ConstantFoldLoadFromConstPtr(LHSC, IntType, DL); - } + Value *RHSV = nullptr; - if (auto *RHSC = dyn_cast<Constant>(RHS)) { - RHSC = ConstantExpr::getBitCast(RHSC, IntType->getPointerTo()); + if (auto *RHSC = dyn_cast<Constant>(RHS)) RHSV = ConstantFoldLoadFromConstPtr(RHSC, IntType, DL); - } // Don't generate unaligned loads. If either source is constant data, // alignment doesn't matter for that source because there is no load. 
if ((LHSV || getKnownAlignment(LHS, DL, CI) >= PrefAlignment) && (RHSV || getKnownAlignment(RHS, DL, CI) >= PrefAlignment)) { - if (!LHSV) { - Type *LHSPtrTy = - IntType->getPointerTo(LHS->getType()->getPointerAddressSpace()); - LHSV = B.CreateLoad(IntType, B.CreateBitCast(LHS, LHSPtrTy), "lhsv"); - } - if (!RHSV) { - Type *RHSPtrTy = - IntType->getPointerTo(RHS->getType()->getPointerAddressSpace()); - RHSV = B.CreateLoad(IntType, B.CreateBitCast(RHS, RHSPtrTy), "rhsv"); - } + if (!LHSV) + LHSV = B.CreateLoad(IntType, LHS, "lhsv"); + if (!RHSV) + RHSV = B.CreateLoad(IntType, RHS, "rhsv"); return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp"); } } @@ -1653,6 +1703,59 @@ Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilderBase &B) { return nullptr; } +// When enabled, replace operator new() calls marked with a hot or cold memprof +// attribute with an operator new() call that takes a __hot_cold_t parameter. +// Currently this is supported by the open source version of tcmalloc, see: +// https://github.com/google/tcmalloc/blob/master/tcmalloc/new_extension.h +Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B, + LibFunc &Func) { + if (!OptimizeHotColdNew) + return nullptr; + + uint8_t HotCold; + if (CI->getAttributes().getFnAttr("memprof").getValueAsString() == "cold") + HotCold = ColdNewHintValue; + else if (CI->getAttributes().getFnAttr("memprof").getValueAsString() == "hot") + HotCold = HotNewHintValue; + else + return nullptr; + + switch (Func) { + case LibFunc_Znwm: + return emitHotColdNew(CI->getArgOperand(0), B, TLI, + LibFunc_Znwm12__hot_cold_t, HotCold); + case LibFunc_Znam: + return emitHotColdNew(CI->getArgOperand(0), B, TLI, + LibFunc_Znam12__hot_cold_t, HotCold); + case LibFunc_ZnwmRKSt9nothrow_t: + return emitHotColdNewNoThrow(CI->getArgOperand(0), CI->getArgOperand(1), B, + TLI, LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, + HotCold); + case LibFunc_ZnamRKSt9nothrow_t: + return 
emitHotColdNewNoThrow(CI->getArgOperand(0), CI->getArgOperand(1), B, + TLI, LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, + HotCold); + case LibFunc_ZnwmSt11align_val_t: + return emitHotColdNewAligned(CI->getArgOperand(0), CI->getArgOperand(1), B, + TLI, LibFunc_ZnwmSt11align_val_t12__hot_cold_t, + HotCold); + case LibFunc_ZnamSt11align_val_t: + return emitHotColdNewAligned(CI->getArgOperand(0), CI->getArgOperand(1), B, + TLI, LibFunc_ZnamSt11align_val_t12__hot_cold_t, + HotCold); + case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t: + return emitHotColdNewAlignedNoThrow( + CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B, + TLI, LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t, HotCold); + case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t: + return emitHotColdNewAlignedNoThrow( + CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B, + TLI, LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t, HotCold); + default: + return nullptr; + } +} + //===----------------------------------------------------------------------===// // Math Library Optimizations //===----------------------------------------------------------------------===// @@ -1939,7 +2042,8 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { AttributeList NoAttrs; // Attributes are only meaningful on the original call // pow(2.0, itofp(x)) -> ldexp(1.0, x) - if (match(Base, m_SpecificFP(2.0)) && + // TODO: This does not work for vectors because there is no ldexp intrinsic. 
+ if (!Ty->isVectorTy() && match(Base, m_SpecificFP(2.0)) && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo)) && hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) { if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize())) @@ -2056,7 +2160,7 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) { // pow(-Inf, 0.5) is optionally required to have a result of +Inf (not setting // errno), but sqrt(-Inf) is required by various standards to set errno. if (!Pow->doesNotAccessMemory() && !Pow->hasNoInfs() && - !isKnownNeverInfinity(Base, TLI)) + !isKnownNeverInfinity(Base, DL, TLI, 0, AC, Pow)) return nullptr; Sqrt = getSqrtCall(Base, AttributeList(), Pow->doesNotAccessMemory(), Mod, B, @@ -2217,17 +2321,25 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) { hasFloatVersion(M, Name)) Ret = optimizeUnaryDoubleFP(CI, B, TLI, true); + // Bail out for vectors because the code below only expects scalars. + // TODO: This could be allowed if we had a ldexp intrinsic (D14327). 
Type *Ty = CI->getType(); - Value *Op = CI->getArgOperand(0); + if (Ty->isVectorTy()) + return Ret; // exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= IntSize // exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < IntSize + Value *Op = CI->getArgOperand(0); if ((isa<SIToFPInst>(Op) || isa<UIToFPInst>(Op)) && hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) { - if (Value *Exp = getIntToFPVal(Op, B, TLI->getIntSize())) - return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI, - LibFunc_ldexp, LibFunc_ldexpf, - LibFunc_ldexpl, B, AttributeList()); + if (Value *Exp = getIntToFPVal(Op, B, TLI->getIntSize())) { + IRBuilderBase::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + return copyFlags( + *CI, emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI, + LibFunc_ldexp, LibFunc_ldexpf, + LibFunc_ldexpl, B, AttributeList())); + } } return Ret; @@ -2579,7 +2691,7 @@ static bool insertSinCosCall(IRBuilderBase &B, Function *OrigCallee, Value *Arg, return true; } -Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilderBase &B) { +Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, bool IsSin, IRBuilderBase &B) { // Make sure the prototype is as expected, otherwise the rest of the // function is probably invalid and likely to abort. if (!isTrigLibCall(CI)) @@ -2618,7 +2730,7 @@ Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilderBase &B) { replaceTrigInsts(CosCalls, Cos); replaceTrigInsts(SinCosCalls, SinCos); - return nullptr; + return IsSin ? 
Sin : Cos; } void LibCallSimplifier::classifyArgUse( @@ -3439,6 +3551,15 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, return optimizeWcslen(CI, Builder); case LibFunc_bcopy: return optimizeBCopy(CI, Builder); + case LibFunc_Znwm: + case LibFunc_ZnwmRKSt9nothrow_t: + case LibFunc_ZnwmSt11align_val_t: + case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t: + case LibFunc_Znam: + case LibFunc_ZnamRKSt9nothrow_t: + case LibFunc_ZnamSt11align_val_t: + case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t: + return optimizeNew(CI, Builder, Func); default: break; } @@ -3461,9 +3582,10 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI, switch (Func) { case LibFunc_sinpif: case LibFunc_sinpi: + return optimizeSinCosPi(CI, /*IsSin*/true, Builder); case LibFunc_cospif: case LibFunc_cospi: - return optimizeSinCosPi(CI, Builder); + return optimizeSinCosPi(CI, /*IsSin*/false, Builder); case LibFunc_powf: case LibFunc_pow: case LibFunc_powl: @@ -3696,13 +3818,13 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) { } LibCallSimplifier::LibCallSimplifier( - const DataLayout &DL, const TargetLibraryInfo *TLI, - OptimizationRemarkEmitter &ORE, - BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, + const DataLayout &DL, const TargetLibraryInfo *TLI, AssumptionCache *AC, + OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, + ProfileSummaryInfo *PSI, function_ref<void(Instruction *, Value *)> Replacer, function_ref<void(Instruction *)> Eraser) - : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), BFI(BFI), PSI(PSI), - Replacer(Replacer), Eraser(Eraser) {} + : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), AC(AC), ORE(ORE), BFI(BFI), + PSI(PSI), Replacer(Replacer), Eraser(Eraser) {} void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) { // Indirect through the replacer used in this instance. |