src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2023-07-26 19:03:47 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2023-07-26 19:04:23 +0000
commit	7fa27ce4a07f19b07799a767fc29416f3b625afb (patch)
tree	27825c83636c4de341eb09a74f49f5d38a15d165 /llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
parent	e3b557809604d036af6e00c60f012c2025b59a5e (diff)

vendor/llvm-project/llvmorg-17-init-19304-gd0b54bb50e51

Diffstat (limited to 'llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp')

-rw-r--r--

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

218

1 files changed, 170 insertions, 48 deletions

diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 20f18322d43c..5b0951252c07 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

@@ -14,11 +14,12 @@

#include "llvm/Transforms/Utils/SimplifyLibCalls.h"

#include "llvm/ADT/APSInt.h"

#include "llvm/ADT/SmallString.h"

-#include "llvm/ADT/Triple.h"

+#include "llvm/ADT/StringExtras.h"

#include "llvm/Analysis/ConstantFolding.h"

#include "llvm/Analysis/Loads.h"

#include "llvm/Analysis/OptimizationRemarkEmitter.h"

#include "llvm/Analysis/ValueTracking.h"

+#include "llvm/IR/AttributeMask.h"

#include "llvm/IR/DataLayout.h"

#include "llvm/IR/Function.h"

#include "llvm/IR/IRBuilder.h"

@@ -29,6 +30,7 @@

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/KnownBits.h"

#include "llvm/Support/MathExtras.h"

+#include "llvm/TargetParser/Triple.h"

#include "llvm/Transforms/Utils/BuildLibCalls.h"

#include "llvm/Transforms/Utils/Local.h"

#include "llvm/Transforms/Utils/SizeOpts.h"

@@ -44,6 +46,45 @@ static cl::opt<bool>

cl::desc("Enable unsafe double to float "

"shrinking for math lib calls"));

+// Enable conversion of operator new calls with a MemProf hot or cold hint

+// to an operator new call that takes a hot/cold hint. Off by default since

+// not all allocators currently support this extension.

+static cl::opt<bool>

+ OptimizeHotColdNew("optimize-hot-cold-new", cl::Hidden, cl::init(false),

+ cl::desc("Enable hot/cold operator new library calls"));

+namespace {

+// Specialized parser to ensure the hint is an 8 bit value (we can't specify

+// uint8_t to opt<> as that is interpreted to mean that we are passing a char

+// option with a specific set of values).

+struct HotColdHintParser : public cl::parser<unsigned> {

+ HotColdHintParser(cl::Option &O) : cl::parser<unsigned>(O) {}

+ bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, unsigned &Value) {

+ if (Arg.getAsInteger(0, Value))

+ return O.error("'" + Arg + "' value invalid for uint argument!");

+ if (Value > 255)

+ return O.error("'" + Arg + "' value must be in the range [0, 255]!");

+ return false;

+ }

+};

+} // end anonymous namespace

+// Hot/cold operator new takes an 8 bit hotness hint, where 0 is the coldest

+// and 255 is the hottest. Default to 1 value away from the coldest and hottest

+// hints, so that the compiler hinted allocations are slightly less strong than

+// manually inserted hints at the two extremes.

+static cl::opt<unsigned, false, HotColdHintParser> ColdNewHintValue(

+ "cold-new-hint-value", cl::Hidden, cl::init(1),

+ cl::desc("Value to pass to hot/cold operator new for cold allocation"));

+static cl::opt<unsigned, false, HotColdHintParser> HotNewHintValue(

+ "hot-new-hint-value", cl::Hidden, cl::init(254),

+ cl::desc("Value to pass to hot/cold operator new for hot allocation"));

//===----------------------------------------------------------------------===//

// Helper Functions

//===----------------------------------------------------------------------===//

@@ -186,21 +227,9 @@ static Value *convertStrToInt(CallInst *CI, StringRef &Str, Value *EndPtr,

return ConstantInt::get(RetTy, Result);

}

-static bool isOnlyUsedInComparisonWithZero(Value *V) {

- for (User *U : V->users()) {

- if (ICmpInst *IC = dyn_cast<ICmpInst>(U))

- if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))

- if (C->isNullValue())

- continue;

- // Unknown instruction.

- return false;

- }

- return true;

static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,

const DataLayout &DL) {

- if (!isOnlyUsedInComparisonWithZero(CI))

+ if (!isOnlyUsedInZeroComparison(CI))

return false;

if (!isDereferenceableAndAlignedPointer(Str, Align(1), APInt(64, Len), DL))

@@ -1358,6 +1387,10 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {

return nullptr;

}

+ bool OptForSize = CI->getFunction()->hasOptSize() ||

+ llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,

+ PGSOQueryType::IRPass);

// If the char is variable but the input str and length are not we can turn

// this memchr call into a simple bit field test. Of course this only works

// when the return value is only checked against null.

@@ -1368,7 +1401,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {

// memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n')))

// != 0

// after bounds check.

- if (Str.empty() || !isOnlyUsedInZeroEqualityComparison(CI))

+ if (OptForSize || Str.empty() || !isOnlyUsedInZeroEqualityComparison(CI))

return nullptr;

unsigned char Max =

@@ -1380,8 +1413,34 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {

// FIXME: On a 64 bit architecture this prevents us from using the

// interesting range of alpha ascii chars. We could do better by emitting

// two bitfields or shifting the range by 64 if no lower chars are used.

- if (!DL.fitsInLegalInteger(Max + 1))

- return nullptr;

+ if (!DL.fitsInLegalInteger(Max + 1)) {

+ // Build chain of ORs

+ // Transform:

+ // memchr("abcd", C, 4) != nullptr

+ // to:

+ // (C == 'a' || C == 'b' || C == 'c' || C == 'd') != 0

+ std::string SortedStr = Str.str();

+ llvm::sort(SortedStr);

+ // Compute the number of non-contiguous ranges.

+ unsigned NonContRanges = 1;

+ for (size_t i = 1; i < SortedStr.size(); ++i) {

+ if (SortedStr[i] > SortedStr[i - 1] + 1) {

+ NonContRanges++;

+ }

+ // Restrict this optimization to profitable cases with one or two range

+ // checks.

+ if (NonContRanges > 2)

+ return nullptr;

+ SmallVector<Value *> CharCompares;

+ for (unsigned char C : SortedStr)

+ CharCompares.push_back(

+ B.CreateICmpEQ(CharVal, ConstantInt::get(CharVal->getType(), C)));

+ return B.CreateIntToPtr(B.CreateOr(CharCompares), CI->getType());

+ }

// For the bit field use a power-of-2 type with at least 8 bits to avoid

// creating unnecessary illegal types.

@@ -1481,30 +1540,21 @@ static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,

// First, see if we can fold either argument to a constant.

Value *LHSV = nullptr;

- if (auto *LHSC = dyn_cast<Constant>(LHS)) {

- LHSC = ConstantExpr::getBitCast(LHSC, IntType->getPointerTo());

+ if (auto *LHSC = dyn_cast<Constant>(LHS))

LHSV = ConstantFoldLoadFromConstPtr(LHSC, IntType, DL);

- }

Value *RHSV = nullptr;

- if (auto *RHSC = dyn_cast<Constant>(RHS)) {

- RHSC = ConstantExpr::getBitCast(RHSC, IntType->getPointerTo());

+ if (auto *RHSC = dyn_cast<Constant>(RHS))

RHSV = ConstantFoldLoadFromConstPtr(RHSC, IntType, DL);

- }

// Don't generate unaligned loads. If either source is constant data,

// alignment doesn't matter for that source because there is no load.

if ((LHSV || getKnownAlignment(LHS, DL, CI) >= PrefAlignment) &&

(RHSV || getKnownAlignment(RHS, DL, CI) >= PrefAlignment)) {

- if (!LHSV) {

- Type *LHSPtrTy =

- IntType->getPointerTo(LHS->getType()->getPointerAddressSpace());

- LHSV = B.CreateLoad(IntType, B.CreateBitCast(LHS, LHSPtrTy), "lhsv");

- }

- if (!RHSV) {

- Type *RHSPtrTy =

- IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());

- RHSV = B.CreateLoad(IntType, B.CreateBitCast(RHS, RHSPtrTy), "rhsv");

- }

+ if (!LHSV)

+ LHSV = B.CreateLoad(IntType, LHS, "lhsv");

+ if (!RHSV)

+ RHSV = B.CreateLoad(IntType, RHS, "rhsv");

return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");

}

@@ -1653,6 +1703,59 @@ Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilderBase &B) {

return nullptr;

}

+// When enabled, replace operator new() calls marked with a hot or cold memprof

+// attribute with an operator new() call that takes a __hot_cold_t parameter.

+// Currently this is supported by the open source version of tcmalloc, see:

+// https://github.com/google/tcmalloc/blob/master/tcmalloc/new_extension.h

+Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B,

+ LibFunc &Func) {

+ if (!OptimizeHotColdNew)

+ return nullptr;

+ uint8_t HotCold;

+ if (CI->getAttributes().getFnAttr("memprof").getValueAsString() == "cold")

+ HotCold = ColdNewHintValue;

+ else if (CI->getAttributes().getFnAttr("memprof").getValueAsString() == "hot")

+ HotCold = HotNewHintValue;

+ else

+ return nullptr;

+ switch (Func) {

+ case LibFunc_Znwm:

+ return emitHotColdNew(CI->getArgOperand(0), B, TLI,

+ LibFunc_Znwm12__hot_cold_t, HotCold);

+ case LibFunc_Znam:

+ return emitHotColdNew(CI->getArgOperand(0), B, TLI,

+ LibFunc_Znam12__hot_cold_t, HotCold);

+ case LibFunc_ZnwmRKSt9nothrow_t:

+ return emitHotColdNewNoThrow(CI->getArgOperand(0), CI->getArgOperand(1), B,

+ TLI, LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t,

+ HotCold);

+ case LibFunc_ZnamRKSt9nothrow_t:

+ return emitHotColdNewNoThrow(CI->getArgOperand(0), CI->getArgOperand(1), B,

+ TLI, LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t,

+ HotCold);

+ case LibFunc_ZnwmSt11align_val_t:

+ return emitHotColdNewAligned(CI->getArgOperand(0), CI->getArgOperand(1), B,

+ TLI, LibFunc_ZnwmSt11align_val_t12__hot_cold_t,

+ HotCold);

+ case LibFunc_ZnamSt11align_val_t:

+ return emitHotColdNewAligned(CI->getArgOperand(0), CI->getArgOperand(1), B,

+ TLI, LibFunc_ZnamSt11align_val_t12__hot_cold_t,

+ HotCold);

+ case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:

+ return emitHotColdNewAlignedNoThrow(

+ CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,

+ TLI, LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t, HotCold);

+ case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:

+ return emitHotColdNewAlignedNoThrow(

+ CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,

+ TLI, LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t, HotCold);

+ default:

+ return nullptr;

+ }

//===----------------------------------------------------------------------===//

// Math Library Optimizations

//===----------------------------------------------------------------------===//

@@ -1939,7 +2042,8 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {

AttributeList NoAttrs; // Attributes are only meaningful on the original call

// pow(2.0, itofp(x)) -> ldexp(1.0, x)

- if (match(Base, m_SpecificFP(2.0)) &&

+ // TODO: This does not work for vectors because there is no ldexp intrinsic.

+ if (!Ty->isVectorTy() && match(Base, m_SpecificFP(2.0)) &&

(isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo)) &&

hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {

if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize()))

@@ -2056,7 +2160,7 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) {

// pow(-Inf, 0.5) is optionally required to have a result of +Inf (not setting

// errno), but sqrt(-Inf) is required by various standards to set errno.

if (!Pow->doesNotAccessMemory() && !Pow->hasNoInfs() &&

- !isKnownNeverInfinity(Base, TLI))

+ !isKnownNeverInfinity(Base, DL, TLI, 0, AC, Pow))

return nullptr;

Sqrt = getSqrtCall(Base, AttributeList(), Pow->doesNotAccessMemory(), Mod, B,

@@ -2217,17 +2321,25 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) {

hasFloatVersion(M, Name))

Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);

+ // Bail out for vectors because the code below only expects scalars.

+ // TODO: This could be allowed if we had a ldexp intrinsic (D14327).

Type *Ty = CI->getType();

- Value *Op = CI->getArgOperand(0);

+ if (Ty->isVectorTy())

+ return Ret;

// exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= IntSize

// exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < IntSize

+ Value *Op = CI->getArgOperand(0);

if ((isa<SIToFPInst>(Op) || isa<UIToFPInst>(Op)) &&

hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {

- if (Value *Exp = getIntToFPVal(Op, B, TLI->getIntSize()))

- return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI,

- LibFunc_ldexp, LibFunc_ldexpf,

- LibFunc_ldexpl, B, AttributeList());

+ if (Value *Exp = getIntToFPVal(Op, B, TLI->getIntSize())) {

+ IRBuilderBase::FastMathFlagGuard Guard(B);

+ B.setFastMathFlags(CI->getFastMathFlags());

+ return copyFlags(

+ *CI, emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI,

+ LibFunc_ldexp, LibFunc_ldexpf,

+ LibFunc_ldexpl, B, AttributeList()));

+ }

}

return Ret;

@@ -2579,7 +2691,7 @@ static bool insertSinCosCall(IRBuilderBase &B, Function *OrigCallee, Value *Arg,

return true;

}

-Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilderBase &B) {

+Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, bool IsSin, IRBuilderBase &B) {

// Make sure the prototype is as expected, otherwise the rest of the

// function is probably invalid and likely to abort.

if (!isTrigLibCall(CI))

@@ -2618,7 +2730,7 @@ Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilderBase &B) {

replaceTrigInsts(CosCalls, Cos);

replaceTrigInsts(SinCosCalls, SinCos);

- return nullptr;

+ return IsSin ? Sin : Cos;

}

void LibCallSimplifier::classifyArgUse(

@@ -3439,6 +3551,15 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,

return optimizeWcslen(CI, Builder);

case LibFunc_bcopy:

return optimizeBCopy(CI, Builder);

+ case LibFunc_Znwm:

+ case LibFunc_ZnwmRKSt9nothrow_t:

+ case LibFunc_ZnwmSt11align_val_t:

+ case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:

+ case LibFunc_Znam:

+ case LibFunc_ZnamRKSt9nothrow_t:

+ case LibFunc_ZnamSt11align_val_t:

+ case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:

+ return optimizeNew(CI, Builder, Func);

default:

break;

}

@@ -3461,9 +3582,10 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,

switch (Func) {

case LibFunc_sinpif:

case LibFunc_sinpi:

+ return optimizeSinCosPi(CI, /*IsSin*/true, Builder);

case LibFunc_cospif:

case LibFunc_cospi:

- return optimizeSinCosPi(CI, Builder);

+ return optimizeSinCosPi(CI, /*IsSin*/false, Builder);

case LibFunc_powf:

case LibFunc_pow:

case LibFunc_powl:

@@ -3696,13 +3818,13 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {

}

LibCallSimplifier::LibCallSimplifier(

- const DataLayout &DL, const TargetLibraryInfo *TLI,

- OptimizationRemarkEmitter &ORE,

- BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,

+ const DataLayout &DL, const TargetLibraryInfo *TLI, AssumptionCache *AC,

+ OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,

+ ProfileSummaryInfo *PSI,

function_ref<void(Instruction *, Value *)> Replacer,

function_ref<void(Instruction *)> Eraser)

- : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), BFI(BFI), PSI(PSI),

- Replacer(Replacer), Eraser(Eraser) {}

+ : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), AC(AC), ORE(ORE), BFI(BFI),

+ PSI(PSI), Replacer(Replacer), Eraser(Eraser) {}

void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {

// Indirect through the replacer used in this instance.