summaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp')
-rw-r--r--llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp324
1 files changed, 283 insertions, 41 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index dcfc28887a48..be6c8c631001 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -110,7 +110,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/MisExpect.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -249,6 +248,38 @@ static cl::opt<bool>
"optimization remarks: -{Rpass|"
"pass-remarks}=pgo-instrumentation"));
+// Hidden option: force instrumentation of the function entry basic block.
+static cl::opt<bool> PGOInstrumentEntry(
+    "pgo-instrument-entry", cl::init(false), cl::Hidden,
+    cl::desc("Force to instrument function entry basicblock."));
+
+// Hidden option (on by default): fix up the function entry count when the
+// profile is used.
+static cl::opt<bool>
+    PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
+                     cl::desc("Fix function entry count in profile use."));
+
+// Hidden option: report blocks whose hot/cold classification differs between
+// the raw profile count and the BFI-derived count.
+static cl::opt<bool> PGOVerifyHotBFI(
+    "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
+    cl::desc("Print out the non-match BFI count if a hot raw profile count "
+             "becomes non-hot, or a cold raw profile count becomes hot. "
+             "The print is enabled under -Rpass-analysis=pgo, or "
+             "internal option -pass-remarks-analysis=pgo."));
+
+// Hidden option: report blocks whose BFI-derived count does not match the
+// profile count once profile metadata has been set.
+static cl::opt<bool> PGOVerifyBFI(
+    "pgo-verify-bfi", cl::init(false), cl::Hidden,
+    cl::desc("Print out mismatched BFI counts after setting profile metadata. "
+             "The print is enabled under -Rpass-analysis=pgo, or "
+             "internal option -pass-remarks-analysis=pgo."));
+
+// Hidden option: percentage difference above which -pgo-verify-bfi reports a
+// block as mismatched (default 5).
+static cl::opt<unsigned> PGOVerifyBFIRatio(
+    "pgo-verify-bfi-ratio", cl::init(5), cl::Hidden,
+    cl::desc("Set the threshold for pgo-verify-bfi -- only print out "
+             "mismatched BFI if the difference percentage is greater than "
+             "this value (in percentage)."));
+
+// Hidden option: counts below this value are ignored by -pgo-verify-bfi
+// (default 1).
+static cl::opt<unsigned> PGOVerifyBFICutoff(
+    "pgo-verify-bfi-cutoff", cl::init(1), cl::Hidden,
+    cl::desc("Set the threshold for pgo-verify-bfi -- skip the counts whose "
+             "profile count value is below."));
+
+
// Command line option to turn on CFG dot dump after profile annotation.
// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
extern cl::opt<PGOViewCountsType> PGOViewCounts;
@@ -257,6 +288,10 @@ extern cl::opt<PGOViewCountsType> PGOViewCounts;
// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
extern cl::opt<std::string> ViewBlockFreqFuncName;
+// Hidden option (off by default): when set, computeCFGHash() builds the
+// function hash with the previous bit-packed layout instead of the new
+// CRC-based upper bits.
+static cl::opt<bool>
+ PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden,
+ cl::desc("Use the old CFG function hashing"));
+
// Return a string describing the branch condition that can be
// used in static branch probability heuristics:
static std::string getBranchCondString(Instruction *TI) {
@@ -425,7 +460,7 @@ public:
private:
bool runOnModule(Module &M) override {
createProfileFileNameVar(M, InstrProfileOutput);
- createIRLevelProfileFlagVar(M, true);
+ createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry);
return false;
}
std::string InstrProfileOutput;
@@ -572,9 +607,11 @@ public:
Function &Func, TargetLibraryInfo &TLI,
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
- BlockFrequencyInfo *BFI = nullptr, bool IsCS = false)
+ BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
+ bool InstrumentFuncEntry = true)
: F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
- ValueSites(IPVK_Last + 1), SIVisitor(Func), MST(F, BPI, BFI) {
+ ValueSites(IPVK_Last + 1), SIVisitor(Func),
+ MST(F, InstrumentFuncEntry, BPI, BFI) {
// This should be done before CFG hash computation.
SIVisitor.countSelects(Func);
ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
@@ -611,7 +648,8 @@ public:
} // end anonymous namespace
// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
-// value of each BB in the CFG. The higher 32 bits record the number of edges.
+// value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
+// of selects, indirect calls, mem ops and edges.
template <class Edge, class BBInfo>
void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
std::vector<uint8_t> Indexes;
@@ -630,12 +668,31 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
}
JC.update(Indexes);
- // Hash format for context sensitive profile. Reserve 4 bits for other
- // information.
- FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
- (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
- //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
- (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
+ JamCRC JCH;
+ if (PGOOldCFGHashing) {
+ // Hash format for context sensitive profile. Reserve 4 bits for other
+ // information.
+ FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
+ (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
+ //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
+ (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
+ } else {
+ // The higher 32 bits.
+ auto updateJCH = [&JCH](uint64_t Num) {
+ uint8_t Data[8];
+ support::endian::write64le(Data, Num);
+ JCH.update(Data);
+ };
+ updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
+ updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
+ updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
+ updateJCH((uint64_t)MST.AllEdges.size());
+
+ // Hash format for context sensitive profile. Reserve 4 bits for other
+ // information.
+ FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
+ }
+
// Reserve bit 60-63 for other information purpose.
FunctionHash &= 0x0FFFFFFFFFFFFFFF;
if (IsCS)
@@ -644,8 +701,12 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
<< " CRC = " << JC.getCRC()
<< ", Selects = " << SIVisitor.getNumOfSelectInsts()
<< ", Edges = " << MST.AllEdges.size() << ", ICSites = "
- << ValueSites[IPVK_IndirectCallTarget].size()
- << ", Hash = " << FunctionHash << "\n";);
+ << ValueSites[IPVK_IndirectCallTarget].size());
+ if (!PGOOldCFGHashing) {
+ LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
+ << ", High32 CRC = " << JCH.getCRC());
+ }
+ LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";);
}
// Check if we can safely rename this Comdat function.
@@ -656,7 +717,7 @@ static bool canRenameComdat(
return false;
// FIXME: Currently only handles those Comdat groups that contain only one
- // function and function aliases.
+ // function.
// (1) For a Comdat group containing multiple functions, we need to have a
// unique postfix based on the hashes for each function. There is a
// non-trivial code refactoring to do this efficiently.
@@ -664,8 +725,7 @@ static bool canRenameComdat(
// group including global vars.
Comdat *C = F.getComdat();
for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
- if (dyn_cast<GlobalAlias>(CM.second))
- continue;
+ assert(!isa<GlobalAlias>(CM.second));
Function *FM = dyn_cast<Function>(CM.second);
if (FM != &F)
return false;
@@ -705,18 +765,8 @@ void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
- if (GlobalAlias *GA = dyn_cast<GlobalAlias>(CM.second)) {
- // For aliases, change the name directly.
- assert(dyn_cast<Function>(GA->getAliasee()->stripPointerCasts()) == &F);
- std::string OrigGAName = GA->getName().str();
- GA->setName(Twine(GA->getName() + "." + Twine(FunctionHash)));
- GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigGAName, GA);
- continue;
- }
// Must be a function.
- Function *CF = dyn_cast<Function>(CM.second);
- assert(CF);
- CF->setComdat(NewComdat);
+ cast<Function>(CM.second)->setComdat(NewComdat);
}
}
@@ -781,8 +831,11 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
if (!E->IsCritical)
return canInstrument(DestBB);
+ // Some IndirectBr critical edges cannot be split by the previous
+ // SplitIndirectBrCriticalEdges call. Bail out.
unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
- BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
+ BasicBlock *InstrBB =
+ isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
if (!InstrBB) {
LLVM_DEBUG(
dbgs() << "Fail to split critical edge: not instrument this edge.\n");
@@ -845,8 +898,8 @@ static void instrumentOneFunc(
// later in getInstrBB() to avoid invalidating it.
SplitIndirectBrCriticalEdges(F, BPI, BFI);
- FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, TLI, ComdatMembers, true,
- BPI, BFI, IsCS);
+ FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(
+ F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry);
std::vector<BasicBlock *> InstrumentBBs;
FuncInfo.getInstrumentBBs(InstrumentBBs);
unsigned NumCounters =
@@ -1004,13 +1057,15 @@ public:
PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
- ProfileSummaryInfo *PSI, bool IsCS)
+ ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry)
: F(Func), M(Modu), BFI(BFIin), PSI(PSI),
- FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS),
+ FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
+ InstrumentFuncEntry),
FreqAttr(FFA_Normal), IsCS(IsCS) {}
// Read counts for the instrumented BB from profile.
- bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros);
+ bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
+ bool &AllMinusOnes);
// Populate the counts for all BBs.
void populateCounters();
@@ -1121,11 +1176,18 @@ bool PGOUseFunc::setInstrumentedCounts(
if (NumCounters != CountFromProfile.size()) {
return false;
}
+ auto *FuncEntry = &*F.begin();
+
// Set the profile count to the Instrumented BBs.
uint32_t I = 0;
for (BasicBlock *InstrBB : InstrumentBBs) {
uint64_t CountValue = CountFromProfile[I++];
UseBBInfo &Info = getBBInfo(InstrBB);
+ // If we reach here, we know that we have some nonzero count
+ // values in this function. The entry count should not be 0.
+ // Fix it if necessary.
+ if (InstrBB == FuncEntry && CountValue == 0)
+ CountValue = 1;
Info.setBBInfoCount(CountValue);
}
ProfileCountSize = CountFromProfile.size();
@@ -1186,7 +1248,8 @@ void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
// Read the profile from ProfileFileName and assign the value to the
// instrumented BB and the edges. This function also updates ProgramMaxCount.
// Return true if the profile is successfully read, and false on errors.
-bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros) {
+bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
+ bool &AllMinusOnes) {
auto &Ctx = M->getContext();
Expected<InstrProfRecord> Result =
PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
@@ -1229,10 +1292,13 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros)
IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
+ AllMinusOnes = (CountFromProfile.size() > 0);
uint64_t ValueSum = 0;
for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
ValueSum += CountFromProfile[I];
+ if (CountFromProfile[I] != (uint64_t)-1)
+ AllMinusOnes = false;
}
AllZeros = (ValueSum == 0);
@@ -1316,7 +1382,6 @@ void PGOUseFunc::populateCounters() {
}
#endif
uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
- F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real));
uint64_t FuncMaxCount = FuncEntryCount;
for (auto &BB : F) {
auto BI = findBBInfo(&BB);
@@ -1324,6 +1389,11 @@ void PGOUseFunc::populateCounters() {
continue;
FuncMaxCount = std::max(FuncMaxCount, BI->CountValue);
}
+
+ // Fix the obviously inconsistent entry count.
+ if (FuncMaxCount > 0 && FuncEntryCount == 0)
+ FuncEntryCount = 1;
+ F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real));
markFunctionAttributes(FuncEntryCount, FuncMaxCount);
// Now annotate select instructions
@@ -1514,13 +1584,15 @@ static bool InstrumentAllFunctions(
// For the context-sensitive instrumentation, we should have a separate pass
// (before LTO/ThinLTO linking) to create these variables.
if (!IsCS)
- createIRLevelProfileFlagVar(M, /* IsCS */ false);
+ createIRLevelProfileFlagVar(M, /* IsCS */ false, PGOInstrumentEntry);
std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
collectComdatMembers(M, ComdatMembers);
for (auto &F : M) {
if (F.isDeclaration())
continue;
+ if (F.hasFnAttribute(llvm::Attribute::NoProfile))
+ continue;
auto &TLI = LookupTLI(F);
auto *BPI = LookupBPI(F);
auto *BFI = LookupBFI(F);
@@ -1532,7 +1604,7 @@ static bool InstrumentAllFunctions(
PreservedAnalyses
PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) {
createProfileFileNameVar(M, CSInstrName);
- createIRLevelProfileFlagVar(M, /* IsCS */ true);
+ createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry);
return PreservedAnalyses::all();
}
@@ -1571,6 +1643,129 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M,
return PreservedAnalyses::none();
}
+// Rescale the function entry count by the ratio between the sum of the
+// profile count values and the sum of the BFI-derived count values.
+//
+// A fresh BlockFrequencyInfo is computed from NBPI and LI. For every basic
+// block that has BBInfo in Func, its profile count and its BFI block profile
+// count are accumulated. Nothing is changed when the profile sum is zero,
+// when the BFI sum is zero, or when the two sums agree to within 0.1%.
+// Otherwise the entry count is set to the rounded, scaled value (at least 1).
+static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
+                              BranchProbabilityInfo &NBPI) {
+  Function &F = Func.getFunc();
+  BlockFrequencyInfo NBFI(F, NBPI, LI);
+#ifndef NDEBUG
+  auto BFIEntryCount = F.getEntryCount();
+  assert(BFIEntryCount.hasValue() && (BFIEntryCount.getCount() > 0) &&
+         "Invalid BFI Entrycount");
+#endif
+  auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
+  auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
+  for (auto &BBI : F) {
+    if (!Func.findBBInfo(&BBI))
+      continue;
+    auto BFICount = NBFI.getBlockProfileCount(&BBI);
+    // Do not dereference an empty result; verifyFuncBFI applies the same
+    // check before reading the value.
+    if (!BFICount)
+      continue;
+    uint64_t CountValue = Func.getBBInfo(&BBI).CountValue;
+    uint64_t BFICountValue = BFICount.getValue();
+    SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
+    SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
+  }
+  if (SumCount.isZero())
+    return;
+
+  assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
+         "Incorrect sum of BFI counts");
+  // The assert above is compiled out in release builds; bail out rather than
+  // divide by zero and convert a non-finite scale to an integer.
+  if (SumBFICount.isZero())
+    return;
+  if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
+    return;
+  double Scale = (SumCount / SumBFICount).convertToDouble();
+  if (Scale < 1.001 && Scale > 0.999)
+    return;
+
+  uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue;
+  // Adding 0.5 before the truncating conversion rounds to nearest.
+  uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
+  if (NewEntryCount == 0)
+    NewEntryCount = 1;
+  if (NewEntryCount != FuncEntryCount) {
+    F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
+    LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
+                      << ", entry_count " << FuncEntryCount << " --> "
+                      << NewEntryCount << "\n");
+  }
+}
+
+
+// Compare the profile count values with BFI count values, and print out
+// the non-matching ones as optimization analysis remarks.
+//
+// A fresh BlockFrequencyInfo is computed from NBPI and LI. Two modes exist:
+//  * With -pgo-verify-hot-bfi, a block is reported when its raw count is at
+//    or above HotCountThreshold but its BFI count is not, or when its raw
+//    count is at or below ColdCountThreshold but its BFI count is hot.
+//  * Otherwise, blocks whose raw and BFI counts are both below
+//    -pgo-verify-bfi-cutoff are skipped, and a block is reported only when
+//    the absolute difference exceeds -pgo-verify-bfi-ratio percent of the
+//    raw count.
+// A per-function summary remark is emitted when at least one block mismatched.
+static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
+                          BranchProbabilityInfo &NBPI,
+                          uint64_t HotCountThreshold,
+                          uint64_t ColdCountThreshold) {
+  Function &F = Func.getFunc();
+  BlockFrequencyInfo NBFI(F, NBPI, LI);
+  bool HotBBOnly = PGOVerifyHotBFI;
+  std::string Msg;
+  OptimizationRemarkEmitter ORE(&F);
+
+  unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
+  for (auto &BBI : F) {
+    uint64_t CountValue = 0;
+    uint64_t BFICountValue = 0;
+
+    if (Func.getBBInfo(&BBI).CountValid)
+      CountValue = Func.getBBInfo(&BBI).CountValue;
+
+    BBNum++;
+    if (CountValue)
+      NonZeroBBNum++;
+    auto BFICount = NBFI.getBlockProfileCount(&BBI);
+    if (BFICount)
+      BFICountValue = BFICount.getValue();
+
+    if (HotBBOnly) {
+      bool rawIsHot = CountValue >= HotCountThreshold;
+      bool BFIIsHot = BFICountValue >= HotCountThreshold;
+      bool rawIsCold = CountValue <= ColdCountThreshold;
+      bool ShowCount = false;
+      if (rawIsHot && !BFIIsHot) {
+        Msg = "raw-Hot to BFI-nonHot";
+        ShowCount = true;
+      } else if (rawIsCold && BFIIsHot) {
+        Msg = "raw-Cold to BFI-Hot";
+        ShowCount = true;
+      }
+      if (!ShowCount)
+        continue;
+    } else {
+      if ((CountValue < PGOVerifyBFICutoff) &&
+          (BFICountValue < PGOVerifyBFICutoff))
+        continue;
+      uint64_t Diff = (BFICountValue >= CountValue)
+                          ? BFICountValue - CountValue
+                          : CountValue - BFICountValue;
+      // Tolerance is floor(CountValue * Ratio / 100), split into quotient and
+      // remainder parts so counts below 100 do not truncate to a zero
+      // tolerance. Using <= keeps identical counts (Diff == 0) from being
+      // reported and matches "greater than this value" in the option text.
+      uint64_t Ratio = PGOVerifyBFIRatio;
+      uint64_t Tolerance =
+          CountValue / 100 * Ratio + CountValue % 100 * Ratio / 100;
+      if (Diff <= Tolerance)
+        continue;
+    }
+    BBMisMatchNum++;
+
+    ORE.emit([&]() {
+      OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify",
+                                        F.getSubprogram(), &BBI);
+      Remark << "BB " << ore::NV("Block", BBI.getName())
+             << " Count=" << ore::NV("Count", CountValue)
+             << " BFI_Count=" << ore::NV("Count", BFICountValue);
+      if (!Msg.empty())
+        Remark << " (" << Msg << ")";
+      return Remark;
+    });
+  }
+  if (BBMisMatchNum)
+    ORE.emit([&]() {
+      return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
+                                        F.getSubprogram(), &F.getEntryBlock())
+             << "In Func " << ore::NV("Function", F.getName())
+             << ": Num_of_BB=" << ore::NV("Count", BBNum)
+             << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
+             << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
+    });
+}
+
+
static bool annotateAllFunctions(
Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
@@ -1619,6 +1814,12 @@ static bool annotateAllFunctions(
collectComdatMembers(M, ComdatMembers);
std::vector<Function *> HotFunctions;
std::vector<Function *> ColdFunctions;
+
+ // If the profile is marked as always instrumenting the entry BB, do the
+ // same. Note this can be overridden by the internal option
+ // -pgo-instrument-entry.
+ bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
+ if (PGOInstrumentEntry.getNumOccurrences() > 0)
+ InstrumentFuncEntry = PGOInstrumentEntry;
for (auto &F : M) {
if (F.isDeclaration())
continue;
@@ -1628,9 +1829,15 @@ static bool annotateAllFunctions(
// Split indirectbr critical edges here before computing the MST rather than
// later in getInstrBB() to avoid invalidating it.
SplitIndirectBrCriticalEdges(F, BPI, BFI);
- PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS);
+ PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
+ InstrumentFuncEntry);
+ // When AllMinusOnes is true, it means the profile for the function
+ // is unrepresentative and this function is actually hot. Set the
+ // entry count of the function to be multiple times of hot threshold
+ // and drop all its internal counters.
+ bool AllMinusOnes = false;
bool AllZeros = false;
- if (!Func.readCounters(PGOReader.get(), AllZeros))
+ if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes))
continue;
if (AllZeros) {
F.setEntryCount(ProfileCount(0, Function::PCT_Real));
@@ -1638,6 +1845,15 @@ static bool annotateAllFunctions(
ColdFunctions.push_back(&F);
continue;
}
+ const unsigned MultiplyFactor = 3;
+ if (AllMinusOnes) {
+ uint64_t HotThreshold = PSI->getHotCountThreshold();
+ if (HotThreshold)
+ F.setEntryCount(
+ ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real));
+ HotFunctions.push_back(&F);
+ continue;
+ }
Func.populateCounters();
Func.setBranchWeights();
Func.annotateValueSites();
@@ -1675,6 +1891,23 @@ static bool annotateAllFunctions(
Func.dumpInfo();
}
}
+
+ if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) {
+ LoopInfo LI{DominatorTree(F)};
+ BranchProbabilityInfo NBPI(F, LI);
+
+ // Fix func entry count.
+ if (PGOFixEntryCount)
+ fixFuncEntryCount(Func, LI, NBPI);
+
+ // Verify BlockFrequency information.
+ uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
+ if (PGOVerifyHotBFI) {
+ HotCountThreshold = PSI->getOrCompHotCountThreshold();
+ ColdCountThreshold = PSI->getOrCompColdCountThreshold();
+ }
+ verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
+ }
}
// Set function hotness attribute from the profile.
@@ -1687,6 +1920,17 @@ static bool annotateAllFunctions(
<< "\n");
}
for (auto &F : ColdFunctions) {
+ // Only set when there is no Attribute::Hot set by the user. For Hot
+ // attribute, user's annotation has the precedence over the profile.
+ if (F->hasFnAttribute(Attribute::Hot)) {
+ auto &Ctx = M.getContext();
+ std::string Msg = std::string("Function ") + F->getName().str() +
+ std::string(" is annotated as a hot function but"
+ " the profile is cold");
+ Ctx.diagnose(
+ DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
+ continue;
+ }
F->addFnAttr(Attribute::Cold);
LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
<< "\n");
@@ -1772,8 +2016,6 @@ void llvm::setProfMetadata(Module *M, Instruction *TI,
dbgs() << W << " ";
} dbgs() << "\n";);
- misexpect::verifyMisExpect(TI, Weights, TI->getContext());
-
TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
if (EmitBranchProbability) {
std::string BrCondStr = getBranchCondString(TI);