diff options
Diffstat (limited to 'llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp')
-rw-r--r-- | llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp | 299 |
1 files changed, 299 insertions, 0 deletions
diff --git a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp new file mode 100644 index 0000000000000..ebc59879d3577 --- /dev/null +++ b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp @@ -0,0 +1,299 @@ +//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements feature and label extraction for offline supervised learning +// of a IR to native size model. +// +//===----------------------------------------------------------------------===// +#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" + +#ifdef LLVM_HAVE_TF_API +#include "llvm/Analysis/Utils/TFUtils.h" +#endif +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" + +#include <algorithm> +#include <deque> + +using namespace llvm; + +AnalysisKey InlineSizeEstimatorAnalysis::Key; + +#define DEBUG_TYPE "inline-size-estimator" + +#ifdef LLVM_HAVE_TF_API +cl::opt<std::string> TFIR2NativeModelPath( + "ml-inliner-ir2native-model", cl::Hidden, + cl::desc("Path to saved model evaluating native size from IR.")); + +namespace { +unsigned getMaxInstructionID() { +#define LAST_OTHER_INST(NR) return NR; +#include "llvm/IR/Instruction.def" +} + +class IRToNativeSizeLearning { +public: + enum class NamedFeatureIndex : size_t { + InitialSize, + Blocks, + Calls, + IsLocal, + IsLinkOnceODR, + IsLinkOnce, + Loops, + MaxLoopDepth, + MaxDomTreeLevel, + + NumNamedFeatures + }; + static const size_t NumNamedFeatures = + static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures); + struct FunctionFeatures { + static std::vector<std::pair<size_t, size_t>> + ImportantInstructionSuccessions; + static const size_t FeatureCount; + + std::array<int32_t, NumNamedFeatures> NamedFeatures = {0}; + std::vector<int32_t> InstructionHistogram; + std::vector<int32_t> InstructionPairHistogram; + + void fillTensor(int32_t *Ptr) const; + int32_t &operator[](NamedFeatureIndex Pos) { + return NamedFeatures[static_cast<size_t>(Pos)]; + } + }; + IRToNativeSizeLearning() = default; + + static FunctionFeatures getFunctionFeatures(Function &F, + FunctionAnalysisManager &FAM); + +private: + /// Sort once the feature tuples. + struct SortFeatureTuples { + bool IsSorted = false; + SortFeatureTuples() { + std::sort(FunctionFeatures::ImportantInstructionSuccessions.begin(), + FunctionFeatures::ImportantInstructionSuccessions.end()); + IsSorted = true; + } + }; + + static llvm::ManagedStatic<SortFeatureTuples> TupleSorter; + + static bool ensureSortedTuples() { return TupleSorter->IsSorted; } +}; +llvm::ManagedStatic<IRToNativeSizeLearning::SortFeatureTuples> + IRToNativeSizeLearning::TupleSorter; + +// This is a point in time - we determined including these pairs of +// consecutive instructions (in the IR layout available at inline time) as +// features improves the model performance. We want to move away from manual +// feature selection. +// The vector is given in opcode pairs rather than labels because 1) labels +// weren't readily available, and 2) the successions were hand - extracted +std::vector<std::pair<size_t, size_t>> + IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions = + {{1, 34}, {15, 27}, {53, 53}, {53, 34}, {1, 11}, {32, 2}, {2, 48}, + {28, 48}, {1, 45}, {49, 32}, {57, 56}, {55, 53}, {1, 28}, {57, 34}, + {1, 1}, {32, 28}, {32, 15}, {49, 28}, {53, 1}, {2, 53}, {48, 34}, + {28, 53}, {2, 32}, {1, 40}, {32, 48}, {29, 56}, {56, 32}, {55, 56}, + {48, 56}, {1, 31}, {33, 34}, {2, 28}, {1, 12}, {55, 1}, {31, 31}, + {65, 1}, {33, 56}, {32, 32}, {13, 13}, {1, 26}, {13, 26}, {2, 1}, + {1, 33}, {47, 49}, {64, 1}, {2, 38}, {34, 53}, {48, 2}, {55, 34}, + {34, 32}, {1, 5}, {56, 13}, {2, 2}, {2, 49}, {33, 2}, {49, 39}, + {56, 49}, {33, 49}, {32, 39}, {39, 57}, {29, 33}, {31, 34}, {32, 29}, + {47, 15}, {13, 34}, {2, 33}, {32, 49}, {49, 34}, {56, 33}, {1, 30}, + {33, 33}, {31, 33}, {2, 29}, {56, 7}, {32, 13}, {2, 55}, {56, 56}, + {2, 34}, {1, 42}, {34, 49}, {1, 20}, {32, 33}, {1, 25}, {53, 28}, + {1, 14}, {31, 49}, {28, 2}, {2, 13}, {2, 56}, {1, 32}, {56, 53}, + {65, 65}, {33, 53}, {64, 64}, {13, 2}, {34, 33}, {1, 4}, {49, 2}, + {1, 9}, {56, 1}, {33, 1}, {53, 57}, {32, 53}, {13, 56}, {32, 56}, + {55, 55}, {1, 18}, {49, 56}, {34, 34}, {1, 7}, {56, 64}, {32, 1}, + {13, 33}, {55, 28}, {49, 33}, {57, 57}, {56, 34}, {34, 56}, {33, 32}, + {32, 40}, {1, 29}, {53, 2}, {34, 1}, {32, 34}, {49, 49}, {1, 24}, + {40, 34}, {1, 13}, {38, 34}, {29, 2}, {34, 2}, {1, 39}, {1, 22}, + {1, 27}, {49, 1}, {1, 8}, {56, 2}}; + +// We have: 9 calculated features (the features here); 1 feature for each +// instruction opcode; and 1 feature for each manually-identified sequence. +// For the latter 2, we build a histogram: we count the number of +// occurrences of each instruction opcode or succession of instructions, +// respectively. +// Note that instruction opcodes start from 1. For convenience, we also have an +// always 0 feature for the '0' opcode, hence the extra 1. +const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount = + IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions + .size() + + getMaxInstructionID() + 1 + IRToNativeSizeLearning::NumNamedFeatures; + +size_t getSize(Function &F, TargetTransformInfo &TTI) { + size_t Ret = 0; + for (auto &BB : F) + for (auto &I : BB) + Ret += TTI.getInstructionCost( + &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize); + return Ret; +} + +size_t getSize(Function &F, FunctionAnalysisManager &FAM) { + auto &TTI = FAM.getResult<TargetIRAnalysis>(F); + return getSize(F, TTI); +} + +unsigned getMaxDominatorTreeDepth(const Function &F, + const DominatorTree &Tree) { + unsigned Ret = 0; + for (auto &BB : F) + if (auto *TN = Tree.getNode(&BB)) + Ret = std::max(Ret, TN->getLevel()); + return Ret; +} +} // namespace + +IRToNativeSizeLearning::FunctionFeatures +IRToNativeSizeLearning::getFunctionFeatures(Function &F, + FunctionAnalysisManager &FAM) { + assert(ensureSortedTuples() && "expected lazy initialization"); + + auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F); + FunctionFeatures FF; + size_t InstrCount = getMaxInstructionID() + 1; + FF.InstructionHistogram.resize(InstrCount); + + FF.InstructionPairHistogram.resize( + FunctionFeatures::ImportantInstructionSuccessions.size()); + + auto StartID = 0; + auto LastID = StartID; + auto getPairIndex = [](size_t a, size_t b) { + auto I = + std::find(FunctionFeatures::ImportantInstructionSuccessions.begin(), + FunctionFeatures::ImportantInstructionSuccessions.end(), + std::make_pair(a, b)); + if (I == FunctionFeatures::ImportantInstructionSuccessions.end()) + return -1; + return static_cast<int>(std::distance( + FunctionFeatures::ImportantInstructionSuccessions.begin(), I)); + }; + + // We don't want debug calls, because they'd just add noise. + for (auto &BB : F) { + for (auto I = BB.instructionsWithoutDebug().begin(), + E = BB.instructionsWithoutDebug().end(); + I != E; ++I) { + auto ID = I->getOpcode(); + + ++FF.InstructionHistogram[ID]; + int PairIndex = getPairIndex(LastID, ID); + if (PairIndex >= 0) + ++FF.InstructionPairHistogram[PairIndex]; + LastID = ID; + if (isa<CallBase>(*I)) + ++FF[NamedFeatureIndex::Calls]; + } + } + + FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM); + FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage(); + FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage(); + FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage(); + FF[NamedFeatureIndex::Blocks] = + std::distance(F.getBasicBlockList().begin(), F.getBasicBlockList().end()); + auto &LI = FAM.getResult<LoopAnalysis>(F); + FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end()); + for (auto &L : LI) + FF[NamedFeatureIndex::MaxLoopDepth] = + std::max(FF[NamedFeatureIndex::MaxLoopDepth], + static_cast<int32_t>(L->getLoopDepth())); + FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree); + return FF; +} + +void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const { + std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr); + Ptr += NamedFeatures.size(); + std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr); + Ptr += InstructionHistogram.size(); + std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(), + Ptr); +} + +bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { + return !TFIR2NativeModelPath.empty(); +} + +InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() { + if (!isEvaluatorRequested()) { + return; + } + std::vector<std::string> InputNames{"serving_default_input_1"}; + std::vector<std::string> OutputName{"StatefulPartitionedCall"}; + Evaluator = std::make_unique<TFModelEvaluator>( + TFIR2NativeModelPath.getValue().c_str(), InputNames, OutputName); + if (!Evaluator || !Evaluator->isValid()) { + Evaluator.reset(); + return; + } + static const std::vector<int64_t> Dim{ + 1, static_cast<int64_t>( + IRToNativeSizeLearning::FunctionFeatures::FeatureCount)}; + + Evaluator->initInput<int32_t>(0, Dim); +} + +InlineSizeEstimatorAnalysis::Result +InlineSizeEstimatorAnalysis::run(const Function &F, + FunctionAnalysisManager &FAM) { + if (!Evaluator) + return None; + auto Features = IRToNativeSizeLearning::getFunctionFeatures( + const_cast<Function &>(F), FAM); + int32_t *V = Evaluator->getInput<int32_t>(0); + Features.fillTensor(V); + auto ER = Evaluator->evaluate(); + if (!ER) + return None; + float Ret = *ER->getTensorValue<float>(0); + if (Ret < 0.0) + Ret = 0.0; + return static_cast<size_t>(Ret); +} + +InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} +InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis( + InlineSizeEstimatorAnalysis &&Other) + : Evaluator(std::move(Other.Evaluator)) {} + +#else +namespace llvm { +class TFModelEvaluator {}; +} // namespace llvm +InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {} +InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis( + InlineSizeEstimatorAnalysis &&) {} +InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} +InlineSizeEstimatorAnalysis::Result +InlineSizeEstimatorAnalysis::run(const Function &F, + FunctionAnalysisManager &FAM) { + return None; +} +bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; } +#endif
\ No newline at end of file |