1 files changed, 299 insertions, 0 deletions
diff --git a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
new file mode 100644
index 0000000000000..ebc59879d3577
--- /dev/null
+++ b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
@@ -0,0 +1,299 @@
+//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements feature and label extraction for offline supervised learning
+// of a IR to native size model.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
+
+#ifdef LLVM_HAVE_TF_API
+#include "llvm/Analysis/Utils/TFUtils.h"
+#endif
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <deque>
+
+using namespace llvm;
+
+AnalysisKey InlineSizeEstimatorAnalysis::Key;
+
+#define DEBUG_TYPE "inline-size-estimator"
+
+#ifdef LLVM_HAVE_TF_API
+cl::opt<std::string> TFIR2NativeModelPath(
+    "ml-inliner-ir2native-model", cl::Hidden,
+    cl::desc("Path to saved model evaluating native size from IR."));
+
+namespace {
+unsigned getMaxInstructionID() {
+#define LAST_OTHER_INST(NR) return NR;
+#include "llvm/IR/Instruction.def"
+}
+
+class IRToNativeSizeLearning {
+public:
+  enum class NamedFeatureIndex : size_t {
+    InitialSize,
+    Blocks,
+    Calls,
+    IsLocal,
+    IsLinkOnceODR,
+    IsLinkOnce,
+    Loops,
+    MaxLoopDepth,
+    MaxDomTreeLevel,
+
+    NumNamedFeatures
+  };
+  static const size_t NumNamedFeatures =
+      static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);
+  struct FunctionFeatures {
+    static std::vector<std::pair<size_t, size_t>>
+        ImportantInstructionSuccessions;
+    static const size_t FeatureCount;
+
+    std::array<int32_t, NumNamedFeatures> NamedFeatures = {0};
+    std::vector<int32_t> InstructionHistogram;
+    std::vector<int32_t> InstructionPairHistogram;
+
+    void fillTensor(int32_t *Ptr) const;
+    int32_t &operator[](NamedFeatureIndex Pos) {
+      return NamedFeatures[static_cast<size_t>(Pos)];
+    }
+  };
+  IRToNativeSizeLearning() = default;
+
+  static FunctionFeatures getFunctionFeatures(Function &F,
+                                              FunctionAnalysisManager &FAM);
+
+private:
+  /// Sort once the feature tuples.
+  struct SortFeatureTuples {
+    bool IsSorted = false;
+    SortFeatureTuples() {
+      std::sort(FunctionFeatures::ImportantInstructionSuccessions.begin(),
+                FunctionFeatures::ImportantInstructionSuccessions.end());
+      IsSorted = true;
+    }
+  };
+
+  static llvm::ManagedStatic<SortFeatureTuples> TupleSorter;
+
+  static bool ensureSortedTuples() { return TupleSorter->IsSorted; }
+};
+llvm::ManagedStatic<IRToNativeSizeLearning::SortFeatureTuples>
+    IRToNativeSizeLearning::TupleSorter;
+
+// This is a point in time - we determined including these pairs of
+// consecutive instructions (in the IR layout available at inline time) as
+// features improves the model performance. We want to move away from manual
+// feature selection.
+// The vector is given in opcode pairs rather than labels because 1) labels
+// weren't readily available, and 2) the successions were hand - extracted
+std::vector<std::pair<size_t, size_t>>
+    IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions =
+        {{1, 34},  {15, 27}, {53, 53}, {53, 34}, {1, 11},  {32, 2},  {2, 48},
+         {28, 48}, {1, 45},  {49, 32}, {57, 56}, {55, 53}, {1, 28},  {57, 34},
+         {1, 1},   {32, 28}, {32, 15}, {49, 28}, {53, 1},  {2, 53},  {48, 34},
+         {28, 53}, {2, 32},  {1, 40},  {32, 48}, {29, 56}, {56, 32}, {55, 56},
+         {48, 56}, {1, 31},  {33, 34}, {2, 28},  {1, 12},  {55, 1},  {31, 31},
+         {65, 1},  {33, 56}, {32, 32}, {13, 13}, {1, 26},  {13, 26}, {2, 1},
+         {1, 33},  {47, 49}, {64, 1},  {2, 38},  {34, 53}, {48, 2},  {55, 34},
+         {34, 32}, {1, 5},   {56, 13}, {2, 2},   {2, 49},  {33, 2},  {49, 39},
+         {56, 49}, {33, 49}, {32, 39}, {39, 57}, {29, 33}, {31, 34}, {32, 29},
+         {47, 15}, {13, 34}, {2, 33},  {32, 49}, {49, 34}, {56, 33}, {1, 30},
+         {33, 33}, {31, 33}, {2, 29},  {56, 7},  {32, 13}, {2, 55},  {56, 56},
+         {2, 34},  {1, 42},  {34, 49}, {1, 20},  {32, 33}, {1, 25},  {53, 28},
+         {1, 14},  {31, 49}, {28, 2},  {2, 13},  {2, 56},  {1, 32},  {56, 53},
+         {65, 65}, {33, 53}, {64, 64}, {13, 2},  {34, 33}, {1, 4},   {49, 2},
+         {1, 9},   {56, 1},  {33, 1},  {53, 57}, {32, 53}, {13, 56}, {32, 56},
+         {55, 55}, {1, 18},  {49, 56}, {34, 34}, {1, 7},   {56, 64}, {32, 1},
+         {13, 33}, {55, 28}, {49, 33}, {57, 57}, {56, 34}, {34, 56}, {33, 32},
+         {32, 40}, {1, 29},  {53, 2},  {34, 1},  {32, 34}, {49, 49}, {1, 24},
+         {40, 34}, {1, 13},  {38, 34}, {29, 2},  {34, 2},  {1, 39},  {1, 22},
+         {1, 27},  {49, 1},  {1, 8},   {56, 2}};
+
+// We have: 9 calculated features (the features here); 1 feature for each
+// instruction opcode; and 1 feature for each manually-identified sequence.
+// For the latter 2, we build a histogram: we count the number of
+// occurrences of each instruction opcode or succession of instructions,
+// respectively.
+// Note that instruction opcodes start from 1. For convenience, we also have an
+// always 0 feature for the '0' opcode, hence the extra 1.
+const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount =
+    IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions
+        .size() +
+    getMaxInstructionID() + 1 + IRToNativeSizeLearning::NumNamedFeatures;
+
+size_t getSize(Function &F, TargetTransformInfo &TTI) {
+  size_t Ret = 0;
+  for (auto &BB : F)
+    for (auto &I : BB)
+      Ret += TTI.getInstructionCost(
+          &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize);
+  return Ret;
+}
+
+size_t getSize(Function &F, FunctionAnalysisManager &FAM) {
+  auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
+  return getSize(F, TTI);
+}
+
+unsigned getMaxDominatorTreeDepth(const Function &F,
+                                  const DominatorTree &Tree) {
+  unsigned Ret = 0;
+  for (auto &BB : F)
+    if (auto *TN = Tree.getNode(&BB))
+      Ret = std::max(Ret, TN->getLevel());
+  return Ret;
+}
+} // namespace
+
+IRToNativeSizeLearning::FunctionFeatures
+IRToNativeSizeLearning::getFunctionFeatures(Function &F,
+                                            FunctionAnalysisManager &FAM) {
+  assert(ensureSortedTuples() && "expected lazy initialization");
+
+  auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F);
+  FunctionFeatures FF;
+  size_t InstrCount = getMaxInstructionID() + 1;
+  FF.InstructionHistogram.resize(InstrCount);
+
+  FF.InstructionPairHistogram.resize(
+      FunctionFeatures::ImportantInstructionSuccessions.size());
+
+  auto StartID = 0;
+  auto LastID = StartID;
+  auto getPairIndex = [](size_t a, size_t b) {
+    auto I =
+        std::find(FunctionFeatures::ImportantInstructionSuccessions.begin(),
+                  FunctionFeatures::ImportantInstructionSuccessions.end(),
+                  std::make_pair(a, b));
+    if (I == FunctionFeatures::ImportantInstructionSuccessions.end())
+      return -1;
+    return static_cast<int>(std::distance(
+        FunctionFeatures::ImportantInstructionSuccessions.begin(), I));
+  };
+
+  // We don't want debug calls, because they'd just add noise.
+  for (auto &BB : F) {
+    for (auto I = BB.instructionsWithoutDebug().begin(),
+              E = BB.instructionsWithoutDebug().end();
+         I != E; ++I) {
+      auto ID = I->getOpcode();
+
+      ++FF.InstructionHistogram[ID];
+      int PairIndex = getPairIndex(LastID, ID);
+      if (PairIndex >= 0)
+        ++FF.InstructionPairHistogram[PairIndex];
+      LastID = ID;
+      if (isa<CallBase>(*I))
+        ++FF[NamedFeatureIndex::Calls];
+    }
+  }
+
+  FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM);
+  FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage();
+  FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage();
+  FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage();
+  FF[NamedFeatureIndex::Blocks] =
+      std::distance(F.getBasicBlockList().begin(), F.getBasicBlockList().end());
+  auto &LI = FAM.getResult<LoopAnalysis>(F);
+  FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end());
+  for (auto &L : LI)
+    FF[NamedFeatureIndex::MaxLoopDepth] =
+        std::max(FF[NamedFeatureIndex::MaxLoopDepth],
+                 static_cast<int32_t>(L->getLoopDepth()));
+  FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree);
+  return FF;
+}
+
+void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const {
+  std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr);
+  Ptr += NamedFeatures.size();
+  std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr);
+  Ptr += InstructionHistogram.size();
+  std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(),
+            Ptr);
+}
+
+bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() {
+  return !TFIR2NativeModelPath.empty();
+}
+
+InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {
+  if (!isEvaluatorRequested()) {
+    return;
+  }
+  std::vector<std::string> InputNames{"serving_default_input_1"};
+  std::vector<std::string> OutputName{"StatefulPartitionedCall"};
+  Evaluator = std::make_unique<TFModelEvaluator>(
+      TFIR2NativeModelPath.getValue().c_str(), InputNames, OutputName);
+  if (!Evaluator || !Evaluator->isValid()) {
+    Evaluator.reset();
+    return;
+  }
+  static const std::vector<int64_t> Dim{
+      1, static_cast<int64_t>(
+             IRToNativeSizeLearning::FunctionFeatures::FeatureCount)};
+
+  Evaluator->initInput<int32_t>(0, Dim);
+}
+
+InlineSizeEstimatorAnalysis::Result
+InlineSizeEstimatorAnalysis::run(const Function &F,
+                                 FunctionAnalysisManager &FAM) {
+  if (!Evaluator)
+    return None;
+  auto Features = IRToNativeSizeLearning::getFunctionFeatures(
+      const_cast<Function &>(F), FAM);
+  int32_t *V = Evaluator->getInput<int32_t>(0);
+  Features.fillTensor(V);
+  auto ER = Evaluator->evaluate();
+  if (!ER)
+    return None;
+  float Ret = *ER->getTensorValue<float>(0);
+  if (Ret < 0.0)
+    Ret = 0.0;
+  return static_cast<size_t>(Ret);
+}
+
+InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
+InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
+    InlineSizeEstimatorAnalysis &&Other)
+    : Evaluator(std::move(Other.Evaluator)) {}
+
+#else
+namespace llvm {
+class TFModelEvaluator {};
+} // namespace llvm
+InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {}
+InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis(
+    InlineSizeEstimatorAnalysis &&) {}
+InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
+InlineSizeEstimatorAnalysis::Result
+InlineSizeEstimatorAnalysis::run(const Function &F,
+                                 FunctionAnalysisManager &FAM) {
+  return None;
+}
+bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; }
+#endif
+\ No newline at end of file