Commit caf395ee authored by Mircea Trofin

Reapply "[llvm] Native size estimator for training -Oz inliner"

This reverts commit 9908a3b9.

The fix was to exclude the content of TFUtils.h (automatically
included in the LLVM_Analysis module, when LLVM_ENABLE_MODULES is enabled).

Differential Revision: https://reviews.llvm.org/D82817
parent 66550c36
llvm/CMakeLists.txt  +12 −0
@@ -981,6 +981,18 @@ if (NOT TENSORFLOW_AOT_PATH STREQUAL "")
    ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}/tf_runtime)
endif()

set(TENSORFLOW_C_LIB_PATH "" CACHE PATH "Path to TensorFlow C library install")
find_library(tensorflow_c_api tensorflow PATHS ${TENSORFLOW_C_LIB_PATH}/lib)

# Similar to the above TensorFlow dependency; please refer to the same script.
# In this case, the latest C API library is available for download from
# https://www.tensorflow.org/install/lang_c
if (tensorflow_c_api)
  set(LLVM_HAVE_TF_API "ON" CACHE BOOL "Full Tensorflow API available")
  add_definitions("-DLLVM_HAVE_TF_API")
  include_directories(${TENSORFLOW_C_LIB_PATH}/include)
endif()
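# As a hypothetical example, configuring with
#   -DTENSORFLOW_C_LIB_PATH=/opt/tensorflow
# expects /opt/tensorflow/lib to contain libtensorflow and
# /opt/tensorflow/include the headers, and defines LLVM_HAVE_TF_API.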

# Put this before tblgen. Else we have a circular dependence.
add_subdirectory(lib/Demangle)
add_subdirectory(lib/Support)
llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h  +35 −0
//===- InlineSizeEstimatorAnalysis.h - ML size estimator --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//

#ifndef LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H
#define LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H

#include "llvm/IR/PassManager.h"

namespace llvm {
class Function;

class TFModelEvaluator;
class InlineSizeEstimatorAnalysis
    : public AnalysisInfoMixin<InlineSizeEstimatorAnalysis> {
public:
  InlineSizeEstimatorAnalysis();
  InlineSizeEstimatorAnalysis(InlineSizeEstimatorAnalysis &&);
  ~InlineSizeEstimatorAnalysis();

  static AnalysisKey Key;
  using Result = Optional<size_t>;
  Result run(const Function &F, FunctionAnalysisManager &FAM);
  static bool isEvaluatorRequested();

private:
  std::unique_ptr<TFModelEvaluator> Evaluator;
};
} // namespace llvm
#endif // LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H
 No newline at end of file
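
A minimal usage sketch, assuming a new-pass-manager function pass with a
Function F and a FunctionAnalysisManager FAM in scope (names illustrative,
not part of the commit):

  // Query the estimator; the result is None unless a model was loaded.
  if (Optional<size_t> SizeEstimate =
          FAM.getResult<InlineSizeEstimatorAnalysis>(F))
    errs() << F.getName() << " estimated native size: " << *SizeEstimate << "\n";
  else
    errs() << F.getName() << ": no estimate available\n";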
llvm/include/llvm/Analysis/Utils/TFUtils.h  +138 −0
//===- TFUtils.h - utilities for tensorflow C API ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
#ifndef LLVM_ANALYSIS_UTILS_TFUTILS_H
#define LLVM_ANALYSIS_UTILS_TFUTILS_H

#ifdef LLVM_HAVE_TF_API
#include "tensorflow/c/c_api.h"
#include "llvm/IR/LLVMContext.h"

#include <memory>
#include <vector>

namespace llvm {

/// Load a SavedModel, find the given inputs and outputs, and set up storage
/// for input tensors. The user is responsible for correctly dimensioning the
/// input tensors and setting their values before calling evaluate().
/// To initialize:
/// - construct the object
/// - initialize the input tensors using initInput. Indices must correspond to
///   indices in the InputNames used at construction.
/// To use:
/// - set input values by using getInput to get each input tensor, and then
///   setting its internal scalars for all dimensions (tensors are row-major:
///   https://github.com/tensorflow/tensorflow/blob/r1.5/tensorflow/c/c_api.h#L205)
/// - call evaluate. The input tensors' values are not consumed after this, and
///   may still be read.
/// - use the outputs via the returned EvaluationResult, which owns the output
///   tensors and deletes them when it is destroyed.
class TFModelEvaluator final {
public:
  /// The result of a model evaluation. Handles the lifetime of the output
  /// TF_Tensor objects, which means that their values need to be used before
  /// the EvaluationResult's dtor is called.
  class EvaluationResult {
  public:
    ~EvaluationResult() {
      for (auto *P : Output)
        if (P)
          TF_DeleteTensor(P);
    }

    EvaluationResult(const EvaluationResult &) = delete;
    // Take ownership of Other's tensors; clearing Other.Output ensures its
    // destructor won't TF_DeleteTensor them a second time.
    EvaluationResult(EvaluationResult &&Other)
        : OutputSize(Other.OutputSize), Output(std::move(Other.Output)) {
      Other.Output.clear();
    }

    /// Get a pointer to the first element of the tensor at Index.
    template <typename T> T *getTensorValue(size_t Index) {
      return static_cast<T *>(TF_TensorData(Output[Index]));
    }

  private:
    friend class TFModelEvaluator;
    EvaluationResult(size_t OutputSize)
        : OutputSize(OutputSize), Output(OutputSize) {}

    const size_t OutputSize;
    std::vector<TF_Tensor *> Output;
  };

  using TFGraphPtr = std::unique_ptr<TF_Graph, decltype(&TF_DeleteGraph)>;
  using TFSessionOptionsPtr =
      std::unique_ptr<TF_SessionOptions, decltype(&TF_DeleteSessionOptions)>;
  using TFStatusPtr = std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)>;

  TFModelEvaluator(StringRef SavedModelPath,
                   const std::vector<std::string> &InputNames,
                   const std::vector<std::string> &OutputNames,
                   const char *Tags = "serve");
  ~TFModelEvaluator();
  TFModelEvaluator(const TFModelEvaluator &) = delete;
  TFModelEvaluator(TFModelEvaluator &&) = delete;

  /// Evaluate the model, assuming it is valid. Returns None if the evaluation
  /// fails or the model is invalid, or an EvaluationResult otherwise. The
  /// inputs are assumed to have been already provided via getInput(). When
  /// returning None, it also marks the object invalid. The returned
  /// EvaluationResult owns the output tensors (indexed to match OutputNames)
  /// and deletes them in its destructor.
  Optional<EvaluationResult> evaluate();

  /// Provides access to the input vector. The tensors must first be
  /// dimensioned via initInput; the user then sets their scalar values.
  std::vector<TF_Tensor *> &getInput() { return Input; }

  /// Returns true if the tensorflow model was loaded successfully, false
  /// otherwise.
  bool isValid() const { return !!Session; }

  /// Initialize the input at Index as a tensor of the given type and
  /// dimensions.
  void initInput(int Index, TF_DataType Type,
                 const std::vector<int64_t> &Dimensions);

private:
  /// The objects necessary for carrying out an evaluation of the SavedModel.
  /// They are expensive to set up, and we maintain them across all the
  /// evaluations of the model.
  TF_Session *Session = nullptr;
  TFGraphPtr Graph;
  TFSessionOptionsPtr Options;

  /// The specification of the input nodes.
  std::vector<TF_Output> InputFeed;

  /// The input tensors. They must match by index the corresponding InputFeed
  /// value. We set up the tensors once and just mutate their scalars before
  /// each evaluation. The input tensors keep their values after an evaluation.
  std::vector<TF_Tensor *> Input;

  /// The specification of the output nodes. When evaluating, the tensors in the
  /// output tensor vector must match by index the corresponding element in the
  /// OutputFeed.
  std::vector<TF_Output> OutputFeed;

  /// Reusable utility for deleting the session.
  void deleteSession();

  /// Reusable utility for ensuring we can bind the requested Name to a node in
  /// the SavedModel Graph.
  bool checkReportAndReset(const TF_Output &Output, StringRef Name);
};
} // namespace llvm

#endif // LLVM_HAVE_TF_API
#endif // LLVM_ANALYSIS_UTILS_TFUTILS_H
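
A minimal client sketch for TFModelEvaluator, following the protocol in the
class comment above; the model path, node names, and 1x4 shape are
illustrative assumptions:

  #include "llvm/Analysis/Utils/TFUtils.h"
  using namespace llvm;

  void runModelOnce() {
    std::vector<std::string> InputNames{"serving_default_input_1"};
    std::vector<std::string> OutputNames{"StatefulPartitionedCall"};
    TFModelEvaluator Evaluator("/tmp/saved_model", InputNames, OutputNames);
    if (!Evaluator.isValid())
      return; // The SavedModel failed to load.
    // Dimension input 0 as a 1x4 int32 tensor, then fill its scalars.
    Evaluator.initInput(0, TF_INT32, {1, 4});
    auto *Data =
        static_cast<int32_t *>(TF_TensorData(Evaluator.getInput()[0]));
    for (int I = 0; I < 4; ++I)
      Data[I] = I;
    if (auto ER = Evaluator.evaluate()) {
      // EvaluationResult owns the outputs; read before it is destroyed.
      float FirstOut = *ER->getTensorValue<float>(0);
      (void)FirstOut;
    }
  }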
llvm/lib/Analysis/CMakeLists.txt  +31 −9
set(CommonMLSources MLInlineAdvisor.cpp)
set(ReleaseModeMLSources ReleaseModeModelRunner.cpp)
set(DevelopmentModeMLSources TFUtils.cpp)
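# MLInlineAdvisor is shared by both modes; ReleaseModeModelRunner needs the
# AOT-compiled model (LLVM_HAVE_TF_AOT), while TFUtils needs the TensorFlow
# C API (LLVM_HAVE_TF_API).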

if (DEFINED LLVM_HAVE_TF_AOT OR DEFINED LLVM_HAVE_TF_API)
  set(MLPolicySources ${CommonMLSources})
  if (DEFINED LLVM_HAVE_TF_AOT)
    include(TensorFlowCompile)
    tfcompile(models/inliner serve action InlinerSizeModel llvm::InlinerSizeModel)
@@ -8,11 +11,26 @@ if (DEFINED LLVM_HAVE_TF_AOT)
      $<TARGET_OBJECTS:tf_xla_runtime_objects>
      ${GENERATED_OBJS}
    )
-  set(MLPolicySources ${CommonMLSources} ${ReleaseModeMLSources})
+    LIST(APPEND MLPolicySources ${ReleaseModeMLSources})
  else()
-  set(LLVM_OPTIONAL_SOURCES ${CommonMLSources} ${ReleaseModeMLSources})
+    LIST(APPEND LLVM_OPTIONAL_SOURCES ${ReleaseModeMLSources})
  endif()

  if (DEFINED LLVM_HAVE_TF_API)
    LIST(APPEND MLPolicySources ${DevelopmentModeMLSources})
    LIST(APPEND MLLinkDeps ${tensorflow_c_api})
  else()
    LIST(APPEND LLVM_OPTIONAL_SOURCES ${DevelopmentModeMLSources})
  endif()
else()
  LIST(APPEND LLVM_OPTIONAL_SOURCES 
    ${CommonMLSources}
    ${DevelopmentModeMLSources}
    ${ReleaseModeMLSources}
    )
endif()
  

add_llvm_component_library(LLVMAnalysis
  AliasAnalysis.cpp
  AliasAnalysisEvaluator.cpp
@@ -57,6 +75,7 @@ add_llvm_component_library(LLVMAnalysis
  InlineCost.cpp
  InlineAdvisor.cpp
  InlineFeaturesAnalysis.cpp
  InlineSizeEstimatorAnalysis.cpp
  InstCount.cpp
  InstructionPrecedenceTracking.cpp
  InstructionSimplify.cpp
@@ -124,4 +143,7 @@ add_llvm_component_library(LLVMAnalysis

  DEPENDS
  intrinsics_gen

  LINK_LIBS
  ${MLLinkDeps}
  )
llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp  +299 −0
//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements feature and label extraction for offline supervised learning
// of an IR-to-native-size model.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"

#ifdef LLVM_HAVE_TF_API
#include "llvm/Analysis/Utils/TFUtils.h"
#endif
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"

#include <algorithm>
#include <deque>

using namespace llvm;

AnalysisKey InlineSizeEstimatorAnalysis::Key;

#define DEBUG_TYPE "inline-size-estimator"

#ifdef LLVM_HAVE_TF_API
cl::opt<std::string> TFIR2NativeModelPath(
    "ml-inliner-ir2native-model", cl::Hidden,
    cl::desc("Path to saved model evaluating native size from IR."));

namespace {
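// Instruction.def invokes LAST_OTHER_INST(NR) with the highest opcode number;
// by defining that macro as a return statement and including the .def file,
// this function compiles down to "return <max instruction ID>;".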
unsigned getMaxInstructionID() {
#define LAST_OTHER_INST(NR) return NR;
#include "llvm/IR/Instruction.def"
}

class IRToNativeSizeLearning {
public:
  enum class NamedFeatureIndex : size_t {
    InitialSize,
    Blocks,
    Calls,
    IsLocal,
    IsLinkOnceODR,
    IsLinkOnce,
    Loops,
    MaxLoopDepth,
    MaxDomTreeLevel,

    NumNamedFeatures
  };
  static const size_t NumNamedFeatures =
      static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);
  struct FunctionFeatures {
    static std::vector<std::pair<size_t, size_t>>
        ImportantInstructionSuccessions;
    static const size_t FeatureCount;

    std::array<int32_t, NumNamedFeatures> NamedFeatures = {0};
    std::vector<int32_t> InstructionHistogram;
    std::vector<int32_t> InstructionPairHistogram;

    void fillTensor(int32_t *Ptr) const;
    int32_t &operator[](NamedFeatureIndex Pos) {
      return NamedFeatures[static_cast<size_t>(Pos)];
    }
  };
  IRToNativeSizeLearning() = default;

  static FunctionFeatures getFunctionFeatures(Function &F,
                                              FunctionAnalysisManager &FAM);

private:
  /// One-time sorting of the feature tuples, performed lazily via
  /// ManagedStatic on first use.
  struct SortFeatureTuples {
    bool IsSorted = false;
    SortFeatureTuples() {
      std::sort(FunctionFeatures::ImportantInstructionSuccessions.begin(),
                FunctionFeatures::ImportantInstructionSuccessions.end());
      IsSorted = true;
    }
  };

  static llvm::ManagedStatic<SortFeatureTuples> TupleSorter;

  static bool ensureSortedTuples() { return TupleSorter->IsSorted; }
};
llvm::ManagedStatic<IRToNativeSizeLearning::SortFeatureTuples>
    IRToNativeSizeLearning::TupleSorter;

// This is a point-in-time choice: we determined that including these pairs of
// consecutive instructions (in the IR layout available at inline time) as
// features improves the model's performance. We want to move away from manual
// feature selection.
// The vector is given as opcode pairs rather than labels because 1) labels
// weren't readily available, and 2) the successions were hand-extracted.
std::vector<std::pair<size_t, size_t>>
    IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions =
        {{1, 34},  {15, 27}, {53, 53}, {53, 34}, {1, 11},  {32, 2},  {2, 48},
         {28, 48}, {1, 45},  {49, 32}, {57, 56}, {55, 53}, {1, 28},  {57, 34},
         {1, 1},   {32, 28}, {32, 15}, {49, 28}, {53, 1},  {2, 53},  {48, 34},
         {28, 53}, {2, 32},  {1, 40},  {32, 48}, {29, 56}, {56, 32}, {55, 56},
         {48, 56}, {1, 31},  {33, 34}, {2, 28},  {1, 12},  {55, 1},  {31, 31},
         {65, 1},  {33, 56}, {32, 32}, {13, 13}, {1, 26},  {13, 26}, {2, 1},
         {1, 33},  {47, 49}, {64, 1},  {2, 38},  {34, 53}, {48, 2},  {55, 34},
         {34, 32}, {1, 5},   {56, 13}, {2, 2},   {2, 49},  {33, 2},  {49, 39},
         {56, 49}, {33, 49}, {32, 39}, {39, 57}, {29, 33}, {31, 34}, {32, 29},
         {47, 15}, {13, 34}, {2, 33},  {32, 49}, {49, 34}, {56, 33}, {1, 30},
         {33, 33}, {31, 33}, {2, 29},  {56, 7},  {32, 13}, {2, 55},  {56, 56},
         {2, 34},  {1, 42},  {34, 49}, {1, 20},  {32, 33}, {1, 25},  {53, 28},
         {1, 14},  {31, 49}, {28, 2},  {2, 13},  {2, 56},  {1, 32},  {56, 53},
         {65, 65}, {33, 53}, {64, 64}, {13, 2},  {34, 33}, {1, 4},   {49, 2},
         {1, 9},   {56, 1},  {33, 1},  {53, 57}, {32, 53}, {13, 56}, {32, 56},
         {55, 55}, {1, 18},  {49, 56}, {34, 34}, {1, 7},   {56, 64}, {32, 1},
         {13, 33}, {55, 28}, {49, 33}, {57, 57}, {56, 34}, {34, 56}, {33, 32},
         {32, 40}, {1, 29},  {53, 2},  {34, 1},  {32, 34}, {49, 49}, {1, 24},
         {40, 34}, {1, 13},  {38, 34}, {29, 2},  {34, 2},  {1, 39},  {1, 22},
         {1, 27},  {49, 1},  {1, 8},   {56, 2}};

// We have: 9 calculated features (the features here); 1 feature for each
// instruction opcode; and 1 feature for each manually-identified sequence.
// For the latter two, we build a histogram: we count the number of
// occurrences of each instruction opcode or succession of instructions,
// respectively.
// Note that instruction opcodes start from 1. For convenience, we also have an
// always 0 feature for the '0' opcode, hence the extra 1.
const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount =
    IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions
        .size() +
    getMaxInstructionID() + 1 + IRToNativeSizeLearning::NumNamedFeatures;
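// Worked out for this snapshot: the table above lists 137 opcode pairs and
// there are 9 named features, so FeatureCount is 137 + (getMaxInstructionID()
// + 1) + 9, where the middle term follows the opcode list in Instruction.def.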

size_t getSize(Function &F, TargetTransformInfo &TTI) {
  size_t Ret = 0;
  for (auto &BB : F)
    for (auto &I : BB)
      Ret += TTI.getInstructionCost(
          &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize);
  return Ret;
}

size_t getSize(Function &F, FunctionAnalysisManager &FAM) {
  auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
  return getSize(F, TTI);
}

unsigned getMaxDominatorTreeDepth(const Function &F,
                                  const DominatorTree &Tree) {
  unsigned Ret = 0;
  for (auto &BB : F)
    if (auto *TN = Tree.getNode(&BB))
      Ret = std::max(Ret, TN->getLevel());
  return Ret;
}
} // namespace

IRToNativeSizeLearning::FunctionFeatures
IRToNativeSizeLearning::getFunctionFeatures(Function &F,
                                            FunctionAnalysisManager &FAM) {
  assert(ensureSortedTuples() && "expected lazy initialization");

  auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F);
  FunctionFeatures FF;
  size_t InstrCount = getMaxInstructionID() + 1;
  FF.InstructionHistogram.resize(InstrCount);

  FF.InstructionPairHistogram.resize(
      FunctionFeatures::ImportantInstructionSuccessions.size());

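  // Opcode 0 never occurs in IR (opcodes start at 1), so seeding LastID with
  // 0 makes the first pair lookup of each function miss, as intended; see the
  // "extra 1" note above.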
  auto StartID = 0;
  auto LastID = StartID;
  auto getPairIndex = [](size_t a, size_t b) {
    auto I =
        std::find(FunctionFeatures::ImportantInstructionSuccessions.begin(),
                  FunctionFeatures::ImportantInstructionSuccessions.end(),
                  std::make_pair(a, b));
    if (I == FunctionFeatures::ImportantInstructionSuccessions.end())
      return -1;
    return static_cast<int>(std::distance(
        FunctionFeatures::ImportantInstructionSuccessions.begin(), I));
  };

  // We don't want debug calls, because they'd just add noise.
  for (auto &BB : F) {
    for (const auto &I : BB.instructionsWithoutDebug()) {
      auto ID = I.getOpcode();

      ++FF.InstructionHistogram[ID];
      int PairIndex = getPairIndex(LastID, ID);
      if (PairIndex >= 0)
        ++FF.InstructionPairHistogram[PairIndex];
      LastID = ID;
      if (isa<CallBase>(I))
        ++FF[NamedFeatureIndex::Calls];
    }
  }

  FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM);
  FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage();
  FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage();
  FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage();
  FF[NamedFeatureIndex::Blocks] =
      std::distance(F.getBasicBlockList().begin(), F.getBasicBlockList().end());
  auto &LI = FAM.getResult<LoopAnalysis>(F);
  FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end());
  for (auto &L : LI)
    FF[NamedFeatureIndex::MaxLoopDepth] =
        std::max(FF[NamedFeatureIndex::MaxLoopDepth],
                 static_cast<int32_t>(L->getLoopDepth()));
  FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree);
  return FF;
}

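// Flatten the features in the fixed order FeatureCount assumes: named
// features first, then the opcode histogram, then the pair histogram.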
void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const {
  std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr);
  Ptr += NamedFeatures.size();
  std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr);
  Ptr += InstructionHistogram.size();
  std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(),
            Ptr);
}

bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() {
  return !TFIR2NativeModelPath.empty();
}

InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {
  if (!isEvaluatorRequested()) {
    return;
  }
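  // The node names below follow TensorFlow's usual SavedModel serving
  // signature ("serving_default_<input>" feeding a StatefulPartitionedCall);
  // they must match the nodes of the exported model.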
  std::vector<std::string> InputNames{"serving_default_input_1"};
  std::vector<std::string> OutputNames{"StatefulPartitionedCall"};
  Evaluator = std::make_unique<TFModelEvaluator>(
      TFIR2NativeModelPath.getValue().c_str(), InputNames, OutputNames);
  if (!Evaluator || !Evaluator->isValid()) {
    Evaluator.reset();
    return;
  }
  static const std::vector<int64_t> Dim{
      1, static_cast<int64_t>(
             IRToNativeSizeLearning::FunctionFeatures::FeatureCount)};

  Evaluator->initInput(0, TF_INT32, Dim);
}

InlineSizeEstimatorAnalysis::Result
InlineSizeEstimatorAnalysis::run(const Function &F,
                                 FunctionAnalysisManager &FAM) {
  if (!Evaluator)
    return None;
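  // Feature extraction runs other function analyses, which take a non-const
  // Function; hence the const_cast below. F itself is not mutated.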
  auto Features = IRToNativeSizeLearning::getFunctionFeatures(
      const_cast<Function &>(F), FAM);
  int32_t *V = static_cast<int32_t *>(TF_TensorData(Evaluator->getInput()[0]));
  Features.fillTensor(V);
  auto ER = Evaluator->evaluate();
  if (!ER)
    return None;
  float Ret = *ER->getTensorValue<float>(0);
  if (Ret < 0.0)
    Ret = 0.0;
  return static_cast<size_t>(Ret);
}

InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
    InlineSizeEstimatorAnalysis &&Other)
    : Evaluator(std::move(Other.Evaluator)) {}

#else
namespace llvm {
class TFModelEvaluator {};
} // namespace llvm
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {}
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
    InlineSizeEstimatorAnalysis &&) {}
InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
InlineSizeEstimatorAnalysis::Result
InlineSizeEstimatorAnalysis::run(const Function &F,
                                 FunctionAnalysisManager &FAM) {
  return None;
}
bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; }
#endif
 No newline at end of file
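
Note: even in a build with LLVM_HAVE_TF_API defined, the estimator only
activates when the hidden -ml-inliner-ir2native-model flag names a SavedModel
directory; otherwise isEvaluatorRequested() returns false, no evaluator is
constructed, and run() returns None, just as in the stub above.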