Start AOT runtime measurement.
riga committed Apr 2, 2024
1 parent 54ba404 commit 3988304
Showing 21 changed files with 756 additions and 83 deletions.
22 changes: 12 additions & 10 deletions cmssw/MLProf/RuntimeMeasurement/plugins/ONNXInference.cc
@@ -31,9 +31,9 @@ class ONNXInference : public edm::stream::EDAnalyzer<edm::GlobalCache<ONNXRuntim
   static void globalEndJob(const ONNXRuntime*);
 
  private:
-  void beginJob();
+  void beginJob(){};
   void analyze(const edm::Event&, const edm::EventSetup&);
-  void endJob();
+  void endJob(){};
 
   inline float drawNormal() { return normalPdf_(rndGen_); }
@@ -76,10 +76,9 @@ void ONNXInference::fillDescriptions(edm::ConfigurationDescriptions& description
   // the rank (number of dimensions) of each input tensor
   desc.add<std::vector<int>>("inputRanks");
   // flat list of sizes of each dimension of each input tensor
-  // (for a graph with a 1D and a 2D input tensor, this would be a vector of
-  // three values)
+  // (for a graph with a 1D and a 2D input tensor, this would be a vector of three values)
   desc.add<std::vector<int>>("flatInputSizes");
-  // batch sizes to test
+  // batch size to test
   desc.add<int>("batchSize");
   // the number of calls to the graph to measure the runtime
   desc.add<int>("nCalls");
@@ -133,9 +132,10 @@ ONNXInference::ONNXInference(const edm::ParameterSet& iConfig, const ONNXRuntime
     inputType_ = mlprof::InputType::Random;
   } else if (inputTypeStr_ == "zeros") {
     inputType_ = mlprof::InputType::Zeros;
+  } else if (inputTypeStr_ == "ones") {
+    inputType_ = mlprof::InputType::Ones;
   } else {
-    throw cms::Exception("InvalidInputType")
-        << "input type must be either 'incremental', 'zeros' or 'random', got " << inputTypeStr_;
+    throw cms::Exception("InvalidInputType") << "input type unknown: " << inputTypeStr_;
   }
 
   // initialize the input_shapes array with inputRanks_ and flatInputSizes_
@@ -147,8 +147,7 @@ ONNXInference::ONNXInference(const edm::ParameterSet& iConfig, const ONNXRuntime
     i += rank;
   }
   // initialize the input data arrays
-  // note there is only one element in the FloatArrays type (i.e.
-  // vector<vector<float>>) variable
+  // note there is only one element in the FloatArrays type (i.e. vector<vector<float>>) variable
   for (int i = 0; i < nInputs_; i++) {
     inputArrays_.emplace_back(batchSize_ * flatInputSizes_[i], 0);
   }
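The two comments above describe how the flat size list is unrolled: the first inputRanks entry tells how many flatInputSizes values belong to the first tensor, and so on. A minimal standalone sketch of this splitting, with made-up example values rather than anything taken from the repository:

#include <iostream>
#include <vector>

int main() {
  // hypothetical graph with a 1D input of size 4 and a 2D input of size 3x2
  std::vector<int> inputRanks = {1, 2};
  std::vector<int> flatInputSizes = {4, 3, 2};  // "a vector of three values"

  // split the flat list into one shape vector per input, as the constructor does
  std::vector<std::vector<int>> inputShapes;
  int i = 0;
  for (int rank : inputRanks) {
    inputShapes.emplace_back(flatInputSizes.begin() + i, flatInputSizes.begin() + i + rank);
    i += rank;
  }

  // prints "4" and then "3 2"
  for (const auto& shape : inputShapes) {
    for (int d : shape) std::cout << d << " ";
    std::cout << std::endl;
  }
  return 0;
}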
@@ -167,7 +166,7 @@ void ONNXInference::analyze(const edm::Event& iEvent, const edm::EventSetup& iSe
     for (int i = 0; i < (int)group_data.size(); i++) {
       group_data[i] = inputType_ == mlprof::InputType::Incremental
                           ? float(i)
-                          : (inputType_ == mlprof::InputType::Zeros ? float(0) : drawNormal());
+                          : float(inputType_ == mlprof::InputType::Zeros ? 0 : drawNormal());
     }
   }
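Written out flat, the nested ternary above maps the configured input type to a value per element: incremental mode writes the running index, zeros writes 0, and anything else draws from a normal distribution. A standalone sketch of the same logic as a switch, where the enum and helper names are assumptions (mlprof::InputType is defined elsewhere in the repository) and the newly added ones mode is included:

#include <random>
#include <vector>

// assumed stand-in for mlprof::InputType
enum class InputType { Incremental, Zeros, Ones, Random };

std::vector<float> makeValues(InputType type, int n) {
  std::mt19937 rndGen(42);
  std::normal_distribution<float> normalPdf(0.0f, 1.0f);  // mirrors drawNormal()
  std::vector<float> values(n);
  for (int i = 0; i < n; i++) {
    switch (type) {
      case InputType::Incremental: values[i] = float(i); break;
      case InputType::Zeros:       values[i] = 0.0f;     break;
      case InputType::Ones:        values[i] = 1.0f;     break;
      case InputType::Random:      values[i] = normalPdf(rndGen); break;
    }
  }
  return values;
}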

@@ -183,7 +182,10 @@ void ONNXInference::analyze(const edm::Event& iEvent, const edm::EventSetup& iSe
   std::vector<float> runtimes;
   for (int r = 0; r < nCalls_; r++) {
     auto start = std::chrono::high_resolution_clock::now();
+
+    // inference
     outputs = globalCache()->run(inputTensorNames_, inputArrays_, input_shapes_, outputTensorNames_, batchSize_);
+
     auto end = std::chrono::high_resolution_clock::now();
     std::chrono::duration<float> runtime_in_seconds = (end - start);
     runtimes.push_back(runtime_in_seconds.count() * 1000);
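The timing itself is plain std::chrono bookkeeping: one timestamp pair per call, converted to milliseconds and collected for later aggregation. The pattern can be tried in isolation; in the sketch below the busy loop is only a stand-in for the measured inference call:

#include <chrono>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  const int nCalls = 100;
  std::vector<float> runtimes;
  for (int r = 0; r < nCalls; r++) {
    auto start = std::chrono::high_resolution_clock::now();

    // stand-in workload for the inference call
    volatile double dummy = 0.0;
    for (int i = 0; i < 1000000; i++) dummy += i;

    auto end = std::chrono::high_resolution_clock::now();
    std::chrono::duration<float> runtime_in_seconds = end - start;
    runtimes.push_back(runtime_in_seconds.count() * 1000);  // seconds -> ms
  }

  float mean = std::accumulate(runtimes.begin(), runtimes.end(), 0.0f) / runtimes.size();
  std::cout << "mean runtime: " << mean << " ms" << std::endl;
  return 0;
}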
20 changes: 10 additions & 10 deletions cmssw/MLProf/RuntimeMeasurement/plugins/TFInference.cc
@@ -29,9 +29,9 @@ class TFInference : public edm::stream::EDAnalyzer<edm::GlobalCache<tensorflow::Session
   static void globalEndJob(const tensorflow::SessionCache*);
 
  private:
-  void beginJob();
+  void beginJob(){};
   void analyze(const edm::Event&, const edm::EventSetup&);
-  void endJob();
+  void endJob(){};
 
   inline float drawNormal() { return normalPdf_(rndGen_); }
   tensorflow::Tensor createInputTensor(int rank, std::vector<int> shape);
@@ -83,7 +83,7 @@ void TFInference::fillDescriptions(edm::ConfigurationDescriptions& descriptions)
   // flat list of sizes of each dimension of each input tensor
   // (for a graph with a 1D and a 2D input tensor, this would be a vector of three values)
   desc.add<std::vector<int>>("flatInputSizes");
-  // batch sizes to test
+  // batch size to test
   desc.add<int>("batchSize");
   // the number of calls to the graph to measure the runtime
   desc.add<int>("nCalls");
@@ -137,16 +137,13 @@ TFInference::TFInference(const edm::ParameterSet& config, const tensorflow::Sess
     inputType_ = mlprof::InputType::Random;
   } else if (inputTypeStr_ == "zeros") {
     inputType_ = mlprof::InputType::Zeros;
+  } else if (inputTypeStr_ == "ones") {
+    inputType_ = mlprof::InputType::Ones;
   } else {
-    throw cms::Exception("InvalidInputType")
-        << "input type must be either 'incremental', 'zeros' or 'random', got " << inputTypeStr_;
+    throw cms::Exception("InvalidInputType") << "input type unknown: " << inputTypeStr_;
   }
 }
 
-void TFInference::beginJob() {}
-
-void TFInference::endJob() {}
-
 tensorflow::Tensor TFInference::createInputTensor(int rank, std::vector<int> shape) {
   // convert the shape to a tf shape
   tensorflow::TensorShape tShape;
@@ -162,7 +159,7 @@ tensorflow::Tensor TFInference::createInputTensor(int rank, std::vector<int> sha
   for (int i = 0; i < tensor.NumElements(); i++, data++) {
     *data = inputType_ == mlprof::InputType::Incremental
                 ? float(i)
-                : (inputType_ == mlprof::InputType::Zeros ? float(0) : drawNormal());
+                : float(inputType_ == mlprof::InputType::Zeros ? 0 : drawNormal());
   }
 
   return tensor;
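createInputTensor fills the tensor through its flat float buffer, advancing a raw pointer over NumElements() entries. A sketch of the surrounding pattern, assuming the CMSSW TensorFlow interface header is available; the shape-building lines are an assumption, since that part of the function is collapsed in this view:

#include "PhysicsTools/TensorFlow/interface/TensorFlow.h"

tensorflow::Tensor makeIncrementalTensor(int batchSize, int nFeatures) {
  // assumed shape construction for a rank-2 input (these lines are hidden in the diff)
  tensorflow::TensorShape tShape;
  tShape.AddDim(batchSize);
  tShape.AddDim(nFeatures);
  tensorflow::Tensor tensor(tensorflow::DT_FLOAT, tShape);

  // flat-pointer fill, as in the visible part of createInputTensor (incremental mode)
  float* data = tensor.flat<float>().data();
  for (int i = 0; i < tensor.NumElements(); i++, data++) {
    *data = float(i);
  }
  return tensor;
}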
@@ -194,7 +191,10 @@ void TFInference::analyze(const edm::Event& event, const edm::EventSetup& setup)
   std::vector<float> runtimes;
   for (int r = 0; r < nCalls_; r++) {
     auto start = std::chrono::high_resolution_clock::now();
+
+    // inference
     tensorflow::run(session_, inputs, outputTensorNames_, &outputs);
+
     auto end = std::chrono::high_resolution_clock::now();
     std::chrono::duration<float> runtime_in_seconds = (end - start);
     runtimes.push_back(runtime_in_seconds.count() * 1000);
12 changes: 12 additions & 0 deletions cmssw/MLProf/RuntimeMeasurement/plugins/aot/BuildFile.xml
@@ -0,0 +1,12 @@
+<library name="MLProfRuntimeMeasurementTFAOTInference" file="TFAOTInference.cc">
+  <use name="FWCore/Framework"/>
+  <use name="FWCore/PluginManager"/>
+  <use name="FWCore/ParameterSet"/>
+
+  <use name="PhysicsTools/TensorFlowAOT"/>
+  <use name="MLProf/Utils"/>
+
+  <use name="tfaot-model-mlprof-test"/>
+
+  <flags EDM_PLUGIN="1"/>
+</library>
(the remaining 18 changed files are not shown)
