
Commit

onnx implementation and PlotRuntimesMultipleParams implementation before refactoring
nprouvost committed Jan 23, 2024
1 parent 8bf0846 commit 76c5795
Showing 11 changed files with 350 additions and 22 deletions.
6 changes: 6 additions & 0 deletions cmssw/MLProf/ONNXRuntimeModule/plugins/BuildFile.xml
@@ -0,0 +1,6 @@
<use name="FWCore/Framework" />
<use name="FWCore/PluginManager" />
<use name="FWCore/ParameterSet" />
<use name="PhysicsTools/ONNXRuntime" />

<flags EDM_PLUGIN="1" />
216 changes: 216 additions & 0 deletions cmssw/MLProf/ONNXRuntimeModule/plugins/ONNXPluginRuntime.cpp
@@ -0,0 +1,216 @@
/*
* Example plugin to demonstrate the direct multi-threaded inference with ONNX Runtime.
*/

#include <chrono>
#include <fstream>
#include <list>
#include <memory>
#include <numeric>
#include <random>
#include <stdexcept>
#include <iostream>

#include "FWCore/Framework/interface/Event.h"
#include "FWCore/Framework/interface/Frameworkfwd.h"
#include "FWCore/Framework/interface/MakerMacros.h"
#include "FWCore/Framework/interface/stream/EDAnalyzer.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"

#include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h"

#include "MLProf/Utils/interface/utils.h"

using namespace cms::Ort;

class ONNXRuntimePlugin : public edm::stream::EDAnalyzer<edm::GlobalCache<ONNXRuntime>> {
public:
explicit ONNXRuntimePlugin(const edm::ParameterSet &, const ONNXRuntime *);
static void fillDescriptions(edm::ConfigurationDescriptions&);

static std::unique_ptr<ONNXRuntime> initializeGlobalCache(const edm::ParameterSet &);
static void globalEndJob(const ONNXRuntime *);

private:
void beginJob();
void analyze(const edm::Event&, const edm::EventSetup&);
void endJob();

inline float drawNormal() { return normalPdf_(rndGen_); }

// parameters
std::vector<std::string> inputTensorNames_;
std::vector<std::string> outputTensorNames_;
std::string outputFile_;
std::string inputTypeStr_;
std::vector<int> inputRanks_;
std::vector<int> flatInputSizes_;
int batchSize_;
int nCalls_;

// other members
int nInputs_;
int nPreCalls_;
mlprof::InputType inputType_;
std::random_device rnd_;
std::default_random_engine rndGen_;
std::normal_distribution<float> normalPdf_;

std::vector<std::vector<int64_t>> input_shapes_;
FloatArrays data_; // each stream hosts its own data
};


void ONNXRuntimePlugin::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
// defining this function will lead to a *_cfi file being generated when compiling
edm::ParameterSetDescription desc;
// the path to the file containing the graph
desc.add<std::string>("graphPath");
// the names of the input tensors
desc.add<std::vector<std::string>>("inputTensorNames");
// the names of the output tensors
desc.add<std::vector<std::string>>("outputTensorNames");
// the name of the output csv file
desc.add<std::string>("outputFile");
// the type of input values, either "incremental" or "random"
desc.add<std::string>("inputType", "random");
// the rank (number of dimensions) of each input tensor
desc.add<std::vector<int>>("inputRanks");
// flat list of sizes of each dimension of each input tensor
// (for a graph with a 1D and a 2D input tensor, this would be a vector of three values)
desc.add<std::vector<int>>("flatInputSizes");
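// (illustration: for a hypothetical graph with a 1D input of size 3 and a 2D input of size 4x5,
// inputRanks would be {1, 2} and flatInputSizes would be {3, 4, 5})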
// batch sizes to test
desc.add<int>("batchSize");
// the number of calls to the graph to measure the runtime
desc.add<int>("nCalls");

// desc.add<edm::FileInPath>("model_path", edm::FileInPath("MLProf/ONNXRuntimeModule/data/model.onnx"));
// desc.add<std::vector<std::string>>("input_names", std::vector<std::string>({"my_input"}));
descriptions.addWithDefaultLabel(desc);
}


ONNXRuntimePlugin::ONNXRuntimePlugin(const edm::ParameterSet &iConfig, const ONNXRuntime *cache)
: inputTensorNames_(iConfig.getParameter<std::vector<std::string>>("inputTensorNames")),
outputTensorNames_(iConfig.getParameter<std::vector<std::string>>("outputTensorNames")),
outputFile_(iConfig.getParameter<std::string>("outputFile")),
inputTypeStr_(iConfig.getParameter<std::string>("inputType")),
inputRanks_(iConfig.getParameter<std::vector<int>>("inputRanks")),
flatInputSizes_(iConfig.getParameter<std::vector<int>>("flatInputSizes")),
batchSize_(iConfig.getParameter<int>("batchSize")),
nCalls_(iConfig.getParameter<int>("nCalls")),
nInputs_(inputTensorNames_.size()),
nPreCalls_(10),
rndGen_(rnd_()),
normalPdf_(0.0, 1.0)
{
// the number of input ranks must match the number of input tensors
if ((int)inputRanks_.size() != nInputs_) {
throw cms::Exception("InvalidInputRanks") << "number of input ranks must match number of input tensors";
}
// the input must be at least 1 dimensional
for (auto rank : inputRanks_) {
if (rank < 1) {
throw cms::Exception("InvalidRank") << "only ranks above 0 are supported, got " << rank;
}
}
// the sum of ranks must match the number of flat input sizes
if (std::accumulate(inputRanks_.begin(), inputRanks_.end(), 0) != (int)flatInputSizes_.size()) {
throw cms::Exception("InvalidFlatInputSizes")
<< "sum of input ranks must match number of flat input sizes, got " << flatInputSizes_.size();
}
// batch size must be positive
if (batchSize_ < 1) {
throw cms::Exception("InvalidBatchSize") << "batch sizes must be positive, got " << batchSize_;
}

// input sizes must be positive
for (auto size : flatInputSizes_) {
if (size < 1) {
throw cms::Exception("InvalidInputSize") << "input sizes must be positive, got " << size;
}
}
// check the input type
if (inputTypeStr_ == "incremental") {
inputType_ = mlprof::InputType::Incremental;
} else if (inputTypeStr_ == "random") {
inputType_ = mlprof::InputType::Random;
} else if (inputTypeStr_ == "zeros") {
inputType_ = mlprof::InputType::Zeros;
} else {
throw cms::Exception("InvalidInputType")
<< "input type must be either 'incremental', 'zeros' or 'random', got " << inputTypeStr_;
}

// initialize the input_shapes array with inputRanks_ and flatInputSizes_
int i = 0;
for (auto rank : inputRanks_) {
std::vector<int64_t> input_shape(flatInputSizes_.begin() + i, flatInputSizes_.begin() + i + rank);
input_shape.insert(input_shape.begin(), batchSize_);
input_shapes_.push_back(input_shape);
i += rank;
}
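// (illustration: with batchSize_ = 2, inputRanks_ = {1, 2} and flatInputSizes_ = {3, 4, 5},
// input_shapes_ becomes {{2, 3}, {2, 4, 5}})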
// initialize the input data arrays
// note: data_ is a single FloatArrays object (i.e. vector<vector<float>>), holding one flat,
// batch-sized vector per input tensor
for (int i = 0; i < nInputs_; i++) {
data_.emplace_back(flatInputSizes_[i] * batchSize_, 0);
}
}


std::unique_ptr<ONNXRuntime> ONNXRuntimePlugin::initializeGlobalCache(const edm::ParameterSet &iConfig) {
return std::make_unique<ONNXRuntime>(edm::FileInPath(iConfig.getParameter<std::string>("graphPath")).fullPath());
}

void ONNXRuntimePlugin::globalEndJob(const ONNXRuntime *cache) {}

void ONNXRuntimePlugin::analyze(const edm::Event &iEvent, const edm::EventSetup &iSetup) {
for (int i = 0; i < nInputs_; i++) {
std::vector<float> &group_data = data_[i];
// fill the input
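// (illustration: with inputType 'incremental' and a flat tensor size of 4, the values become
// {0, 1, 2, 3}; 'zeros' fills with zeros and 'random' draws from a standard normal distribution)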
for (int j = 0; j < (int)group_data.size(); j++) {
group_data[j] = inputType_ == mlprof::InputType::Incremental ? float(j) :
inputType_ == mlprof::InputType::Zeros ? float(0) :
drawNormal();
}
}

// run prediction and get outputs
std::vector<std::vector<float>> outputs;

// pre calls to "warm up"
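// (the first calls typically include one-off costs such as memory allocation and lazy graph
// optimization, which is why they are excluded from the timed measurement below)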
for (int r = 0; r < nPreCalls_; r++) {
outputs = globalCache()->run(inputTensorNames_, data_, input_shapes_, outputTensorNames_, batchSize_);
// std::cout << "nprerun" << r << std::endl;
}

// actual calls to measure runtimes
std::vector<float> runtimes;
for (int r = 0; r < nCalls_; r++) {
auto start = std::chrono::high_resolution_clock::now();
outputs = globalCache()->run(inputTensorNames_, data_, input_shapes_, outputTensorNames_, batchSize_);
auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<float> runtime_in_seconds = (end - start);
// std::cout << "nrun" << r << std::endl;
// std::cout << "runtime in seconds" << runtime_in_seconds.count() << std::endl;
runtimes.push_back(runtime_in_seconds.count() * 1000);
}

// // print the input and output data
// std::cout << "input data -> ";
// for ( const auto &input_tensor : data_ ){
// for ( const auto &value : input_tensor ) std::cout << value << ' ';
// std::cout << std::endl;
// }
// std::cout << std::endl << "output data -> ";
// for (auto &output_tensor: outputs) {
// for ( const auto &value : output_tensor ) std::cout << value << ' ';
// std::cout << std::endl;
// }
// std::cout << std::endl;

// save them
mlprof::writeRuntimes(outputFile_, batchSize_, runtimes);
}


DEFINE_FWK_MODULE(ONNXRuntimePlugin);
64 changes: 64 additions & 0 deletions cmssw/MLProf/ONNXRuntimeModule/test/onnx_runtime_template_cfg.py
@@ -0,0 +1,64 @@
# coding: utf-8

import FWCore.ParameterSet.Config as cms
from FWCore.ParameterSet.VarParsing import VarParsing

# setup minimal options
options = VarParsing("python")
options.register(
"batchSizes",
[1],
VarParsing.multiplicity.list,
VarParsing.varType.int,
"Batch sizes to be tested",
)
options.register(
"csvFile",
"results.csv",
VarParsing.multiplicity.singleton,
VarParsing.varType.string,
"The path of the csv file to save results",
)
options.parseArguments()


# define the process to run
process = cms.Process("MLPROF")

# minimal configuration
process.load("FWCore.MessageService.MessageLogger_cfi")
process.MessageLogger.cerr.FwkReport.reportEvery = 1
process.maxEvents = cms.untracked.PSet(
input=cms.untracked.int32(__N_EVENTS__), # noqa
)
process.source = cms.Source(
"PoolSource",
fileNames=cms.untracked.vstring(*__INPUT_FILES__), # noqa
)

# process options
process.options = cms.untracked.PSet(
allowUnscheduled=cms.untracked.bool(True),
wantSummary=cms.untracked.bool(False),
)

# setup options for multithreaded processing
process.options.numberOfThreads=cms.untracked.uint32(1)
process.options.numberOfStreams=cms.untracked.uint32(0)
process.options.numberOfConcurrentLuminosityBlocks=cms.untracked.uint32(1)


# setup the ONNXRuntimePlugin by loading the auto-generated cfi (see ONNXRuntimePlugin::fillDescriptions)
process.load("MLProf.ONNXRuntimeModule.onnxRuntimePlugin_cfi")
process.onnxRuntimePlugin.graphPath = cms.string("__GRAPH_PATH__")
process.onnxRuntimePlugin.inputTensorNames = cms.vstring(__INPUT_TENSOR_NAMES__) # noqa
process.onnxRuntimePlugin.outputTensorNames = cms.vstring(__OUTPUT_TENSOR_NAMES__) # noqa
process.onnxRuntimePlugin.outputFile = cms.string(options.csvFile)
process.onnxRuntimePlugin.inputType = cms.string("__INPUT_TYPE__")
process.onnxRuntimePlugin.inputRanks = cms.vint32(__INPUT_RANKS__) # noqa
process.onnxRuntimePlugin.flatInputSizes = cms.vint32(__FLAT_INPUT_SIZES__) # noqa
process.onnxRuntimePlugin.batchSize = cms.int32(options.batchSizes[0])
process.onnxRuntimePlugin.nCalls = cms.int32(__N_CALLS__) # noqa

# define what to run in the path
process.p = cms.Path(process.onnxRuntimePlugin)
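For illustration only (not part of this commit), a minimal sketch of how the template placeholders above could be filled before running the rendered configuration with cmsRun; every replacement value below is an assumption:

# coding: utf-8
# hypothetical helper that renders onnx_runtime_template_cfg.py; all paths and values are examples
from pathlib import Path

template = Path("onnx_runtime_template_cfg.py").read_text()
replacements = {
    "__N_EVENTS__": "1",
    "__INPUT_FILES__": '["file:/path/to/events.root"]',
    "__GRAPH_PATH__": "/path/to/simple_dnn.onnx",
    "__INPUT_TENSOR_NAMES__": '"input_0"',
    "__OUTPUT_TENSOR_NAMES__": '"output_0"',
    "__INPUT_TYPE__": "random",
    "__INPUT_RANKS__": "1",
    "__FLAT_INPUT_SIZES__": "784",
    "__N_CALLS__": "100",
}
for placeholder, value in replacements.items():
    template = template.replace(placeholder, value)
Path("onnx_runtime_cfg.py").write_text(template)

# the rendered config could then be run with, e.g.:
#   cmsRun onnx_runtime_cfg.py batchSizes=1 csvFile=results.csv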
4 changes: 2 additions & 2 deletions cmssw/MLProf/RuntimeMeasurement/plugins/TFRuntime.cpp
@@ -110,7 +110,7 @@ TFRuntime::TFRuntime(const edm::ParameterSet& config, const tensorflow::SessionC
if ((int)inputRanks_.size() != nInputs_) {
throw cms::Exception("InvalidInputRanks") << "number of input ranks must match number of input tensors";
}
- // input ranks below 1 and above 3 are not supported
+ // the input must be at least 1 dimensional
for (auto rank : inputRanks_) {
if (rank < 1) {
throw cms::Exception("InvalidRank") << "only ranks above 0 are supported, got " << rank;
@@ -127,7 +127,7 @@ TFRuntime::TFRuntime(const edm::ParameterSet& config, const tensorflow::SessionC
throw cms::Exception("InvalidBatchSize") << "batch sizes must be positive, got " << batchSize;
}
}
- // input sizes must be postitive
+ // input sizes must be positive
for (auto size : flatInputSizes_) {
if (size < 1) {
throw cms::Exception("InvalidInputSize") << "input sizes must be positive, got " << size;
3 changes: 2 additions & 1 deletion examples/cnn/model.json
@@ -11,5 +11,6 @@
"name": "Identity"
}
],
- "network_name": "cnn"
+ "network_name": "cnn",
+ "inference_engine": "tf"
}
3 changes: 2 additions & 1 deletion examples/dnn_2_inputs/model.json
@@ -15,5 +15,6 @@
"name": "Identity"
}
],
- "network_name": "dnn_2_inputs"
+ "network_name": "dnn_2_inputs",
+ "inference_engine": "tf"
}
3 changes: 2 additions & 1 deletion examples/simple_dnn/model.json
@@ -11,5 +11,6 @@
"name": "Identity"
}
],
- "network_name": "dnn"
+ "network_name": "dnn",
+ "inference_engine": "tf"
}
16 changes: 16 additions & 0 deletions examples/simple_dnn/model_onnx.json
@@ -0,0 +1,16 @@
{
"file": "simple_dnn.onnx",
"inputs": [
{
"name": "input_0",
"shape": [784]
}
],
"outputs": [
{
"name": "output_0"
}
],
"network_name": "dnn_onnx",
"inference_engine": "onnx"
}
Binary file added examples/simple_dnn/simple_dnn.onnx
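As an aside (not part of this commit), a hedged sketch for cross-checking the tensor names and shapes declared above against the ONNX file itself, assuming onnxruntime is installed and the file path below is correct:

# coding: utf-8
# print the input/output names and shapes of the (assumed) ONNX file
import onnxruntime as ort

session = ort.InferenceSession("examples/simple_dnn/simple_dnn.onnx")
for inp in session.get_inputs():
    print("input:", inp.name, inp.shape)    # expected to list "input_0" with a 784-sized dimension
for out in session.get_outputs():
    print("output:", out.name, out.shape)   # expected to list "output_0"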
4 changes: 2 additions & 2 deletions mlprof/tasks/parameters.py
@@ -72,8 +72,8 @@ def __init__(self, *args, **kwargs):
self.input_file = os.path.abspath(os.path.expandvars(os.path.expanduser(self.input_type)))
if not os.path.exists(self.input_file):
raise ValueError(
- f"input type '{self.input_type}' is neither 'random' nor 'incremental' nor 'zeros' nor a path to an existing "
- f"root file",
+ f"input type '{self.input_type}' is neither 'random' nor 'incremental' nor 'zeros' nor "
+ f"a path to an existing root file",
)

# cached model content