
Commit ae0f88a

add DLA support: C++ && Python api (PaddlePaddle#30165)

Authored by shangzhizhou

* add dla
* add dla done
* add python api

Co-authored-by: shangzhizhou <[email protected]>
1 parent 8c5f158 commit ae0f88a

File tree: 10 files changed (+91, −2 lines)

paddle/fluid/inference/analysis/argument.h

Lines changed: 2 additions & 0 deletions
@@ -199,6 +199,8 @@ struct Argument {
   DECL_ARGUMENT_FIELD(disable_trt_plugin_fp16, CloseTrtPluginFp16, bool);

   DECL_ARGUMENT_FIELD(use_tensorrt, UseTensorRT, bool);
+  DECL_ARGUMENT_FIELD(tensorrt_use_dla, TensorRtUseDLA, bool);
+  DECL_ARGUMENT_FIELD(tensorrt_dla_core, TensorRtDLACore, int);
   DECL_ARGUMENT_FIELD(tensorrt_max_batch_size, TensorRtMaxBatchSize, int);
   DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int);
   DECL_ARGUMENT_FIELD(tensorrt_min_subgraph_size, TensorRtMinSubgraphSize, int);
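For orientation: DECL_ARGUMENT_FIELD is the macro Paddle uses to declare a field plus its accessors on Argument. The snippet below is a hand-written sketch of the accessor shape this commit relies on, inferred from the calls made in ir_pass_manager.cc and analysis_predictor.cc further down; it is not the macro's actual expansion.

// Sketch only (assumption): the accessor pair implied by
// DECL_ARGUMENT_FIELD(tensorrt_use_dla, TensorRtUseDLA, bool) and
// DECL_ARGUMENT_FIELD(tensorrt_dla_core, TensorRtDLACore, int).
struct ArgumentDlaSketch {
  bool tensorrt_use_dla_{false};
  int tensorrt_dla_core_{0};

  // snake_case getters, as read by ir_pass_manager.cc below
  bool tensorrt_use_dla() const { return tensorrt_use_dla_; }
  int tensorrt_dla_core() const { return tensorrt_dla_core_; }

  // CamelCase setters, as called by analysis_predictor.cc below
  void SetTensorRtUseDLA(bool use_dla) { tensorrt_use_dla_ = use_dla; }
  void SetTensorRtDLACore(int dla_core) { tensorrt_dla_core_ = dla_core; }
};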

paddle/fluid/inference/analysis/ir_pass_manager.cc

Lines changed: 2 additions & 0 deletions
@@ -143,6 +143,8 @@ void IRPassManager::CreatePasses(Argument *argument,
                         argument->optim_input_shape()));
       pass->Set("trt_disabled_ops", new std::vector<std::string>(
                                         argument->tensorrt_disabled_ops()));
+      pass->Set("trt_use_dla", new bool(argument->tensorrt_use_dla()));
+      pass->Set("trt_dla_core", new int(argument->tensorrt_dla_core()));
       // Setting the disable_trt_plugin_fp16 to true means that TRT plugin will
       // not
       // run fp16.

paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc

Lines changed: 2 additions & 0 deletions
@@ -320,6 +320,8 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
           min_input_shape, max_input_shape, opt_input_shape,
           disable_trt_plugin_fp16);
   trt_engine->SetUseOSS(Get<bool>("use_oss"));
+  trt_engine->SetUseDLA(Get<bool>("trt_use_dla"));
+  trt_engine->SetDLACore(Get<int>("trt_dla_core"));

   trt_engine->SetWithErnie(
       graph->Has(framework::ir::kEmbEltwiseLayernormPass) &&

paddle/fluid/inference/api/analysis_config.cc

Lines changed: 10 additions & 0 deletions
@@ -126,6 +126,8 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(tensorrt_min_subgraph_size_);
   CP_MEMBER(tensorrt_precision_mode_);
   CP_MEMBER(trt_disabled_ops_);
+  CP_MEMBER(trt_use_dla_);
+  CP_MEMBER(trt_dla_core_);
   CP_MEMBER(trt_use_static_engine_);
   CP_MEMBER(trt_use_calib_mode_);
   CP_MEMBER(trt_use_oss_);
@@ -305,6 +307,11 @@ void AnalysisConfig::SetTRTDynamicShapeInfo(
   disable_trt_plugin_fp16_ = disable_trt_plugin_fp16;
 }

+void AnalysisConfig::EnableTensorRtDLA(int dla_core) {
+  trt_use_dla_ = true;
+  trt_dla_core_ = dla_core;
+}
+
 void AnalysisConfig::Exp_DisableTensorRtOPs(
     const std::vector<std::string> &ops) {
   trt_disabled_ops_.insert(trt_disabled_ops_.end(), ops.begin(), ops.end());
@@ -452,6 +459,9 @@ std::string AnalysisConfig::SerializeInfoCache() {
   for (auto &op : trt_disabled_ops_) ss << op.c_str();
   ss << ";";

+  ss << trt_use_dla_;
+  ss << trt_dla_core_;
+
   ss << enable_memory_optim_;

   ss << use_mkldnn_;

paddle/fluid/inference/api/analysis_predictor.cc

Lines changed: 2 additions & 0 deletions
@@ -477,6 +477,8 @@ void AnalysisPredictor::PrepareArgument() {
     argument_.SetTensorRtMaxBatchSize(config_.tensorrt_max_batchsize_);
     argument_.SetTensorRtMinSubgraphSize(config_.tensorrt_min_subgraph_size_);
     argument_.SetTensorRtDisabledOPs(config_.trt_disabled_ops_);
+    argument_.SetTensorRtUseDLA(config_.trt_use_dla_);
+    argument_.SetTensorRtDLACore(config_.trt_dla_core_);
     argument_.SetTensorRtPrecisionMode(config_.tensorrt_precision_mode_);
     argument_.SetTensorRtUseStaticEngine(config_.trt_use_static_engine_);
     argument_.SetTensorRtUseCalibMode(config_.trt_use_calib_mode_);

paddle/fluid/inference/api/paddle_analysis_config.h

Lines changed: 17 additions & 0 deletions
@@ -326,13 +326,28 @@ struct PD_INFER_DECL AnalysisConfig {
   /// V7.2.1 is needed.
   ///
   void EnableTensorRtOSS();
+
   ///
   /// \brief A boolean state telling whether to use the TensorRT OSS.
   ///
   /// \return bool Whether to use the TensorRT OSS.
   ///
   bool tensorrt_oss_enabled() { return trt_use_oss_; }

+  ///
+  /// \brief Enable TensorRT DLA
+  /// \param dla_core ID of DLACore, which should be 0, 1,
+  ///        ..., IBuilder.getNbDLACores() - 1
+  ///
+  void EnableTensorRtDLA(int dla_core = 0);
+
+  ///
+  /// \brief A boolean state telling whether to use the TensorRT DLA.
+  ///
+  /// \return bool Whether to use the TensorRT DLA.
+  ///
+  bool tensorrt_dla_enabled() { return trt_use_dla_; }
+
   ///
   /// \brief Turn on the usage of Lite sub-graph engine.
   ///
@@ -591,6 +606,8 @@ struct PD_INFER_DECL AnalysisConfig {
   bool trt_use_static_engine_{false};
   bool trt_use_calib_mode_{true};
   bool trt_use_oss_{false};
+  bool trt_use_dla_{false};
+  int trt_dla_core_{0};
   std::map<std::string, std::vector<int>> min_input_shape_{};
   std::map<std::string, std::vector<int>> max_input_shape_{};
   std::map<std::string, std::vector<int>> optim_input_shape_{};
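Putting the new public methods together with the existing AnalysisConfig API, end-to-end C++ usage looks roughly like the sketch below. The include path and model directory are placeholders, and kHalf is chosen because, per the engine.cc change in this commit, DLA only engages for int8/fp16 builds.

#include "paddle_inference_api.h"  // header name is a placeholder

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./mobilenet_model");  // placeholder model directory
  config.EnableUseGpu(100 /* MB */, 0 /* gpu_id */);
  // DLA requires a reduced-precision build, so request kHalf for the
  // TensorRT subgraphs.
  config.EnableTensorRtEngine(1 << 20 /* workspace_size */,
                              1 /* max_batch_size */,
                              3 /* min_subgraph_size */,
                              paddle::AnalysisConfig::Precision::kHalf);
  config.EnableTensorRtDLA(0);  // valid cores: 0 .. getNbDLACores() - 1
  return config.tensorrt_dla_enabled() ? 0 : 1;
}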

paddle/fluid/inference/tensorrt/engine.cc

Lines changed: 23 additions & 0 deletions
@@ -176,6 +176,29 @@ void TensorRTEngine::FreezeNetwork() {
     }
   }

+  if (use_dla_) {
+    if (!enable_int8 && !enable_fp16) {
+      LOG(WARNING) << "TensorRT DLA must be used with int8 or fp16, but you "
+                      "set float32, so DLA is not used.";
+    } else if (infer_builder_->getNbDLACores() == 0) {
+      LOG(WARNING)
+          << "TensorRT DLA is set by config, but your device does not have "
+             "DLA, so DLA is not used.";
+    } else {
+      if (dla_core_ < 0 || dla_core_ >= infer_builder_->getNbDLACores()) {
+        LOG(WARNING) << "Invalid DLACore, must be 0 <= DLACore < "
+                     << infer_builder_->getNbDLACores() << ", but got "
+                     << dla_core_ << ", so 0 is used as the default.";
+        dla_core_ = 0;
+      }
+      infer_builder_->setDefaultDeviceType(nvinfer1::DeviceType::kDLA);
+      infer_builder_->setDLACore(dla_core_);
+      infer_builder_->allowGPUFallback(true);
+      LOG(INFO) << "TensorRT DLA enabled in FreezeNetwork(), DLACore "
+                << dla_core_;
+    }
+  }
+
   if (with_dynamic_shape_) {
 #if IS_TRT_VERSION_GE(6000)
     LOG(INFO) << "Run Paddle-TRT Dynamic Shape mode.";
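Note that the hunk above drives DLA through the IBuilder methods (setDefaultDeviceType, setDLACore, allowGPUFallback), which TensorRT 7 deprecates in favor of IBuilderConfig. A minimal sketch of the equivalent setup on the newer interface, for comparison; error handling and engine construction are omitted:

#include <NvInfer.h>

// Sketch of the same DLA configuration on TensorRT 7+'s IBuilderConfig.
void ConfigureDlaOnBuilderConfig(nvinfer1::IBuilder* builder,
                                 nvinfer1::IBuilderConfig* config,
                                 int dla_core) {
  if (builder->getNbDLACores() == 0) return;  // device has no DLA
  config->setFlag(nvinfer1::BuilderFlag::kFP16);  // DLA needs fp16 or int8
  config->setDefaultDeviceType(nvinfer1::DeviceType::kDLA);
  config->setDLACore(dla_core);
  // Let layers DLA cannot run fall back to the GPU instead of failing
  // the build, mirroring allowGPUFallback(true) above.
  config->setFlag(nvinfer1::BuilderFlag::kGPU_FALLBACK);
}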

paddle/fluid/inference/tensorrt/engine.h

Lines changed: 29 additions & 2 deletions
@@ -220,6 +220,29 @@ class TensorRTEngine {
   void Deserialize(const std::string& engine_serialized_data) {
     freshDeviceId();
     infer_ptr<nvinfer1::IRuntime> runtime(createInferRuntime(&logger_));
+
+    if (use_dla_) {
+      if (precision_ != AnalysisConfig::Precision::kInt8 &&
+          precision_ != AnalysisConfig::Precision::kHalf) {
+        LOG(WARNING) << "TensorRT DLA must be used with int8 or fp16, but you "
+                        "set float32, so DLA is not used.";
+      } else if (runtime->getNbDLACores() == 0) {
+        LOG(WARNING)
+            << "TensorRT DLA is set by config, but your device does not have "
+               "DLA, so DLA is not used.";
+      } else {
+        if (dla_core_ < 0 || dla_core_ >= runtime->getNbDLACores()) {
+          LOG(WARNING) << "Invalid DLACore, must be 0 <= DLACore < "
+                       << runtime->getNbDLACores() << ", but got " << dla_core_
+                       << ", so 0 is used as the default.";
+          dla_core_ = 0;
+        }
+        runtime->setDLACore(dla_core_);
+        LOG(INFO) << "TensorRT DLA enabled in Deserialize(), DLACore "
+                  << dla_core_;
+      }
+    }
+
     if (with_dynamic_shape_) {
 #if IS_TRT_VERSION_GE(6000)
       infer_engine_.reset(runtime->deserializeCudaEngine(
@@ -287,6 +310,8 @@ class TensorRTEngine {
   }

   void SetUseOSS(bool use_oss) { use_oss_ = use_oss; }
+  void SetUseDLA(bool use_dla) { use_dla_ = use_dla; }
+  void SetDLACore(int dla_core) { dla_core_ = dla_core; }
   void SetWithErnie(bool with_ernie) { with_ernie_ = with_ernie; }

   void ClearWeights() {
@@ -316,8 +341,8 @@ class TensorRTEngine {
   ShapeMapType min_input_shape() { return min_input_shape_; }
   ShapeMapType max_input_shape() { return max_input_shape_; }
   ShapeMapType optim_input_shape() { return optim_input_shape_; }
-  bool use_oss() { return use_oss_; };
-  bool with_ernie() { return with_ernie_; };
+  bool use_oss() { return use_oss_; }
+  bool with_ernie() { return with_ernie_; }
   bool disable_trt_plugin_fp16() { return disable_trt_plugin_fp16_; }
   bool with_dynamic_shape() { return with_dynamic_shape_; }

@@ -354,6 +379,8 @@ class TensorRTEngine {
   ShapeMapType optim_input_shape_;
   bool disable_trt_plugin_fp16_{false};
   bool use_oss_{false};
+  bool use_dla_{false};
+  int dla_core_{0};
   bool with_ernie_{false};
   nvinfer1::ILogger& logger_;

paddle/fluid/inference/tests/api/trt_mobilenet_test.cc

Lines changed: 1 addition & 0 deletions
@@ -58,6 +58,7 @@ TEST(PredictorPool, use_gpu) {
   config.SetModel(model_dir);
  config.EnableTensorRtEngine();
   config.Exp_DisableTensorRtOPs({"fc"});
+  config.EnableTensorRtDLA(0);
   services::PredictorPool pred_pool(config, 1);

   auto predictor = pred_pool.Retrive(0);

paddle/fluid/pybind/inference_api.cc

Lines changed: 3 additions & 0 deletions
@@ -504,6 +504,9 @@ void BindAnalysisConfig(py::module *m) {
            py::arg("disable_trt_plugin_fp16") = false)
       .def("enable_tensorrt_oss", &AnalysisConfig::EnableTensorRtOSS)
       .def("tensorrt_oss_enabled", &AnalysisConfig::tensorrt_oss_enabled)
+      .def("enable_tensorrt_dla", &AnalysisConfig::EnableTensorRtDLA,
+           py::arg("dla_core") = 0)
+      .def("tensorrt_dla_enabled", &AnalysisConfig::tensorrt_dla_enabled)
       .def("tensorrt_engine_enabled", &AnalysisConfig::tensorrt_engine_enabled)
       .def("enable_lite_engine", &AnalysisConfig::EnableLiteEngine,
            py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
