
Commit ee94157

update
1 parent 813ddc4 commit ee94157

8 files changed, +74 -122 lines changed

paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc

Lines changed: 0 additions & 1 deletion
@@ -578,7 +578,6 @@ std::string TensorRtSubgraphPass::CreateTensorRTOp(
   op_desc->SetAttr("allow_build_at_runtime", allow_build_at_runtime);
   op_desc->SetAttr("shape_range_info_path", shape_range_info_path);
   op_desc->SetAttr("use_inspector", use_inspector);
-  op_desc->SetAttr("model_precision", Get<int>("model_precision"));
   op_desc->SetAttr("with_dynamic_shape", with_dynamic_shape);
   op_desc->SetAttr("enable_low_precision_io", enable_low_precision_io);
 

paddle/fluid/inference/tensorrt/convert/test_op_converter.cc

Lines changed: 6 additions & 1 deletion
@@ -14,6 +14,8 @@ limitations under the License. */
 
 #include <gtest/gtest.h>  // NOLINT
 
+#include <memory>
+
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 
@@ -28,7 +30,10 @@ TEST(OpConverter, ConvertBlock) {
 
   // init trt engine
   std::unique_ptr<TensorRTEngine> engine_;
-  engine_.reset(new TensorRTEngine(5, 1 << 15));
+  TensorRTEngine::ConstructionParams params;
+  params.max_batch_size = 5;
+  params.max_workspace_size = 1 << 15;
+  engine_ = std::make_unique<TensorRTEngine>(params);
   engine_->InitNetwork();
 
   engine_->DeclareInput(
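
The test now builds the engine through TensorRTEngine::ConstructionParams rather than positional constructor arguments. Below is a self-contained sketch of that aggregate-parameter idiom with a stand-in Engine class; the field names follow the diff, while the defaults and the Describe() helper are assumptions made for illustration only.

#include <cstdint>
#include <iostream>
#include <memory>

// Stand-in for TensorRTEngine::ConstructionParams; the real struct has
// more fields (precision, input shapes, device id, ...). The defaults
// here are assumed for this sketch.
struct ConstructionParams {
  int32_t max_batch_size = 1;
  int64_t max_workspace_size = 1L << 30;
  bool with_dynamic_shape = false;
};

// Stand-in engine that copies the params, as the diff's call sites suggest.
class Engine {
 public:
  explicit Engine(const ConstructionParams &p) : params_(p) {}
  void Describe() const {
    std::cout << "batch=" << params_.max_batch_size
              << " workspace=" << params_.max_workspace_size << "\n";
  }

 private:
  ConstructionParams params_;
};

int main() {
  // Call sites name only the options they care about; unset fields keep
  // their in-struct defaults, so adding a new option later does not break
  // existing call sites the way a new positional argument would.
  ConstructionParams params;
  params.max_batch_size = 5;
  params.max_workspace_size = 1 << 15;
  auto engine = std::make_unique<Engine>(params);
  engine->Describe();
  return 0;
}

This is why the test fixtures further down can enable dynamic shape with a single params.with_dynamic_shape = true line instead of a new constructor overload.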

paddle/fluid/inference/tensorrt/engine.cc

Lines changed: 12 additions & 12 deletions
@@ -125,7 +125,7 @@ void TensorRTEngine::Execute(int batch_size,
         inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
             .getContextMemory(
                 predictor_id_per_thread,
-                phi::GPUPlace(params_.device_id),
+                phi::GPUPlace(device_id()),
                 phi::Stream(reinterpret_cast<phi::StreamId>(stream)));
     infer_context->setDeviceMemory(context_memory);
   }
@@ -202,7 +202,7 @@ void TensorRTEngine::FreezeNetwork() {
   infer_builder_config_->setMemoryPoolLimit(
       nvinfer1::MemoryPoolType::kWORKSPACE, params_.max_workspace_size);
 #else
-  infer_builder_config_->setMaxWorkspaceSize(max_workspace_);
+  infer_builder_config_->setMaxWorkspaceSize(params_.max_workspace_size);
 #endif
 
   bool enable_fp16 = (precision() == phi::DataType::FLOAT16);
@@ -290,11 +290,11 @@
       if (!(std::all_of(input.second.begin(),
                         input.second.end(),
                         [](int x) { return x > 0; }) &&
-            std::all_of(max_input_shape_[input.first].begin(),
-                        max_input_shape_[input.first].end(),
+            std::all_of(max_input_shape()[input.first].begin(),
+                        max_input_shape()[input.first].end(),
                         [](int x) { return x > 0; }) &&
-            std::all_of(optim_input_shape_[input.first].begin(),
-                        optim_input_shape_[input.first].end(),
+            std::all_of(optim_input_shape()[input.first].begin(),
+                        optim_input_shape()[input.first].end(),
                         [](int x) { return x > 0; }))) {
         continue;
       }
@@ -322,9 +322,9 @@
     auto input_name = network()->getInput(input_id)->getName();
     if (!itensor_map_.count(input_name)) continue;
     if (!GetITensor(input_name)->isShapeTensor()) continue;
-    PADDLE_ENFORCE_EQ(min_shape_tensor().count(input_name) &&
-                          max_shape_tensor().count(input_name) &&
-                          optim_shape_tensor().count(input_name),
+    PADDLE_ENFORCE_EQ(min_shape_tensor().count(input_name) > 0 &&
+                          max_shape_tensor().count(input_name) > 0 &&
+                          optim_shape_tensor().count(input_name) > 0,
                       true,
                       platform::errors::InvalidArgument(
                           "Fail to find min/max/optim shape value for TRT "
@@ -854,13 +854,13 @@ nvinfer1::IPluginV2Layer *TensorRTEngine::AddPluginV2IOExt(
 void TensorRTEngine::FreshDeviceId() {
   int count;
   cudaGetDeviceCount(&count);
-  PADDLE_ENFORCE_LT(params_.device_id,
+  PADDLE_ENFORCE_LT(device_id(),
                     count,
                     platform::errors::OutOfRange(
                         "Device id %d exceeds the current device count: %d.",
-                        params_.device_id,
+                        device_id(),
                         count));
-  platform::SetDeviceId(params_.device_id);
+  platform::SetDeviceId(device_id());
 }
 
 void TensorRTEngine::GetEngineInfo() {
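
Two small correctness points in these hunks are worth noting: the #else branch previously read the stale max_workspace_ member instead of params_.max_workspace_size, and the shape-tensor check now spells out count(...) > 0. Since std::map::count returns a size_t, the explicit comparison turns each operand of the conjunction into a bool before the result is compared against true. A minimal illustration with hypothetical shape maps standing in for the engine's accessors:

#include <cassert>
#include <map>
#include <string>
#include <vector>

int main() {
  // Hypothetical min/max shape-tensor maps standing in for the engine's
  // min_shape_tensor()/max_shape_tensor() accessors.
  std::map<std::string, std::vector<int>> min_shape{{"x", {1}}};
  std::map<std::string, std::vector<int>> max_shape{{"x", {8}}};

  const std::string name = "x";
  // count() returns size_t; "> 0" yields an explicit bool per term,
  // matching the PADDLE_ENFORCE_EQ(..., true, ...) comparison above.
  bool has_profile = min_shape.count(name) > 0 && max_shape.count(name) > 0;
  assert(has_profile);
  return 0;
}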

paddle/fluid/inference/tensorrt/engine.h

Lines changed: 1 addition & 1 deletion
@@ -418,7 +418,7 @@ class TensorRTEngine {
     return enable_int8 && support_int8;
   }
 
-  int GetDeviceId() { return params_.device_id; }
+  int device_id() { return params_.device_id; }
 
   nvinfer1::IPluginV2Layer* AddPlugin(nvinfer1::ITensor* const* inputs,
                                       int num_inputs,
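
The rename from GetDeviceId() to device_id() matches the accessor style used elsewhere in the class (precision(), max_input_shape(), ...), and engine.cc above now routes all reads through it. A sketch of the pattern with a stand-in class, not the real engine definition:

#include <iostream>

class Engine {
 public:
  // Single point of access to the underlying member; the internal
  // storage can change without touching call sites.
  int device_id() const { return params_.device_id; }

  void FreshDeviceId() const {
    // The class itself goes through the accessor rather than reading
    // params_.device_id directly, as in the engine.cc hunks above.
    std::cout << "binding to device " << device_id() << "\n";
  }

 private:
  struct Params {
    int device_id = 0;
  } params_;
};

int main() {
  Engine e;
  e.FreshDeviceId();
  return 0;
}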

paddle/fluid/inference/tensorrt/test_dynamic_engine.cc

Lines changed: 25 additions & 63 deletions
@@ -36,7 +36,7 @@ namespace tensorrt {
 class TensorRTDynamicShapeValueEngineTest : public ::testing::Test {
  protected:
   void SetUp() override {
-    ctx_ = new phi::GPUContext(platform::CUDAPlace(0));
+    ctx_ = std::make_unique<phi::GPUContext>(platform::CUDAPlace(0));
     ctx_->SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
                            .GetAllocator(platform::CUDAPlace(0), ctx_->stream())
                            .get());
@@ -70,25 +70,19 @@ class TensorRTDynamicShapeValueEngineTest : public ::testing::Test {
     TensorRTEngine::ConstructionParams params;
     params.max_batch_size = 16;
     params.max_workspace_size = 1 << 10;
+    params.with_dynamic_shape = true;
     params.min_input_shape = min_input_shape;
     params.max_input_shape = max_input_shape;
     params.optim_input_shape = optim_input_shape;
     params.min_shape_tensor = min_input_value;
     params.max_shape_tensor = max_input_value;
     params.optim_shape_tensor = optim_input_value;
 
-    engine_ = new TensorRTEngine(params, NaiveLogger::Global());
+    engine_ = std::make_unique<TensorRTEngine>(params, NaiveLogger::Global());
 
     engine_->InitNetwork();
   }
 
-  void TearDown() override {
-    if (engine_) {
-      delete engine_;
-      engine_ = nullptr;
-    }
-  }
-
   void PrepareInputOutput(const std::vector<float> &input,
                           std::vector<int> output_shape) {
     paddle::framework::TensorFromVector(input, *ctx_, &input_);
@@ -105,8 +99,8 @@ class TensorRTDynamicShapeValueEngineTest : public ::testing::Test {
   phi::DenseTensor input_;
   phi::DenseTensor shape_;
   phi::DenseTensor output_;
-  TensorRTEngine *engine_;
-  phi::GPUContext *ctx_;
+  std::unique_ptr<TensorRTEngine> engine_;
+  std::unique_ptr<phi::GPUContext> ctx_;
 };
 
 TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) {
@@ -166,7 +160,7 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) {
 class TensorRTDynamicEngineTest : public ::testing::Test {
  protected:
   void SetUp() override {
-    ctx_ = new phi::GPUContext(platform::CUDAPlace(0));
+    ctx_ = std::make_unique<phi::GPUContext>(platform::CUDAPlace(0));
     ctx_->SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
                            .GetAllocator(platform::CUDAPlace(0), ctx_->stream())
                            .get());
@@ -195,23 +189,16 @@ class TensorRTDynamicEngineTest : public ::testing::Test {
     params.max_batch_size = 16;
     params.max_workspace_size = 1 << 10;
     params.with_dynamic_shape = true;
-    params.precision = phi::Datatype::FLOAT16;
+    params.precision = phi::DataType::FLOAT16;
     params.min_input_shape = min_input_shape;
     params.max_input_shape = max_input_shape;
     params.optim_input_shape = optim_input_shape;
 
-    engine_ = new TensorRTEngine(params, NaiveLogger::Global());
+    engine_ = std::make_unique<TensorRTEngine>(params, NaiveLogger::Global());
 
     engine_->InitNetwork();
   }
 
-  void TearDown() override {
-    if (engine_) {
-      delete engine_;
-      engine_ = nullptr;
-    }
-  }
-
   void PrepareInputOutput(const std::vector<float16> &input,
                           std::vector<int> output_shape) {
     paddle::framework::TensorFromVector(input, *ctx_, &input_);
@@ -225,8 +212,8 @@ class TensorRTDynamicEngineTest : public ::testing::Test {
  protected:
   phi::DenseTensor input_;
   phi::DenseTensor output_;
-  TensorRTEngine *engine_;
-  phi::GPUContext *ctx_;
+  std::unique_ptr<TensorRTEngine> engine_;
+  std::unique_ptr<phi::GPUContext> ctx_;
 };
 
 TEST_F(TensorRTDynamicEngineTest, test_spmm) {
@@ -331,7 +318,7 @@ TEST_F(TensorRTDynamicEngineTest, test_spmm) {
 class TensorRTDynamicTestFusedTokenPrune : public ::testing::Test {
  protected:
   void SetUp() override {
-    ctx_ = new phi::GPUContext(platform::CUDAPlace(0));
+    ctx_ = std::make_unique<phi::GPUContext>(platform::CUDAPlace(0));
     ctx_->SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
                            .GetAllocator(platform::CUDAPlace(0), ctx_->stream())
                            .get());
@@ -368,24 +355,17 @@ class TensorRTDynamicTestFusedTokenPrune : public ::testing::Test {
     TensorRTEngine::ConstructionParams params;
     params.max_batch_size = 16;
     params.max_workspace_size = 1 << 10;
-    params.precision = phi::Datatype::FLOAT32;
+    params.precision = phi::DataType::FLOAT32;
     params.with_dynamic_shape = true;
     params.min_input_shape = min_input_shape;
     params.max_input_shape = max_input_shape;
     params.optim_input_shape = optim_input_shape;
 
-    engine_ = new TensorRTEngine(params, NaiveLogger::Global());
+    engine_ = std::make_unique<TensorRTEngine>(params, NaiveLogger::Global());
 
     engine_->InitNetwork();
   }
 
-  void TearDown() override {
-    if (engine_) {
-      delete engine_;
-      engine_ = nullptr;
-    }
-  }
-
   void PrepareInputOutput(const std::vector<std::vector<float>> inputs,
                           std::vector<std::vector<int>> output_shapes) {
     LOG(INFO) << "PrepareInputOutput";
@@ -410,13 +390,12 @@ class TensorRTDynamicTestFusedTokenPrune : public ::testing::Test {
  protected:
   std::vector<phi::DenseTensor> inputs_;
   std::vector<phi::DenseTensor> outputs_;
-  TensorRTEngine *engine_;
-  phi::GPUContext *ctx_;
+  std::unique_ptr<TensorRTEngine> engine_;
+  std::unique_ptr<phi::GPUContext> ctx_;
 };
 
 TEST_F(TensorRTDynamicTestFusedTokenPrune, test_fused_token_prune) {
 #if IS_TRT_VERSION_GE(8000)
-  tensorrt::plugin::TrtPluginRegistry::Global()->RegistToTrt();
   auto *attn = engine_->DeclareInput(
       "attn", nvinfer1::DataType::kFLOAT, nvinfer1::Dims2{-1, 4});
   auto *x = engine_->DeclareInput(
@@ -536,7 +515,7 @@ TEST_F(TensorRTDynamicTestFusedTokenPrune, test_fused_token_prune) {
 class TensorRTDynamicTestFusedTokenPruneHalf : public ::testing::Test {
  protected:
   void SetUp() override {
-    ctx_ = new phi::GPUContext(platform::CUDAPlace(0));
+    ctx_ = std::make_unique<phi::GPUContext>(platform::CUDAPlace(0));
     ctx_->SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
                            .GetAllocator(platform::CUDAPlace(0), ctx_->stream())
                            .get());
@@ -573,23 +552,16 @@ class TensorRTDynamicTestFusedTokenPruneHalf : public ::testing::Test {
     TensorRTEngine::ConstructionParams params;
     params.max_batch_size = 16;
     params.max_workspace_size = 1 << 10;
-    params.precision = phi::Datatype::FLOAT16;
+    params.precision = phi::DataType::FLOAT16;
     params.with_dynamic_shape = true;
     params.min_input_shape = min_input_shape;
     params.max_input_shape = max_input_shape;
     params.optim_input_shape = optim_input_shape;
 
-    engine_ = new TensorRTEngine(params, NaiveLogger::Global());
+    engine_ = std::make_unique<TensorRTEngine>(params, NaiveLogger::Global());
     engine_->InitNetwork();
   }
 
-  void TearDown() override {
-    if (engine_) {
-      delete engine_;
-      engine_ = nullptr;
-    }
-  }
-
   void PrepareInputOutput(const std::vector<std::vector<float16>> inputs,
                           std::vector<std::vector<int>> output_shapes) {
     LOG(INFO) << "PrepareInputOutput";
@@ -614,13 +586,12 @@ class TensorRTDynamicTestFusedTokenPruneHalf : public ::testing::Test {
  protected:
   std::vector<phi::DenseTensor> inputs_;
   std::vector<phi::DenseTensor> outputs_;
-  TensorRTEngine *engine_;
-  phi::GPUContext *ctx_;
+  std::unique_ptr<TensorRTEngine> engine_;
+  std::unique_ptr<phi::GPUContext> ctx_;
 };
 
 TEST_F(TensorRTDynamicTestFusedTokenPruneHalf, test_fused_token_prune) {
 #if IS_TRT_VERSION_GE(8000)
-  tensorrt::plugin::TrtPluginRegistry::Global()->RegistToTrt();
   auto *attn = engine_->DeclareInput(
       "attn", nvinfer1::DataType::kHALF, nvinfer1::Dims2{-1, 4});
   auto *x = engine_->DeclareInput(
@@ -740,7 +711,7 @@ TEST_F(TensorRTDynamicTestFusedTokenPruneHalf, test_fused_token_prune) {
 class TensorRTDynamicShapeGNTest : public ::testing::Test {
  protected:
   void SetUp() override {
-    ctx_ = new phi::GPUContext(platform::CUDAPlace(0));
+    ctx_ = std::make_unique<phi::GPUContext>(platform::CUDAPlace(0));
     ctx_->SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
                            .GetAllocator(platform::CUDAPlace(0), ctx_->stream())
                            .get());
@@ -771,24 +742,17 @@ class TensorRTDynamicShapeGNTest : public ::testing::Test {
     TensorRTEngine::ConstructionParams params;
     params.max_batch_size = 16;
     params.max_workspace_size = 1 << 10;
-    params.precision = phi::Datatype::INT8;
+    params.precision = phi::DataType::INT8;
     params.with_dynamic_shape = true;
     params.min_input_shape = min_input_shape;
    params.max_input_shape = max_input_shape;
     params.optim_input_shape = optim_input_shape;
 
-    engine_ = new TensorRTEngine(params);
+    engine_ = std::make_unique<TensorRTEngine>(params, NaiveLogger::Global());
 
     engine_->InitNetwork();
   }
 
-  void TearDown() override {
-    if (engine_) {
-      delete engine_;
-      engine_ = nullptr;
-    }
-  }
-
   void PrepareInputOutput(const std::vector<float> &input,
                           std::vector<int> output_shape) {
     paddle::framework::TensorFromVector(input, *ctx_, &x_);
@@ -905,8 +869,8 @@ class TensorRTDynamicShapeGNTest : public ::testing::Test {
  protected:
   phi::DenseTensor x_;
   phi::DenseTensor y_;
-  TensorRTEngine *engine_;
-  phi::GPUContext *ctx_;
+  std::unique_ptr<TensorRTEngine> engine_;
+  std::unique_ptr<phi::GPUContext> ctx_;
   // case from SD
   int n_ = 2;
   int c_ = 320;
@@ -924,8 +888,6 @@ class TensorRTDynamicShapeGNTest : public ::testing::Test {
 
 /*
 TEST_F(TensorRTDynamicShapeGNTest, test_trt_dynamic_shape_groupnorm) {
-  tensorrt::plugin::TrtPluginRegistry::Global()->RegistToTrt();
-
   float *bias = new float[c_];
   float *scale = new float[c_];
   for (int i = 0; i < c_; i++) {
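
The deleted TearDown() overrides follow directly from the member-type change: once engine_ and ctx_ are std::unique_ptr, the fixture's implicit destructor releases them after each test. A minimal gtest sketch of the resulting shape, with Engine standing in for TensorRTEngine:

#include <gtest/gtest.h>
#include <memory>

// Stand-in for TensorRTEngine; only the call used by the sketch exists.
class Engine {
 public:
  void InitNetwork() {}
};

class EngineTest : public ::testing::Test {
 protected:
  void SetUp() override {
    engine_ = std::make_unique<Engine>();
    engine_->InitNetwork();
  }
  // No TearDown() needed: the unique_ptr member is destroyed with the
  // fixture after each test, which is exactly what the deleted
  // delete/nullptr blocks did by hand.

  std::unique_ptr<Engine> engine_;
};

TEST_F(EngineTest, ConstructsAndDestroysCleanly) {
  EXPECT_TRUE(engine_ != nullptr);
}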
