PaddlePaddle · cryoco · Jun 12, 2021 · Jun 7, 2021 · Jun 8, 2021 · Jun 8, 2021
diff --git a/paddle/fluid/inference/tensorrt/convert/flatten_op.cc b/paddle/fluid/inference/tensorrt/convert/flatten_op.cc
@@ -25,7 +25,7 @@ namespace inference {
 namespace tensorrt {
 
 /*
- * FlattenOp, only support static shape mode currently.
+ * FlattenOp trt converter
  */
 class FlattenOpConverter : public OpConverter {
  public:
@@ -35,21 +35,48 @@ class FlattenOpConverter : public OpConverter {
     // Declare inputs
     auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
     int dims = input->getDimensions().nbDims;
+    nvinfer1::IShuffleLayer* layer = nullptr;
+    if (!engine_->with_dynamic_shape()) {
+      int dim_prod = 1;
+      for (int i = 0; i < dims; i++) {
+        int dim_i = input->getDimensions().d[i];
+        PADDLE_ENFORCE_GT(
+            dim_i, 0,
+            platform::errors::InvalidArgument(
+                "flatten input dim should be > 0, but got %d.", dim_i));
+        dim_prod *= dim_i;
+      }
+      nvinfer1::Dims flatten_dim;
+      flatten_dim.nbDims = 1;
+      flatten_dim.d[0] = dim_prod;
+      layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
+      layer->setReshapeDimensions(flatten_dim);
+    } else {
+      auto* shape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shape, *input);
+      uint32_t reduce_dim = 1;
 
-    int dim_prod = 1;
-    for (int i = 0; i < dims; i++) {
-      int dim_i = input->getDimensions().d[i];
-      PADDLE_ENFORCE_GT(
-          dim_i, 0, platform::errors::InvalidArgument(
-                        "flatten input dim should be > 0, but got %d.", dim_i));
-      dim_prod *= dim_i;
+      auto* reduce_prod_layer = TRT_ENGINE_ADD_LAYER(
+          engine_, Reduce, *(shape_layer->getOutput(0)),
+          nvinfer1::ReduceOperation::kPROD, reduce_dim, true);
+      int32_t* constant_weight_data = new int32_t[1];
+      constant_weight_data[0] = -1;
+      TensorRTEngine::Weight constant_weight{
+          nvinfer1::DataType::kINT32, static_cast<void*>(constant_weight_data),
+          1};
+      nvinfer1::Dims constant_dims;
+      constant_dims.nbDims = 1;
+      constant_dims.d[0] = 1;
+      auto* constant_layer = TRT_ENGINE_ADD_LAYER(
+          engine_, Constant, constant_dims, constant_weight.get());
+      std::vector<nvinfer1::ITensor*> itensors;
+      itensors.push_back(constant_layer->getOutput(0));
+      itensors.push_back(reduce_prod_layer->getOutput(0));
+      auto* concat_layer =
+          TRT_ENGINE_ADD_LAYER(engine_, Concatenation, itensors.data(), 2);
+      concat_layer->setAxis(0);
+      layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
+      layer->setInput(1, *(concat_layer->getOutput(0)));
     }
-    nvinfer1::Dims flatten_dim;
-    flatten_dim.nbDims = 1;
-    flatten_dim.d[0] = dim_prod;
-    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
-    layer->setReshapeDimensions(flatten_dim);
-
     auto output_name = op_desc.Output("Out")[0];
     RreplenishLayerAndOutput(layer, "flatten", {output_name}, test_mode);
   }

diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -300,23 +300,14 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
         if (axis.size() >= nvinfer1::Dims::MAX_DIMS) return false;
       }
     }
-    if (op_type == "flatten2") {
-      // flatten doesn't support dynamic shape currently
-      if (!desc.HasAttr("axis")) {
-        return false;
-      } else {
-        if (with_dynamic_shape) return false;
-        int axis = BOOST_GET_CONST(int, desc.GetAttr("axis"));
-        if (axis != 1) return false;
-      }
-    }
-
-    if (op_type == "flatten") {
-      // flatten doesn't support dynamic shape currently
+    if (op_type == "flatten2" || op_type == "flatten") {
       if (!desc.HasAttr("axis")) {
         return false;
       } else {
+#if IS_TRT_VERSION_GE(7130)
+#else
         if (with_dynamic_shape) return false;
+#endif
         int axis = BOOST_GET_CONST(int, desc.GetAttr("axis"));
         if (axis != 1) return false;
       }

diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten_op.py
@@ -0,0 +1,87 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+from inference_pass_test import InferencePassTest
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+from paddle.fluid.core import PassVersionChecker
+from paddle.fluid.core import AnalysisConfig
+
+
+class TRTFlattenTest(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(
+                name="data", shape=[-1, 6, 64, 64], dtype="float32")
+            flatten_out = self.append_flatten(data)
+            out = fluid.layers.batch_norm(flatten_out, is_test=True)
+        self.feeds = {
+            "data": np.random.random([1, 6, 64, 64]).astype("float32"),
+        }
+        self.enable_trt = True
+        self.trt_parameters = TRTFlattenTest.TensorRTParam(
+            1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False)
+        self.fetch_list = [out]
+
+    def append_flatten(self, data):
+        return fluid.layers.flatten(data, axis=1)
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            use_gpu = True
+            self.check_output_with_option(use_gpu)
+            self.assertTrue(
+                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
+
+
+class TRTFlattenDynamicTest(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(
+                name="data", shape=[-1, 6, 64, 64], dtype="float32")
+            flatten_out = self.append_flatten(data)
+            out = fluid.layers.batch_norm(flatten_out, is_test=True)
+        self.feeds = {
+            "data": np.random.random([2, 6, 64, 64]).astype("float32"),
+        }
+        self.enable_trt = True
+        self.trt_parameters = TRTFlattenDynamicTest.TensorRTParam(
+            1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False)
+        self.dynamic_shape_params = TRTFlattenDynamicTest.DynamicShapeParam({
+            'data': [1, 6, 8, 8],
+            'flatten_0.tmp_0': [1, 6 * 8 * 8]
+        }, {'data': [3, 6, 128, 128],
+            'flatten_0.tmp_0': [3, 6 * 128 * 128]}, {
+                'data': [2, 6, 64, 64],
+                'flatten_0.tmp_0': [2, 6 * 64 * 64]
+            }, False)
+        self.fetch_list = [out]
+
+    def append_flatten(self, data):
+        return fluid.layers.flatten(data, axis=1)
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            use_gpu = True
+            self.check_output_with_option(use_gpu)
+            self.assertTrue(
+                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py
@@ -312,33 +312,6 @@ def test_check_output(self):
                 PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
 
 
-class TensorRTSubgraphPassFlattenTest(InferencePassTest):
-    def setUp(self):
-        with fluid.program_guard(self.main_program, self.startup_program):
-            data = fluid.data(
-                name="data", shape=[-1, 6, 64, 64], dtype="float32")
-            flatten_out = self.append_flatten(data)
-            reshape_out = fluid.layers.reshape(flatten_out, [-1, 0, 1, 1])
-            out = fluid.layers.batch_norm(reshape_out, is_test=True)
-        self.feeds = {
-            "data": np.random.random([1, 6, 64, 64]).astype("float32"),
-        }
-        self.enable_trt = True
-        self.trt_parameters = TensorRTSubgraphPassFlattenTest.TensorRTParam(
-            1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False)
-        self.fetch_list = [out]
-
-    def append_flatten(self, data):
-        return fluid.layers.flatten(data, axis=1)
-
-    def test_check_output(self):
-        if core.is_compiled_with_cuda():
-            use_gpu = True
-            self.check_output_with_option(use_gpu)
-            self.assertTrue(
-                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
-
-
 class TensorRTSubgraphPassLayerNormTest(InferencePassTest):
     def setUp(self):
         self.set_params()