diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index e1c8410b9db350..1f7a0c9a2262b0 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -2918,6 +2918,7 @@ USE_TRT_CONVERTER(preln_groupnorm_act)
 USE_TRT_CONVERTER(cumsum)
 USE_TRT_CONVERTER(assign)
 USE_TRT_CONVERTER(unbind)
+USE_TRT_CONVERTER(flip)
 #if IS_TRT_VERSION_GE(8522)
 USE_TRT_CONVERTER(flash_multihead_matmul)
 USE_TRT_CONVERTER(cross_multihead_matmul)
diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
index 2420033c08695f..6b7fd4324010d6 100755
--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -108,7 +108,8 @@ list(
   temporal_shift_op.cc
   einsum_op.cc
   unbind_op.cc
-  assign_op.cc)
+  assign_op.cc
+  flip_op.cc)
 
 if(${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 7)
   list(APPEND CONVERT_FILES emb_eltwise_layernorm.cc
diff --git a/paddle/fluid/inference/tensorrt/convert/flip_op.cc b/paddle/fluid/inference/tensorrt/convert/flip_op.cc
new file mode 100644
index 00000000000000..53ac35e690551d
--- /dev/null
+++ b/paddle/fluid/inference/tensorrt/convert/flip_op.cc
@@ -0,0 +1,111 @@
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+/*
+ * Converts a Paddle `flip` op into TensorRT layers.
+ *
+ * For every entry of the `axis` attribute one TensorRT loop is emitted: an
+ * iterator walks the current tensor along that axis in reverse order and a
+ * concatenating loop output stitches the slices back together along the same
+ * axis. The loops are chained, so the output of one loop feeds the next.
+ */
+class FlipOpConverter : public OpConverter {
+ public:
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope,
+                  bool test_mode) override {
+    VLOG(4) << "convert a flip op to tensorrt layer";
+
+    framework::OpDesc op_desc(op, nullptr);
+    // Declare inputs
+    auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
+    const auto input_dims = input->getDimensions();
+    const int rank = input_dims.nbDims;
+
+    // Get Attrs. Negative axes count from the back; anything still outside
+    // [0, rank) would index past input_dims.d below, so reject it up front.
+    std::vector<int> axis =
+        PADDLE_GET_CONST(std::vector<int>, op_desc.GetAttr("axis"));
+    for (auto& ax : axis) {
+      const int raw_axis = ax;
+      if (ax < 0) ax += rank;
+      PADDLE_ENFORCE_EQ(
+          ax >= 0 && ax < rank,
+          true,
+          platform::errors::InvalidArgument(
+              "The axis of flip should be in range [-%d, %d), but got %d.",
+              rank,
+              rank,
+              raw_axis));
+    }
+
+    // Flipping never changes the shape, so the shape of the original input
+    // stays valid for every loop built below.
+    nvinfer1::ITensor* shape_tensor = Shape(input);
+    // Length of dimension `dim`: a build-time constant when the dimension is
+    // known (>= 0), otherwise an element read from the runtime shape tensor.
+    // The result is a scalar by default.
+    auto getAxisLength = [&](int dim, bool scalar = true) {
+      const int d = input_dims.d[dim];
+      if (d >= 0) {
+        return Add1DConstantLayer(d, "", scalar);
+      } else {
+        return GetEleTensorOfShape(shape_tensor, dim, scalar);
+      }
+    };
+    for (const int ax : axis) {
+      auto loop = TRT_ENGINE_ADD_LAYER(engine_, Loop);
+      nvinfer1::ITensor* tripLimit = getAxisLength(ax);
+      loop->addTripLimit(*tripLimit, nvinfer1::TripLimit::kCOUNT);
+      // Third argument `true` makes the iterator walk the axis in reverse.
+      auto iterator = loop->addIterator(*input, ax, true);
+      // Loop-carried counter 0, 1, 2, ... NOTE(review): nothing below reads
+      // it besides its own increment; confirm whether it is still needed.
+      std::vector<int32_t> zero_vec{0};
+      std::vector<int32_t> one_vec{1};
+      auto zero = Add1DConstantLayer(zero_vec);
+      auto one = Add1DConstantLayer(one_vec);
+      nvinfer1::IRecurrenceLayer* iRec = loop->addRecurrence(*zero);
+      nvinfer1::ITensor* iCur = iRec->getOutput(0);
+      auto iNext = TRT_ENGINE_ADD_LAYER(engine_,
+                                        ElementWise,
+                                        *iCur,
+                                        *one,
+                                        nvinfer1::ElementWiseOperation::kSUM);
+      iRec->setInput(1, *iNext->getOutput(0));
+      // Re-assemble the reversed slices along the same axis; input 1 of a
+      // concatenating loop output is the length of the concatenated axis.
+      nvinfer1::ILoopOutputLayer* loopOut = loop->addLoopOutput(
+          *iterator->getOutput(0), nvinfer1::LoopOutput::kCONCATENATE, ax);
+      loopOut->setInput(1, *tripLimit);
+      input = loopOut->getOutput(0);
+    }
+
+    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Identity, *input);
+    auto output_name = op_desc.Output("Out")[0];
+    RreplenishLayerAndOutput(layer, "flip", {output_name}, test_mode);
+  }
+};
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(flip, FlipOpConverter);
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index 15ea158282f261..cbd21097ee7407 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -2730,6 +2730,18 @@ struct SimpleOpTypeSetTeller : public Teller {
 #endif
     }
 
+    if (op_type == "flip") {
+      if (!with_dynamic_shape) {
+        VLOG(3) << "the flip does not support "
+                   "static shape yet";
+        return false;
+      }
+#if !IS_TRT_VERSION_GE(7220)
+      VLOG(3) << "flip is not supported when TensorRT below 7.2.2";
+      return false;
+#endif
+    }
+
     if (use_no_calib_int8) {
       return int8_teller_set.count(op_type);
     } else {
@@ -2900,7 +2912,8 @@ struct SimpleOpTypeSetTeller : public Teller {
       "grid_sampler",
       "cumsum",
       "unbind",
-      "assign"};
+      "assign",
+      "flip"};
 
   std::unordered_set<std::string> teller_set{
       "matrix_multiply",
@@ -3064,7 +3077,8 @@ struct SimpleOpTypeSetTeller : public Teller {
       "grid_sampler",
       "cumsum",
       "unbind",
-      "assign"};
+      "assign",
+      "flip"};
 };
 
 struct GenericPluginTeller : public Teller {
diff --git a/test/ir/inference/test_trt_convert_flip.py b/test/ir/inference/test_trt_convert_flip.py
new file mode 100644
index 00000000000000..442a3381e38439
--- /dev/null
+++ b/test/ir/inference/test_trt_convert_flip.py
@@ -0,0 +1,140 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from functools import partial
+from typing import List
+
+import numpy as np
+from program_config import ProgramConfig, TensorConfig
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest
+
+import paddle.inference as paddle_infer
+
+
+class TrtConvertFlipTest(TrtLayerAutoScanTest):
+    """Auto-scan test for the TensorRT converter of the ``flip`` op."""
+
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        # Programs are valid only when the compile-time TensorRT version,
+        # encoded as below, reaches 7220 (7.2.2).
+        trt_ver = paddle_infer.get_trt_compile_version()
+        encoded = trt_ver[0] * 1000 + trt_ver[1] * 100 + trt_ver[2] * 10
+        return not encoded < 7220
+
+    def sample_program_configs(self):
+        # Yields one single-op ``flip`` program for each (rank, batch) pair;
+        # the ``axis`` attribute lists every dimension of the input.
+        def generate_input(batch):
+            # self.dims is looked up when the data is generated.
+            rank = self.dims
+            # Rank 1 is not produced by the loop below but is still handled.
+            if rank == 1:
+                return np.random.random([24]).astype(np.int32)
+            trailing = {2: [24], 3: [3, 24], 4: [3, 3, 24]}.get(rank)
+            if trailing is None:
+                return None
+            return np.random.random([batch] + trailing).astype(np.float32)
+
+        for dims in [2, 3, 4]:
+            for batch in [3, 6, 9]:
+                self.dims = dims
+                flip_op = {
+                    "op_type": "flip",
+                    "op_inputs": {"X": ["input_data"]},
+                    "op_outputs": {"Out": ["output_data"]},
+                    "op_attrs": {"axis": np.arange(self.dims).tolist()},
+                }
+                ops = self.generate_op_config([flip_op])
+                input_config = TensorConfig(
+                    data_gen=partial(generate_input, batch)
+                )
+
+                yield ProgramConfig(
+                    ops=ops,
+                    weights={},
+                    inputs={"input_data": input_config},
+                    outputs=["output_data"],
+                )
+
+    def sample_predictor_configs(
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
+        # Yields (inference config, result of generate_trt_nodes_num,
+        # tolerance) for Float32 and then Half precision, after installing
+        # the dynamic-shape profile that matches self.dims.
+        def generate_dynamic_shape(attrs):
+            # rank -> (min, max, opt) shape registered for "input_data";
+            # any other rank leaves the current profile untouched.
+            profiles = {
+                4: (
+                    [1, 3 - 1, 3 - 1, 24 - 1],
+                    [9, 3 + 1, 3 + 1, 24 + 1],
+                    [1, 3, 3, 24],
+                ),
+                3: (
+                    [1, 3 - 1, 24 - 1],
+                    [9, 3 + 1, 24 + 1],
+                    [1, 3, 24],
+                ),
+                2: (
+                    [1, 24],
+                    [9, 24],
+                    [1, 24],
+                ),
+                1: (
+                    [24 - 1],
+                    [24 + 1],
+                    [24],
+                ),
+            }
+            if self.dims not in profiles:
+                return
+            lower, upper, optimal = profiles[self.dims]
+            self.dynamic_shape.min_input_shape = {"input_data": lower}
+            self.dynamic_shape.max_input_shape = {"input_data": upper}
+            self.dynamic_shape.opt_input_shape = {"input_data": optimal}
+
+        def clear_dynamic_shape():
+            # Not called anywhere in this test.
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            # Same version encoding as is_program_valid.
+            trt_ver = paddle_infer.get_trt_compile_version()
+            encoded = trt_ver[0] * 1000 + trt_ver[1] * 100 + trt_ver[2] * 10
+            return (0, 3) if encoded < 7220 else (1, 2)
+
+        attrs = [op.attrs for op in program_config.ops]
+        self.trt_param.max_batch_size = 9
+        self.trt_param.workspace_size = 1073741824
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        fp32_config = self.create_inference_config()
+        yield fp32_config, generate_trt_nodes_num(attrs, True), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        fp16_config = self.create_inference_config()
+        yield fp16_config, generate_trt_nodes_num(attrs, True), 1e-3
+
+    def test(self):
+        # Entry point picked up by unittest.
+        self.run_test()
+
+
+if __name__ == "__main__":
+    unittest.main()