1
#include <onnxruntime/core/session/experimental_onnxruntime_cxx_api.h>

// if onnxruntime is built with cuda provider, the following header can be added to use cuda gpu
// #include <onnxruntime/core/providers/cuda/cuda_provider_factory.h>

#include <cstddef>
#include <memory>
#include <numeric>

#include <opencv2/opencv.hpp>
2
8
3
9
namespace {
4
10
class OrtSessionHandler {
5
11
public:
6
12
/* *
7
13
* @param model_path path to onnx model
8
- * @param gpu_idx index of the gpu, index < 0 means no gpu
9
14
*/
10
- OrtSessionHandler (const std::string &model_path, std::vector<std::vector<int >> &input_tensor_shapes,
11
- int gpu_idx = -1 );
15
+ OrtSessionHandler (const std::string &model_path, std::vector<std::vector<int64_t >> &input_tensor_shapes);
16
+
17
+ virtual std::vector<float > preprocess (const cv::Mat &image, int target_height, int target_width,
18
+ const std::vector<float > &mean_val = {0.5 , 0.5 , 0.5 },
19
+ const std::vector<float > &std_val = {0.5 , 0.5 , 0.5 }) const ;
12
20
13
- std::vector<float > preprocess (const cv::Mat &image, int target_height, int target_width,
14
- const std::vector<float > &mean_val = {0.5 , 0.5 , 0.5 },
15
- const std::vector<float > &std_val = {0.5 , 0.5 , 0.5 }) const ;
21
+ /* *
22
+ * @file function to get output tensors
23
+ * @brief each std::pair<DataType *, std::vector<int64_t>> is a pair of output tensor's data and its dimension
24
+ * most semantic segmentation network will have only one output tensor
25
+ */
26
+ template <typename DataType = float >
27
+ std::vector<std::pair<DataType *, std::vector<int64_t >>> run (const std::vector<std::vector<float >> &input_data) const ;
16
28
17
29
private:
18
30
std::string _model_path;
19
- std::vector<std::vector<int >> _input_tensor_shapes;
20
- int _gpu_idx;
31
+ std::vector<std::vector<int64_t >> _input_tensor_shapes;
32
+ Ort::Env _env;
33
+ std::unique_ptr<Ort::Experimental::Session> _session;
21
34
};
22
35
23
36
constexpr int BISENETV2_CITYSCAPES_IMAGE_HEIGHT = 1024 ;
24
37
constexpr int BISENETV2_CITYSCAPES_IMAGE_WIDTH = 1024 ;
38
+ constexpr int CITYSCAPES_NUM_CLASSES = 19 ;
39
+
25
40
static const std::vector<std::string> CITY_SCAPES_CLASSES = {
26
41
" road" , " sidewalk" , " building" , " wall" , " fence" , " pole" , " traffic light" , " traffic sign" , " vegetation" , " terrain" ,
27
42
" sky" , " person" , " rider" , " car" , " truck" , " bus" , " train" , " motorcycle" , " bicycle" };
28
43
29
- static const std::vector<std::array<int , 3 >> CITY_SCAPES_COLOR_CHART = {
30
- {128 , 64 , 128 }, {244 , 35 , 232 }, {70 , 70 , 70 }, {102 , 102 , 156 }, {190 , 153 , 153 }, {153 , 153 , 153 }, {250 , 170 , 30 },
31
- {220 , 220 , 0 }, {107 , 142 , 35 }, {152 , 251 , 152 }, {70 , 130 , 180 }, {220 , 20 , 60 }, {255 , 0 , 0 }, {0 , 0 , 142 },
32
- {0 , 0 , 70 }, {0 , 60 , 100 }, {0 , 80 , 100 }, {0 , 0 , 230 }, {119 , 11 , 32 }};
44
+ inline std::vector<cv::Scalar> to_cv_sccalar_colors (const std::vector<std::array<int , 3 >> &colors) {
45
+ std::vector<cv::Scalar> result;
46
+ result.reserve (colors.size ());
47
+ std::transform (std::begin (colors), std::end (colors), std::back_inserter (result),
48
+ [](const auto &elem) { return cv::Scalar (elem[0 ], elem[1 ], elem[2 ]); });
49
+
50
+ return result;
51
+ }
52
+
53
+ static const std::vector<cv::Scalar> CITYSCAPES_COLORS = to_cv_sccalar_colors({{128 , 64 , 128 },
54
+ {244 , 35 , 232 },
55
+ {70 , 70 , 70 },
56
+ {102 , 102 , 156 },
57
+ {190 , 153 , 153 },
58
+ {153 , 153 , 153 },
59
+ {250 , 170 , 30 },
60
+ {220 , 220 , 0 },
61
+ {107 , 142 , 35 },
62
+ {152 , 251 , 152 },
63
+ {70 , 130 , 180 },
64
+ {220 , 20 , 60 },
65
+ {255 , 0 , 0 },
66
+ {0 , 0 , 142 },
67
+ {0 , 0 , 70 },
68
+ {0 , 60 , 100 },
69
+ {0 , 80 , 100 },
70
+ {0 , 0 , 230 },
71
+ {119 , 11 , 32 }});
33
72
} // namespace
34
73
35
74
int main (int argc, char *argv[]) {
36
- if (argc != 4 ) {
37
- std::cerr << " Usage: [app] [/path/to/image] [path/to/onnx/model] [gpu/idx] " << std::endl;
75
+ if (argc != 3 ) {
76
+ std::cerr << " Usage: [app] [/path/to/image] [path/to/onnx/model]" << std::endl;
38
77
return EXIT_FAILURE;
39
78
}
40
79
const std::string image_path = argv[1 ];
@@ -46,21 +85,54 @@ int main(int argc, char *argv[]) {
46
85
}
47
86
48
87
const std::string onnx_model_path = argv[2 ];
49
- const int gpu_idx = std::atoi (argv[3 ]);
50
88
51
- std::vector<std::vector<int >> input_tensor_shapes{
89
+ std::vector<std::vector<int64_t >> input_tensor_shapes{
52
90
{1 , 3 , BISENETV2_CITYSCAPES_IMAGE_HEIGHT, BISENETV2_CITYSCAPES_IMAGE_WIDTH}};
53
- OrtSessionHandler ort_session_handler (onnx_model_path, input_tensor_shapes, gpu_idx );
54
- auto input_data =
91
+ OrtSessionHandler ort_session_handler (onnx_model_path, input_tensor_shapes);
92
+ std::vector< float > input_data =
55
93
ort_session_handler.preprocess (image, BISENETV2_CITYSCAPES_IMAGE_WIDTH, BISENETV2_CITYSCAPES_IMAGE_WIDTH);
56
94
95
+ // output data's type might change for each different model
96
+ auto output_data = ort_session_handler.run <int64_t >({input_data});
97
+
98
+ // postprocess
99
+ // this might change for each different model
100
+ cv::Mat segm (BISENETV2_CITYSCAPES_IMAGE_HEIGHT, BISENETV2_CITYSCAPES_IMAGE_WIDTH, CV_8UC (3 ));
101
+ for (int i = 0 ; i < BISENETV2_CITYSCAPES_IMAGE_HEIGHT; ++i) {
102
+ cv::Vec3b *ptr_segm = segm.ptr <cv::Vec3b>(i);
103
+ for (int j = 0 ; j < BISENETV2_CITYSCAPES_IMAGE_WIDTH; ++j) {
104
+ const auto &color = CITYSCAPES_COLORS[output_data[0 ].first [i * BISENETV2_CITYSCAPES_IMAGE_WIDTH + j]];
105
+ ptr_segm[j] = cv::Vec3b (color[0 ], color[1 ], color[2 ]);
106
+ }
107
+ }
108
+ cv::resize (segm, segm, image.size (), 0 , 0 , cv::INTER_NEAREST);
109
+ float blended_alpha = 0.4 ;
110
+ segm = (1 - blended_alpha) * image + blended_alpha * segm;
111
+ cv::imwrite (" out_img.jpg" , segm);
112
+
57
113
return EXIT_SUCCESS;
58
114
}
59
115
60
116
namespace {
61
- OrtSessionHandler::OrtSessionHandler (const std::string &model_path, std::vector<std::vector<int >> &input_tensor_shapes,
62
- int gpu_idx)
63
- : _model_path(model_path), _input_tensor_shapes(input_tensor_shapes), _gpu_idx(gpu_idx) {}
117
+ OrtSessionHandler::OrtSessionHandler (const std::string &model_path,
118
+ std::vector<std::vector<int64_t >> &input_tensor_shapes)
119
+ : _model_path(model_path),
120
+ _input_tensor_shapes (input_tensor_shapes),
121
+ _env(Ort::Env(ORT_LOGGING_LEVEL_WARNING, " ort session handler" )),
122
+ _session(nullptr ) {
123
+ Ort::SessionOptions session_options;
124
+
125
+ // if onnxruntime is built with cuda provider, the following function can be added to use cuda gpu
126
+ // Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, gpu_index));
127
+
128
+ std::basic_string<ORTCHAR_T> ort_model_path;
129
+ std::copy (model_path.begin (), model_path.end (), std::back_inserter (ort_model_path));
130
+ _session.reset (new Ort::Experimental::Session (_env, ort_model_path, session_options));
131
+
132
+ if (_session->GetInputCount () != input_tensor_shapes.size ()) {
133
+ throw std::runtime_error (" invalid input size" );
134
+ }
135
+ }
64
136
65
137
std::vector<float > OrtSessionHandler::preprocess (const cv::Mat &image, int target_height, int target_width,
66
138
const std::vector<float > &mean_val,
@@ -70,7 +142,7 @@ std::vector<float> OrtSessionHandler::preprocess(const cv::Mat &image, int targe
70
142
}
71
143
72
144
if (target_height * target_width == 0 ) {
73
- throw std::runtime_error (" invalid dimension" );
145
+ throw std::runtime_error (" invalid target dimension" );
74
146
}
75
147
76
148
cv::Mat processed = image.clone ();
@@ -85,11 +157,36 @@ std::vector<float> OrtSessionHandler::preprocess(const cv::Mat &image, int targe
85
157
for (int j = 0 ; j < target_width; ++j) {
86
158
for (int c = 0 ; c < 3 ; ++c) {
87
159
data[c * target_height * target_width + i * target_width + j] =
88
- (image .data [i * target_width * 3 + j * 3 + c] / 255.0 - mean_val[c]) / std_val[c];
160
+ (processed .data [i * target_width * 3 + j * 3 + c] / 255.0 - mean_val[c]) / std_val[c];
89
161
}
90
162
}
91
163
}
92
164
93
165
return data;
94
166
}
167
+
168
+ template <typename DataType>
169
+ std::vector<std::pair<DataType *, std::vector<int64_t >>> OrtSessionHandler::run (
170
+ const std::vector<std::vector<float >> &input_data) const {
171
+ if (_session->GetInputCount () != input_data.size ()) {
172
+ throw std::runtime_error (" invalid input size" );
173
+ }
174
+
175
+ std::vector<Ort::Value> input_tensors;
176
+ for (int i = 0 ; i < _session->GetInputCount (); ++i) {
177
+ input_tensors.emplace_back (Ort::Experimental::Value::CreateTensor<float >(
178
+ const_cast <float *>(input_data[i].data ()), input_data[i].size (), _input_tensor_shapes[i]));
179
+ }
180
+
181
+ std::vector<Ort::Value> output_tensors =
182
+ _session->Run (_session->GetInputNames (), input_tensors, _session->GetOutputNames ());
183
+
184
+ std::vector<std::pair<DataType *, std::vector<int64_t >>> output (_session->GetOutputCount ());
185
+ std::vector<std::vector<int64_t >> output_shapes = _session->GetOutputShapes ();
186
+ for (int i = 0 ; i < _session->GetOutputCount (); ++i) {
187
+ output[i] = std::make_pair (std::move (output_tensors[i].GetTensorMutableData <DataType>()), output_shapes[i]);
188
+ }
189
+
190
+ return output;
191
+ }
95
192
} // namespace
0 commit comments