|
#include <array>
#include <cstddef>
#include <cstdlib>
#include <exception>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>
| 2 | + |
| 3 | +namespace { |
| 4 | +class OrtSessionHandler { |
| 5 | + public: |
| 6 | + /** |
| 7 | + * @param model_path path to onnx model |
| 8 | + * @param gpu_idx index of the gpu, index < 0 means no gpu |
| 9 | + */ |
| 10 | + OrtSessionHandler(const std::string &model_path, std::vector<std::vector<int>> &input_tensor_shapes, |
| 11 | + int gpu_idx = -1); |
| 12 | + |
| 13 | + std::vector<float> preprocess(const cv::Mat &image, int target_height, int target_width, |
| 14 | + const std::vector<float> &mean_val = {0.5, 0.5, 0.5}, |
| 15 | + const std::vector<float> &std_val = {0.5, 0.5, 0.5}) const; |
| 16 | + |
| 17 | + private: |
| 18 | + std::string _model_path; |
| 19 | + std::vector<std::vector<int>> _input_tensor_shapes; |
| 20 | + int _gpu_idx; |
| 21 | +}; |
| 22 | + |
// Input resolution (rows x columns) fed to the BiSeNetV2 Cityscapes model by this program.
constexpr int BISENETV2_CITYSCAPES_IMAGE_HEIGHT = 1024;
constexpr int BISENETV2_CITYSCAPES_IMAGE_WIDTH = 1024;

// Class names, indexed by class id (19 entries).
const std::vector<std::string> CITY_SCAPES_CLASSES = {
    "road",         "sidewalk",     "building",   "wall",    "fence",
    "pole",         "traffic light", "traffic sign", "vegetation", "terrain",
    "sky",          "person",       "rider",      "car",     "truck",
    "bus",          "train",        "motorcycle", "bicycle"};

// One colour triple per class, in the same order as CITY_SCAPES_CLASSES.
// NOTE(review): presumably RGB, following the Cityscapes palette — confirm the channel
// order before drawing with OpenCV, which defaults to BGR.
const std::vector<std::array<int, 3>> CITY_SCAPES_COLOR_CHART = {
    {128, 64, 128},   // road
    {244, 35, 232},   // sidewalk
    {70, 70, 70},     // building
    {102, 102, 156},  // wall
    {190, 153, 153},  // fence
    {153, 153, 153},  // pole
    {250, 170, 30},   // traffic light
    {220, 220, 0},    // traffic sign
    {107, 142, 35},   // vegetation
    {152, 251, 152},  // terrain
    {70, 130, 180},   // sky
    {220, 20, 60},    // person
    {255, 0, 0},      // rider
    {0, 0, 142},      // car
    {0, 0, 70},       // truck
    {0, 60, 100},     // bus
    {0, 80, 100},     // train
    {0, 0, 230},      // motorcycle
    {119, 11, 32}};   // bicycle
| 33 | +} // namespace |
| 34 | + |
| 35 | +int main(int argc, char *argv[]) { |
| 36 | + if (argc != 4) { |
| 37 | + std::cerr << "Usage: [app] [/path/to/image] [path/to/onnx/model] [gpu/idx]" << std::endl; |
| 38 | + return EXIT_FAILURE; |
| 39 | + } |
| 40 | + const std::string image_path = argv[1]; |
| 41 | + cv::Mat image = cv::imread(image_path); |
| 42 | + |
| 43 | + if (image.empty()) { |
| 44 | + std::cerr << "failed to load " << image_path << std::endl; |
| 45 | + return EXIT_FAILURE; |
| 46 | + } |
| 47 | + |
| 48 | + const std::string onnx_model_path = argv[2]; |
| 49 | + const int gpu_idx = std::atoi(argv[3]); |
| 50 | + |
| 51 | + std::vector<std::vector<int>> input_tensor_shapes{ |
| 52 | + {1, 3, BISENETV2_CITYSCAPES_IMAGE_HEIGHT, BISENETV2_CITYSCAPES_IMAGE_WIDTH}}; |
| 53 | + OrtSessionHandler ort_session_handler(onnx_model_path, input_tensor_shapes, gpu_idx); |
| 54 | + auto input_data = |
| 55 | + ort_session_handler.preprocess(image, BISENETV2_CITYSCAPES_IMAGE_WIDTH, BISENETV2_CITYSCAPES_IMAGE_WIDTH); |
| 56 | + |
| 57 | + return EXIT_SUCCESS; |
| 58 | +} |
| 59 | + |
| 60 | +namespace { |
| 61 | +OrtSessionHandler::OrtSessionHandler(const std::string &model_path, std::vector<std::vector<int>> &input_tensor_shapes, |
| 62 | + int gpu_idx) |
| 63 | + : _model_path(model_path), _input_tensor_shapes(input_tensor_shapes), _gpu_idx(gpu_idx) {} |
| 64 | + |
| 65 | +std::vector<float> OrtSessionHandler::preprocess(const cv::Mat &image, int target_height, int target_width, |
| 66 | + const std::vector<float> &mean_val, |
| 67 | + const std::vector<float> &std_val) const { |
| 68 | + if (image.empty() || image.channels() != 3) { |
| 69 | + throw std::runtime_error("invalid image"); |
| 70 | + } |
| 71 | + |
| 72 | + if (target_height * target_width == 0) { |
| 73 | + throw std::runtime_error("invalid dimension"); |
| 74 | + } |
| 75 | + |
| 76 | + cv::Mat processed = image.clone(); |
| 77 | + |
| 78 | + if (image.rows != target_height || image.cols != target_width) { |
| 79 | + cv::resize(processed, processed, cv::Size(target_width, target_height), 0, 0, cv::INTER_CUBIC); |
| 80 | + } |
| 81 | + cv::cvtColor(processed, processed, cv::COLOR_BGR2RGB); |
| 82 | + std::vector<float> data(3 * target_height * target_width); |
| 83 | + |
| 84 | + for (int i = 0; i < target_height; ++i) { |
| 85 | + for (int j = 0; j < target_width; ++j) { |
| 86 | + for (int c = 0; c < 3; ++c) { |
| 87 | + data[c * target_height * target_width + i * target_width + j] = |
| 88 | + (image.data[i * target_width * 3 + j * 3 + c] / 255.0 - mean_val[c]) / std_val[c]; |
| 89 | + } |
| 90 | + } |
| 91 | + } |
| 92 | + |
| 93 | + return data; |
| 94 | +} |
| 95 | +} // namespace |
0 commit comments