diff options
Diffstat (limited to 'tracker-neuralnet/model_adapters.h')
-rw-r--r-- | tracker-neuralnet/model_adapters.h | 105 |
1 files changed, 105 insertions, 0 deletions
diff --git a/tracker-neuralnet/model_adapters.h b/tracker-neuralnet/model_adapters.h new file mode 100644 index 00000000..c1aaa6de --- /dev/null +++ b/tracker-neuralnet/model_adapters.h @@ -0,0 +1,105 @@ +#pragma once + +#include <optional> +#include <array> +#include <vector> +#include <string> + +#include <onnxruntime_cxx_api.h> +#include <opencv2/core.hpp> +#include "opencv_contrib.h" + + +namespace neuralnet_tracker_ns +{ + +// Generally useful sigmoid function +float sigmoid(float x); + + +class Localizer +{ + public: + Localizer(Ort::MemoryInfo &allocator_info, + Ort::Session &&session); + + // Returns bounding wrt image coordinate of the input image + // The preceeding float is the score for being a face normalized to [0,1]. + std::pair<float, cv::Rect2f> run( + const cv::Mat &frame); + + double last_inference_time_millis() const; + private: + inline static constexpr int INPUT_IMG_WIDTH = 288; + inline static constexpr int INPUT_IMG_HEIGHT = 224; + Ort::Session session_{nullptr}; + // Inputs / outputs + cv::Mat scaled_frame_{}, input_mat_{}; + Ort::Value input_val_{nullptr}, output_val_{nullptr}; + std::array<float, 5> results_; + double last_inference_time_ = 0; +}; + + +class PoseEstimator +{ + public: + struct Face + { + cv::Quatf rotation; + cv::Matx33f rotaxis_cov_tril; // Lower triangular factor of Cholesky decomposition + cv::Rect2f box; + cv::Point2f center; + float size; + cv::Matx33f center_size_cov_tril; // Lower triangular factor of Cholesky decomposition + }; + + PoseEstimator(Ort::MemoryInfo &allocator_info, + Ort::Session &&session); + /** Inference + * + * Coordinates are defined wrt. the image space of the input `frame`. + * X goes right, Z (depth) into the image, Y points down (like pixel coordinates values increase from top to bottom) + */ + std::optional<Face> run(const cv::Mat &frame, const cv::Rect &box); + // Returns an image compatible with the 'frame' image for displaying. + cv::Mat last_network_input() const; + double last_inference_time_millis() const; + bool has_uncertainty() const { return has_uncertainty_; } + + private: + std::string get_network_input_name(size_t i) const; + std::string get_network_output_name(size_t i) const; + int64_t model_version_ = 0; // Queried meta data from the ONNX file + Ort::Session session_{nullptr}; // ONNX's runtime context for running the model + mutable Ort::Allocator allocator_; // Memory allocator for tensors + // Inputs + cv::Mat scaled_frame_{}, input_mat_{}; // Input. One is the original crop, the other is rescaled (?) + std::vector<Ort::Value> input_val_; // Tensors to put into the model + std::vector<std::string> input_names_; // Refers to the names in the onnx model. + std::vector<const char *> input_c_names_; // Refers to the C names in the onnx model. + // Outputs + cv::Vec<float, 3> output_coord_{}; // 2d Coordinate and head size output. + cv::Vec<float, 4> output_quat_{}; // Quaternion output + cv::Vec<float, 4> output_box_{}; // Bounding box output + cv::Matx33f output_rotaxis_scales_tril_{}; // Lower triangular matrix of LLT factorization of covariance of rotation vector as offset from output quaternion + cv::Matx33f output_coord_scales_tril_{}; // Lower triangular factor + cv::Vec3f output_coord_scales_std_{}; // Depending on the model, alternatively a 3d vector with standard deviations. + std::vector<Ort::Value> output_val_; // Tensors to put the model outputs in. + std::vector<std::string> output_names_; // Refers to the names in the onnx model. + std::vector<const char *> output_c_names_; // Refers to the C names in the onnx model. + // More bookkeeping + double last_inference_time_ = 0; + bool has_uncertainty_ = false; + bool pos_scale_uncertainty_is_matrix_ = false; +}; + + +// Finds the intensity where x percent of pixels have less intensity than that. +int find_input_intensity_quantile(const cv::Mat& frame, float percentage); + +// Adjust brightness levels to full range and scales the value range to [-0.5, 0.5] +void normalize_brightness(const cv::Mat& frame, cv::Mat& out); + + +} // namespace neuralnet_tracker_ns |