#pragma once #include #include #include #include #include #include #include "opencv_contrib.h" namespace neuralnet_tracker_ns { // Generally useful sigmoid function float sigmoid(float x); class Localizer { public: Localizer(Ort::MemoryInfo &allocator_info, Ort::Session &&session); // Returns bounding wrt image coordinate of the input image // The preceeding float is the score for being a face normalized to [0,1]. std::pair run( const cv::Mat &frame); double last_inference_time_millis() const; private: inline static constexpr int INPUT_IMG_WIDTH = 288; inline static constexpr int INPUT_IMG_HEIGHT = 224; Ort::Session session_{nullptr}; // Inputs / outputs cv::Mat scaled_frame_{}, input_mat_{}; Ort::Value input_val_{nullptr}, output_val_{nullptr}; std::array results_; double last_inference_time_ = 0; }; class PoseEstimator { public: struct Face { cv::Quatf rotation; cv::Matx33f rotaxis_cov_tril; // Lower triangular factor of Cholesky decomposition cv::Rect2f box; cv::Point2f center; cv::Point2f center_stddev; float size; float size_stddev; }; PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&session); /** Inference * * Coordinates are defined wrt. the image space of the input `frame`. * X goes right, Z (depth) into the image, Y points down (like pixel coordinates values increase from top to bottom) */ std::optional run(const cv::Mat &frame, const cv::Rect &box); // Returns an image compatible with the 'frame' image for displaying. cv::Mat last_network_input() const; double last_inference_time_millis() const; bool has_uncertainty() const { return has_uncertainty_; } private: std::string get_network_input_name(size_t i) const; std::string get_network_output_name(size_t i) const; int64_t model_version_ = 0; // Queried meta data from the ONNX file Ort::Session session_{nullptr}; // ONNX's runtime context for running the model Ort::Allocator allocator_; // Memory allocator for tensors // Inputs cv::Mat scaled_frame_{}, input_mat_{}; // Input. One is the original crop, the other is rescaled (?) std::vector input_val_; // Tensors to put into the model std::vector input_names_; // Refers to the names in the onnx model. std::vector input_c_names_; // Refers to the C names in the onnx model. // Outputs cv::Vec output_coord_{}; // 2d Coordinate and head size output. cv::Vec output_quat_{}; // Quaternion output cv::Vec output_box_{}; // Bounding box output cv::Matx33f output_rotaxis_scales_tril_{}; // Lower triangular matrix of LLT factorization of covariance of rotation vector as offset from output quaternion cv::Vec output_eyes_{}; cv::Vec output_coord_scales_{}; std::vector output_val_; // Tensors to put the model outputs in. std::vector output_names_; // Refers to the names in the onnx model. std::vector output_c_names_; // Refers to the C names in the onnx model. // More bookkeeping size_t num_recurrent_states_ = 0; double last_inference_time_ = 0; bool has_uncertainty_ = false; bool has_eye_closed_detection_ = false; }; // Finds the intensity where x percent of pixels have less intensity than that. int find_input_intensity_quantile(const cv::Mat& frame, float percentage); // Adjust brightness levels to full range and scales the value range to [-0.5, 0.5] void normalize_brightness(const cv::Mat& frame, cv::Mat& out); } // namespace neuralnet_tracker_ns