diff options
Diffstat (limited to 'tracker-neuralnet/model_adapters.h')
| -rw-r--r-- | tracker-neuralnet/model_adapters.h | 102 | 
1 files changed, 102 insertions, 0 deletions
| diff --git a/tracker-neuralnet/model_adapters.h b/tracker-neuralnet/model_adapters.h new file mode 100644 index 00000000..3fbfb861 --- /dev/null +++ b/tracker-neuralnet/model_adapters.h @@ -0,0 +1,102 @@ +#pragma once + +#include <optional> +#include <array> +#include <vector> + +#include <onnxruntime_cxx_api.h> +#include <opencv2/core.hpp> +#include "opencv_contrib.h" + + +namespace neuralnet_tracker_ns +{ + +// Generally useful sigmoid function +float sigmoid(float x); + + +class Localizer +{ +    public: +        Localizer(Ort::MemoryInfo &allocator_info, +                    Ort::Session &&session); +         +        // Returns bounding wrt image coordinate of the input image +        // The preceeding float is the score for being a face normalized to [0,1]. +        std::pair<float, cv::Rect2f> run( +            const cv::Mat &frame); + +        double last_inference_time_millis() const; +    private: +        inline static constexpr int INPUT_IMG_WIDTH = 288; +        inline static constexpr int INPUT_IMG_HEIGHT = 224; +        Ort::Session session_{nullptr}; +        // Inputs / outputs +        cv::Mat scaled_frame_{}, input_mat_{}; +        Ort::Value input_val_{nullptr}, output_val_{nullptr}; +        std::array<float, 5> results_; +        double last_inference_time_ = 0; +}; + + +class PoseEstimator +{ +    public: +        struct Face +        { +            cv::Quatf rotation; +            cv::Matx33f rotaxis_cov_tril; // Lower triangular factor of Cholesky decomposition +            cv::Rect2f box; +            cv::Point2f center; +            cv::Point2f center_stddev; +            float size; +            float size_stddev; +        }; + +        PoseEstimator(Ort::MemoryInfo &allocator_info, +                        Ort::Session &&session); +        /** Inference +        * +        * Coordinates are defined wrt. the image space of the input `frame`. +        * X goes right, Z (depth) into the image, Y points down (like pixel coordinates values increase from top to bottom) +        */ +        std::optional<Face> run(const cv::Mat &frame, const cv::Rect &box); +        // Returns an image compatible with the 'frame' image for displaying. +        cv::Mat last_network_input() const; +        double last_inference_time_millis() const; +        bool has_uncertainty() const { return has_uncertainty_; } + +    private: +        int64_t model_version_ = 0;  // Queried meta data from the ONNX file +        Ort::Session session_{nullptr};  // ONNX's runtime context for running the model +        Ort::Allocator allocator_;   // Memory allocator for tensors +        // Inputs +        cv::Mat scaled_frame_{}, input_mat_{};  // Input. One is the original crop, the other is rescaled (?) +        std::vector<Ort::Value> input_val_;    // Tensors to put into the model +        std::vector<const char*> input_names_; // Refers to the names in the onnx model.  +        // Outputs +        cv::Vec<float, 3> output_coord_{};  // 2d Coordinate and head size output. +        cv::Vec<float, 4> output_quat_{};   //  Quaternion output +        cv::Vec<float, 4> output_box_{};    // Bounding box output +        cv::Matx33f output_rotaxis_scales_tril_{}; // Lower triangular matrix of LLT factorization of covariance of rotation vector as offset from output quaternion +        cv::Vec<float, 2> output_eyes_{}; +        cv::Vec<float, 3> output_coord_scales_{}; +        std::vector<Ort::Value> output_val_; // Tensors to put the model outputs in. +        std::vector<const char*> output_names_; // Refers to the names in the onnx model. +        // More bookkeeping +        size_t num_recurrent_states_ = 0; +        double last_inference_time_ = 0; +        bool has_uncertainty_ = false; +        bool has_eye_closed_detection_ = false; +}; + + +// Finds the intensity where x percent of pixels have less intensity than that. +int find_input_intensity_quantile(const cv::Mat& frame, float percentage); + +// Adjust brightness levels to full range and scales the value range to [-0.5, 0.5] +void normalize_brightness(const cv::Mat& frame, cv::Mat& out); + + +} // namespace neuralnet_tracker_ns
\ No newline at end of file | 
