diff options
| author | Michael Welter <michael@welter-4d.de> | 2022-09-11 20:55:26 +0200 | 
|---|---|---|
| committer | Stanislaw Halik <sthalik@misaki.pl> | 2022-11-01 13:51:35 +0100 | 
| commit | 08f1fcad1c74e25f97641a0ccbd229b267ec528c (patch) | |
| tree | 000b1b276bc7df4a74fd493dab05bcce68801de8 /tracker-neuralnet/ftnoir_tracker_neuralnet.h | |
| parent | 77d6abaf53dbe2ee6334bd59b112e25d694a2f65 (diff) | |
tracker/nn: Tweaks, refactoring, deadzone filtering and support for uncertainty estimation
* Add rudimentary tests for two functions; maybe more in the future
* Fix the rotation correction from vertical translation
* Move preview class to new files
* Move neural network model adapters to new files
* Add utility functions for opencv
* Query the model inputs/outputs by name to see what is available
* Support outputs for the standard deviation of the data distribution,
  i.e. what you get if you let your model output the full parameters of a
  Gaussian distribution (depending on the inputs) and fit it with a
  negative log-likelihood loss.
* Disable support for sequence models
* Add support for eye open/closed classification.
  Scale the uncertainty estimate up if the eyes are closed
* Add a deadzone filter which activates if the model supports uncertainty
  quantification. The deadzone becomes larger the more uncertain
  the model/data are. This is mainly intended to suppress
  large estimation errors when the user blinks
* Fix distance being twice what it should have been
Diffstat (limited to 'tracker-neuralnet/ftnoir_tracker_neuralnet.h')
| -rw-r--r-- | tracker-neuralnet/ftnoir_tracker_neuralnet.h | 127 | 
1 files changed, 24 insertions, 103 deletions
| diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.h b/tracker-neuralnet/ftnoir_tracker_neuralnet.h index 9b481186..9e0374da 100644 --- a/tracker-neuralnet/ftnoir_tracker_neuralnet.h +++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.h @@ -7,6 +7,11 @@  #pragma once +#include "ui_neuralnet-trackercontrols.h" +#include "model_adapters.h" +#include "deadzone_filter.h" +#include "preview.h" +  #include "options/options.hpp"  #include "api/plugin-api.hpp"  #include "cv/video-widget.hpp" @@ -27,13 +32,9 @@  #include <cinttypes>  #include <array> -#include <onnxruntime_cxx_api.h> -  #include <opencv2/core.hpp> -#include <opencv2/core/types.hpp>  #include <opencv2/imgproc.hpp> -#include "ui_neuralnet-trackercontrols.h"  namespace neuralnet_tracker_ns  { @@ -81,6 +82,8 @@ struct Settings : opts {      value<bool> use_mjpeg { b, "use-mjpeg", false };      value<int> num_threads { b, "num-threads", 1 };      value<int> resolution { b, "force-resolution", 0 }; +    value<double> deadzone_size { b, "deadzone-size", 1. }; +    value<double> deadzone_hardness { b, "deadzone-hardness", 1.5 };      Settings();  }; @@ -94,101 +97,6 @@ struct CamIntrinsics  }; -class Localizer -{ -    public: -        Localizer(Ort::MemoryInfo &allocator_info, -                    Ort::Session &&session); -         -        // Returns bounding wrt image coordinate of the input image -        // The preceeding float is the score for being a face normalized to [0,1]. 
-        std::pair<float, cv::Rect2f> run( -            const cv::Mat &frame); - -        double last_inference_time_millis() const; -    private: -        inline static constexpr int INPUT_IMG_WIDTH = 288; -        inline static constexpr int INPUT_IMG_HEIGHT = 224; -        Ort::Session session_{nullptr}; -        // Inputs / outputs -        cv::Mat scaled_frame_{}, input_mat_{}; -        Ort::Value input_val_{nullptr}, output_val_{nullptr}; -        std::array<float, 5> results_; -        double last_inference_time_ = 0; -}; - - -class PoseEstimator -{ -    public: -        struct Face -        { -            std::array<float,4> rotation; // Quaternion, (w, x, y, z) -            cv::Rect2f box; -            cv::Point2f center; -            float size; -        }; - -        PoseEstimator(Ort::MemoryInfo &allocator_info, -                        Ort::Session &&session); -        /** Inference -        * -        * Coordinates are defined wrt. the image space of the input `frame`. -        * X goes right, Z (depth) into the image, Y points down (like pixel coordinates values increase from top to bottom) -        */ -        std::optional<Face> run(const cv::Mat &frame, const cv::Rect &box); -        // Returns an image compatible with the 'frame' image for displaying. -        cv::Mat last_network_input() const; -        double last_inference_time_millis() const; -    private: -        // Operates on the private image data members -        int find_input_intensity_90_pct_quantile() const; - -        int64_t model_version_ = 0;  // Queried meta data from the ONNX file -        Ort::Session session_{nullptr};  // ONNX's runtime context for running the model -        Ort::Allocator allocator_;   // Memory allocator for tensors -        // Inputs -        cv::Mat scaled_frame_{}, input_mat_{};  // Input. One is the original crop, the other is rescaled (?) 
-        std::vector<Ort::Value> input_val_;    // Tensors to put into the model -        std::vector<const char*> input_names_; // Refers to the names in the onnx model.  -        // Outputs -        cv::Vec<float, 3> output_coord_{};  // 2d Coordinate and head size output. -        cv::Vec<float, 4> output_quat_{};   //  Quaternion output -        cv::Vec<float, 4> output_box_{};    // Bounding box output -        std::vector<Ort::Value> output_val_; // Tensors to put the model outputs in. -        std::vector<const char*> output_names_; // Refers to the names in the onnx model.  -        size_t num_recurrent_states_ = 0; -        double last_inference_time_ = 0; -}; - - -class Preview -{ -public: -    void init(const cv_video_widget& widget); -    void copy_video_frame(const cv::Mat& frame); -    void draw_gizmos( -        const std::optional<PoseEstimator::Face> &face, -        const Affine& pose, -        const std::optional<cv::Rect2f>& last_roi, -        const std::optional<cv::Rect2f>& last_localizer_roi, -        const cv::Point2f& neckjoint_position); -    void overlay_netinput(const cv::Mat& netinput); -    void draw_fps(double fps, double last_inference_time); -    void copy_to_widget(cv_video_widget& widget); -private: -    // Transform from camera frame to preview -    cv::Rect2f transform(const cv::Rect2f& r) const; -    cv::Point2f transform(const cv::Point2f& p) const; -    float transform(float s) const; - -    cv::Mat preview_image_; -    cv::Size preview_size_ = { 0, 0 }; -    float scale_ = 1.f;   -    cv::Point2f offset_ = { 0.f, 0.f}; -}; - -  class NeuralNetTracker : protected virtual QThread, public ITracker  {      Q_OBJECT @@ -214,7 +122,10 @@ private:          const std::optional<PoseEstimator::Face> &face,          const Affine& pose);      void update_fps(double dt); -    Affine compute_pose(const PoseEstimator::Face &face) const; +    // Secretly applies filtering while computing the pose in 3d space. 
+    QuatPose compute_filtered_pose(const PoseEstimator::Face &face); +    // Compute the pose in 3d space taking the network outputs +    QuatPose transform_to_world_pose(const cv::Quatf &face_rotation, const cv::Point2f& face_xy, const float face_size) const;      Settings settings_;      std::optional<Localizer> localizer_; @@ -227,7 +138,7 @@ private:      std::array<cv::Mat,2> downsized_original_images_ = {}; // Image pyramid      std::optional<cv::Rect2f> last_localizer_roi_;      std::optional<cv::Rect2f> last_roi_; -    static constexpr float HEAD_SIZE_MM = 200.f; +    static constexpr float HEAD_SIZE_MM = 200.f; // In the vertical. Approximately.      mutable QMutex stats_mtx_;      double fps_ = 0; @@ -238,8 +149,9 @@ private:      int num_threads_ = 1;      bool is_visible_ = true; -    QMutex mtx_; // Protects the pose -    Affine pose_; +    QMutex mtx_ = {}; // Protects the pose +    std::optional<QuatPose> last_pose_ = {}; +    Affine last_pose_affine_ = {};      Preview preview_;      std::unique_ptr<cv_video_widget> video_widget_; @@ -288,6 +200,15 @@ class NeuralNetMetadata : public Metadata  } // neuralnet_tracker_ns + +namespace neuralnet_tracker_tests +{ + +void run(); + +} + +  using neuralnet_tracker_ns::NeuralNetTracker;  using neuralnet_tracker_ns::NeuralNetDialog;  using neuralnet_tracker_ns::NeuralNetMetadata; | 
