tracker/nn: Tweaks, refactoring, a deadzone filtering and support for uncertainty estimation

* Add rudimentary test for two functions .. maybe more in future * Fix the rotation correction from vertical translation * Move preview class to new files * Move neural network model adapters to new files * Add utility functions for opencv * Query the model inputs/outputs by name to see what is available * Supports outputs for standard deviation of the data distribution - What you get if you let your model output the full parameters of a gaussian distribution (depending on the inputs) and fit it with negative log likelihood loss. * Disabled support for sequence models * Add support for detection of eye open/close classification. Scale uncertainty estimate up if eyes closed * Add a deadzone filter which activates if the model supports uncertainty quantification. The deadzone scales becomes larger the more uncertain the model/data are. This is mostly supposed to be useful to suppress large estimate errors when the user blinks with the eyes * Fix distance being twice of what it should have been
author: Michael Welter <michael@welter-4d.de> 2022-09-11 20:55:26 +0200
committer: Stanislaw Halik <sthalik@misaki.pl> 2022-11-01 13:51:35 +0100
commit: 08f1fcad1c74e25f97641a0ccbd229b267ec528c (patch)
tree: 000b1b276bc7df4a74fd493dab05bcce68801de8 /tracker-neuralnet/ftnoir_tracker_neuralnet.h
parent: 77d6abaf53dbe2ee6334bd59b112e25d694a2f65 (diff)
1 files changed, 24 insertions, 103 deletions
diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.h b/tracker-neuralnet/ftnoir_tracker_neuralnet.h
index 9b481186..9e0374da 100644
--- a/tracker-neuralnet/ftnoir_tracker_neuralnet.h
+++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.h
@@ -7,6 +7,11 @@
 
 #pragma once
 
+#include "ui_neuralnet-trackercontrols.h"
+#include "model_adapters.h"
+#include "deadzone_filter.h"
+#include "preview.h"
+
 #include "options/options.hpp"
 #include "api/plugin-api.hpp"
 #include "cv/video-widget.hpp"
@@ -27,13 +32,9 @@
 #include <cinttypes>
 #include <array>
 
-#include <onnxruntime_cxx_api.h>
-
 #include <opencv2/core.hpp>
-#include <opencv2/core/types.hpp>
 #include <opencv2/imgproc.hpp>
 
-#include "ui_neuralnet-trackercontrols.h"
 
 namespace neuralnet_tracker_ns
 {
@@ -81,6 +82,8 @@ struct Settings : opts {
     value<bool> use_mjpeg { b, "use-mjpeg", false };
     value<int> num_threads { b, "num-threads", 1 };
     value<int> resolution { b, "force-resolution", 0 };
+    value<double> deadzone_size { b, "deadzone-size", 1. };
+    value<double> deadzone_hardness { b, "deadzone-hardness", 1.5 };
     Settings();
 };
 
@@ -94,101 +97,6 @@ struct CamIntrinsics
 };
 
 
-class Localizer
-{
-    public:
-        Localizer(Ort::MemoryInfo &allocator_info,
-                    Ort::Session &&session);
-        
-        // Returns bounding wrt image coordinate of the input image
-        // The preceeding float is the score for being a face normalized to [0,1].
-        std::pair<float, cv::Rect2f> run(
-            const cv::Mat &frame);
-
-        double last_inference_time_millis() const;
-    private:
-        inline static constexpr int INPUT_IMG_WIDTH = 288;
-        inline static constexpr int INPUT_IMG_HEIGHT = 224;
-        Ort::Session session_{nullptr};
-        // Inputs / outputs
-        cv::Mat scaled_frame_{}, input_mat_{};
-        Ort::Value input_val_{nullptr}, output_val_{nullptr};
-        std::array<float, 5> results_;
-        double last_inference_time_ = 0;
-};
-
-
-class PoseEstimator
-{
-    public:
-        struct Face
-        {
-            std::array<float,4> rotation; // Quaternion, (w, x, y, z)
-            cv::Rect2f box;
-            cv::Point2f center;
-            float size;
-        };
-
-        PoseEstimator(Ort::MemoryInfo &allocator_info,
-                        Ort::Session &&session);
-        /** Inference
-        *
-        * Coordinates are defined wrt. the image space of the input `frame`.
-        * X goes right, Z (depth) into the image, Y points down (like pixel coordinates values increase from top to bottom)
-        */
-        std::optional<Face> run(const cv::Mat &frame, const cv::Rect &box);
-        // Returns an image compatible with the 'frame' image for displaying.
-        cv::Mat last_network_input() const;
-        double last_inference_time_millis() const;
-    private:
-        // Operates on the private image data members
-        int find_input_intensity_90_pct_quantile() const;
-
-        int64_t model_version_ = 0;  // Queried meta data from the ONNX file
-        Ort::Session session_{nullptr};  // ONNX's runtime context for running the model
-        Ort::Allocator allocator_;   // Memory allocator for tensors
-        // Inputs
-        cv::Mat scaled_frame_{}, input_mat_{};  // Input. One is the original crop, the other is rescaled (?)
-        std::vector<Ort::Value> input_val_;    // Tensors to put into the model
-        std::vector<const char*> input_names_; // Refers to the names in the onnx model. 
-        // Outputs
-        cv::Vec<float, 3> output_coord_{};  // 2d Coordinate and head size output.
-        cv::Vec<float, 4> output_quat_{};   //  Quaternion output
-        cv::Vec<float, 4> output_box_{};    // Bounding box output
-        std::vector<Ort::Value> output_val_; // Tensors to put the model outputs in.
-        std::vector<const char*> output_names_; // Refers to the names in the onnx model. 
-        size_t num_recurrent_states_ = 0;
-        double last_inference_time_ = 0;
-};
-
-
-class Preview
-{
-public:
-    void init(const cv_video_widget& widget);
-    void copy_video_frame(const cv::Mat& frame);
-    void draw_gizmos(
-        const std::optional<PoseEstimator::Face> &face,
-        const Affine& pose,
-        const std::optional<cv::Rect2f>& last_roi,
-        const std::optional<cv::Rect2f>& last_localizer_roi,
-        const cv::Point2f& neckjoint_position);
-    void overlay_netinput(const cv::Mat& netinput);
-    void draw_fps(double fps, double last_inference_time);
-    void copy_to_widget(cv_video_widget& widget);
-private:
-    // Transform from camera frame to preview
-    cv::Rect2f transform(const cv::Rect2f& r) const;
-    cv::Point2f transform(const cv::Point2f& p) const;
-    float transform(float s) const;
-
-    cv::Mat preview_image_;
-    cv::Size preview_size_ = { 0, 0 };
-    float scale_ = 1.f;  
-    cv::Point2f offset_ = { 0.f, 0.f};
-};
-
-
 class NeuralNetTracker : protected virtual QThread, public ITracker
 {
     Q_OBJECT
@@ -214,7 +122,10 @@ private:
         const std::optional<PoseEstimator::Face> &face,
         const Affine& pose);
     void update_fps(double dt);
-    Affine compute_pose(const PoseEstimator::Face &face) const;
+    // Secretly applies filtering while computing the pose in 3d space.
+    QuatPose compute_filtered_pose(const PoseEstimator::Face &face);
+    // Compute the pose in 3d space taking the network outputs
+    QuatPose transform_to_world_pose(const cv::Quatf &face_rotation, const cv::Point2f& face_xy, const float face_size) const;
 
     Settings settings_;
     std::optional<Localizer> localizer_;
@@ -227,7 +138,7 @@ private:
     std::array<cv::Mat,2> downsized_original_images_ = {}; // Image pyramid
     std::optional<cv::Rect2f> last_localizer_roi_;
     std::optional<cv::Rect2f> last_roi_;
-    static constexpr float HEAD_SIZE_MM = 200.f;
+    static constexpr float HEAD_SIZE_MM = 200.f; // In the vertical. Approximately.
 
     mutable QMutex stats_mtx_;
     double fps_ = 0;
@@ -238,8 +149,9 @@ private:
     int num_threads_ = 1;
     bool is_visible_ = true;
 
-    QMutex mtx_; // Protects the pose
-    Affine pose_;
+    QMutex mtx_ = {}; // Protects the pose
+    std::optional<QuatPose> last_pose_ = {};
+    Affine last_pose_affine_ = {};
 
     Preview preview_;
     std::unique_ptr<cv_video_widget> video_widget_;
@@ -288,6 +200,15 @@ class NeuralNetMetadata : public Metadata
 
 } // neuralnet_tracker_ns
 
+
+namespace neuralnet_tracker_tests
+{
+
+void run();
+
+}
+
+
 using neuralnet_tracker_ns::NeuralNetTracker;
 using neuralnet_tracker_ns::NeuralNetDialog;
 using neuralnet_tracker_ns::NeuralNetMetadata;
author	Michael Welter <michael@welter-4d.de>	2022-09-11 20:55:26 +0200
committer	Stanislaw Halik <sthalik@misaki.pl>	2022-11-01 13:51:35 +0100
commit	08f1fcad1c74e25f97641a0ccbd229b267ec528c (patch)
tree	000b1b276bc7df4a74fd493dab05bcce68801de8 /tracker-neuralnet/ftnoir_tracker_neuralnet.h
parent	77d6abaf53dbe2ee6334bd59b112e25d694a2f65 (diff)