Diffstat (limited to 'tracker-neuralnet/ftnoir_tracker_neuralnet.h')
-rw-r--r--  tracker-neuralnet/ftnoir_tracker_neuralnet.h | 127
1 file changed, 24 insertions(+), 103 deletions(-)
diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.h b/tracker-neuralnet/ftnoir_tracker_neuralnet.h
index 9b481186..9e0374da 100644
--- a/tracker-neuralnet/ftnoir_tracker_neuralnet.h
+++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.h
@@ -7,6 +7,11 @@
#pragma once
+#include "ui_neuralnet-trackercontrols.h"
+#include "model_adapters.h"
+#include "deadzone_filter.h"
+#include "preview.h"
+
#include "options/options.hpp"
#include "api/plugin-api.hpp"
#include "cv/video-widget.hpp"
@@ -27,13 +32,9 @@
#include <cinttypes>
#include <array>
-#include <onnxruntime_cxx_api.h>
-
#include <opencv2/core.hpp>
-#include <opencv2/core/types.hpp>
#include <opencv2/imgproc.hpp>
-#include "ui_neuralnet-trackercontrols.h"
namespace neuralnet_tracker_ns
{
@@ -81,6 +82,8 @@ struct Settings : opts {
value<bool> use_mjpeg { b, "use-mjpeg", false };
value<int> num_threads { b, "num-threads", 1 };
value<int> resolution { b, "force-resolution", 0 };
+ value<double> deadzone_size { b, "deadzone-size", 1. };
+ value<double> deadzone_hardness { b, "deadzone-hardness", 1.5 };
Settings();
};
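
The two new deadzone tunables feed the deadzone_filter.h include added at the top of the file: small pose changes get damped while larger ones pass through. A minimal sketch of one plausible response curve, assuming deadzone_size scales the suppressed region and deadzone_hardness sharpens the knee; the function name and exact curve are illustrative, not the actual deadzone_filter implementation:

    #include <cmath>

    // Hypothetical soft deadzone: gain is near 0 for small deltas and
    // approaches 1 for large ones, so the output never hard-clips.
    inline float soft_deadzone(float delta, float size, float hardness)
    {
        const float x = std::abs(delta) / size;                // normalized magnitude
        const float gain = std::pow(x / (1.f + x), hardness);  // smooth knee
        return delta * gain;
    }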
@@ -94,101 +97,6 @@ struct CamIntrinsics
};
-class Localizer
-{
- public:
- Localizer(Ort::MemoryInfo &allocator_info,
- Ort::Session &&session);
-
- // Returns the bounding box wrt the image coordinates of the input image.
- // The preceding float is the score for being a face, normalized to [0,1].
- std::pair<float, cv::Rect2f> run(
- const cv::Mat &frame);
-
- double last_inference_time_millis() const;
- private:
- inline static constexpr int INPUT_IMG_WIDTH = 288;
- inline static constexpr int INPUT_IMG_HEIGHT = 224;
- Ort::Session session_{nullptr};
- // Inputs / outputs
- cv::Mat scaled_frame_{}, input_mat_{};
- Ort::Value input_val_{nullptr}, output_val_{nullptr};
- std::array<float, 5> results_;
- double last_inference_time_ = 0;
-};
-
-
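
The removed Localizer now lives behind the model_adapters.h include added above. For reference, its API reads as a one-call face detector; a minimal usage sketch, assuming an Ort::Env named env, a localizer .onnx file, and a grayscale cv::Mat frame:

    Ort::MemoryInfo meminfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
    Ort::Session session{env, ORT_TSTR("localizer.onnx"), Ort::SessionOptions{}};
    Localizer localizer{meminfo, std::move(session)};

    // run() presumably rescales to the fixed 288x224 input (see the
    // constants above) and returns a confidence plus a box in the
    // coordinates of the input frame.
    auto [score, box] = localizer.run(frame);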
-class PoseEstimator
-{
- public:
- struct Face
- {
- std::array<float,4> rotation; // Quaternion, (w, x, y, z)
- cv::Rect2f box;
- cv::Point2f center;
- float size;
- };
-
- PoseEstimator(Ort::MemoryInfo &allocator_info,
- Ort::Session &&session);
- /** Inference
- *
- * Coordinates are defined wrt. the image space of the input `frame`.
- * X goes right, Z (depth) goes into the image, and Y points down (like pixel coordinates: values increase from top to bottom).
- */
- std::optional<Face> run(const cv::Mat &frame, const cv::Rect &box);
- // Returns an image compatible with the 'frame' image for displaying.
- cv::Mat last_network_input() const;
- double last_inference_time_millis() const;
- private:
- // Operates on the private image data members
- int find_input_intensity_90_pct_quantile() const;
-
- int64_t model_version_ = 0; // Queried metadata from the ONNX file
- Ort::Session session_{nullptr}; // ONNX's runtime context for running the model
- Ort::Allocator allocator_; // Memory allocator for tensors
- // Inputs
- cv::Mat scaled_frame_{}, input_mat_{}; // Input. One is the original crop, the other is rescaled (?)
- std::vector<Ort::Value> input_val_; // Tensors to put into the model
- std::vector<const char*> input_names_; // Refers to the names in the onnx model.
- // Outputs
- cv::Vec<float, 3> output_coord_{}; // 2d Coordinate and head size output.
- cv::Vec<float, 4> output_quat_{}; // Quaternion output
- cv::Vec<float, 4> output_box_{}; // Bounding box output
- std::vector<Ort::Value> output_val_; // Tensors to put the model outputs in.
- std::vector<const char*> output_names_; // Refers to the names in the onnx model.
- size_t num_recurrent_states_ = 0;
- double last_inference_time_ = 0;
-};
-
-
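
Chaining the two adapters gives the apparent detect-then-estimate flow; a sketch under the same assumptions as above, with the 0.5 threshold and the process() consumer purely illustrative:

    auto [score, roi] = localizer.run(frame);
    if (score > 0.5f)
    {
        // Face::rotation is a (w, x, y, z) quaternion in the image-space
        // convention documented above: X right, Y down, Z into the image.
        std::optional<PoseEstimator::Face> face =
            pose_estimator.run(frame, cv::Rect(roi));
        if (face)
            process(*face);
    }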
-class Preview
-{
-public:
- void init(const cv_video_widget& widget);
- void copy_video_frame(const cv::Mat& frame);
- void draw_gizmos(
- const std::optional<PoseEstimator::Face> &face,
- const Affine& pose,
- const std::optional<cv::Rect2f>& last_roi,
- const std::optional<cv::Rect2f>& last_localizer_roi,
- const cv::Point2f& neckjoint_position);
- void overlay_netinput(const cv::Mat& netinput);
- void draw_fps(double fps, double last_inference_time);
- void copy_to_widget(cv_video_widget& widget);
-private:
- // Transform from camera frame to preview
- cv::Rect2f transform(const cv::Rect2f& r) const;
- cv::Point2f transform(const cv::Point2f& p) const;
- float transform(float s) const;
-
- cv::Mat preview_image_;
- cv::Size preview_size_ = { 0, 0 };
- float scale_ = 1.f;
- cv::Point2f offset_ = { 0.f, 0.f};
-};
-
-
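
The removed Preview class, now presumably behind the new preview.h include, implies a fixed per-frame call order; sketched from the declarations above, with all surrounding variables assumed:

    preview.init(*video_widget);          // size the buffer to the widget
    preview.copy_video_frame(frame);      // blit the camera image
    preview.draw_gizmos(face, pose, last_roi, last_localizer_roi, neck_xy);
    preview.draw_fps(fps, inference_ms);  // stats overlay
    preview.copy_to_widget(*video_widget);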
class NeuralNetTracker : protected virtual QThread, public ITracker
{
Q_OBJECT
@@ -214,7 +122,10 @@ private:
const std::optional<PoseEstimator::Face> &face,
const Affine& pose);
void update_fps(double dt);
- Affine compute_pose(const PoseEstimator::Face &face) const;
+ // Secretly applies filtering while computing the pose in 3d space.
+ QuatPose compute_filtered_pose(const PoseEstimator::Face &face);
+ // Computes the pose in 3d space from the network outputs.
+ QuatPose transform_to_world_pose(const cv::Quatf &face_rotation, const cv::Point2f& face_xy, const float face_size) const;
Settings settings_;
std::optional<Localizer> localizer_;
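
Given the new last_pose_ member below and the deadzone settings added earlier, compute_filtered_pose plausibly blends each fresh measurement against the previous one before the result is converted to an Affine; a sketch with invented helper names (apply_deadzone stands in for whatever deadzone_filter.h actually exports):

    QuatPose raw = transform_to_world_pose(face.rotation, face.center, face.size);
    QuatPose filtered = last_pose_
        ? apply_deadzone(*last_pose_, raw,
                         settings_.deadzone_size, settings_.deadzone_hardness)
        : raw;
    last_pose_ = filtered;  // becomes the reference for the next frame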
@@ -227,7 +138,7 @@ private:
std::array<cv::Mat,2> downsized_original_images_ = {}; // Image pyramid
std::optional<cv::Rect2f> last_localizer_roi_;
std::optional<cv::Rect2f> last_roi_;
- static constexpr float HEAD_SIZE_MM = 200.f;
+ static constexpr float HEAD_SIZE_MM = 200.f; // In the vertical. Approximately.
mutable QMutex stats_mtx_;
double fps_ = 0;
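
The expanded comment on HEAD_SIZE_MM points at the pinhole relation behind the tracker's depth estimate: an object of known physical size H that appears s pixels tall under focal length f (in pixels) sits at depth z = f * H / s. For example, f = 1000 px and a 200 mm head spanning 100 px give z = 2000 mm, i.e. two meters from the camera. Illustrative only; focal_length_px would come from CamIntrinsics:

    float depth_mm = focal_length_px * HEAD_SIZE_MM / face_size_px;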
@@ -238,8 +149,9 @@ private:
int num_threads_ = 1;
bool is_visible_ = true;
- QMutex mtx_; // Protects the pose
- Affine pose_;
+ QMutex mtx_ = {}; // Protects the pose
+ std::optional<QuatPose> last_pose_ = {};
+ Affine last_pose_affine_ = {};
Preview preview_;
std::unique_ptr<cv_video_widget> video_widget_;
@@ -288,6 +200,15 @@ class NeuralNetMetadata : public Metadata
} // neuralnet_tracker_ns
+
+namespace neuralnet_tracker_tests
+{
+
+void run();
+
+}
+
+
using neuralnet_tracker_ns::NeuralNetTracker;
using neuralnet_tracker_ns::NeuralNetDialog;
using neuralnet_tracker_ns::NeuralNetMetadata;