summary | refs | log | tree | commit | diff | homepage
path: root/tracker-neuralnet/ftnoir_tracker_neuralnet.h
diff options
context:
space:
mode:
Diffstat (limited to 'tracker-neuralnet/ftnoir_tracker_neuralnet.h')
-rw-r--r--  tracker-neuralnet/ftnoir_tracker_neuralnet.h  | 230
1 file changed, 114 insertions(+), 116 deletions(-)
diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.h b/tracker-neuralnet/ftnoir_tracker_neuralnet.h
index e26689a4..ce85dcd5 100644
--- a/tracker-neuralnet/ftnoir_tracker_neuralnet.h
+++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.h
@@ -7,11 +7,15 @@
#pragma once
+#include "ui_neuralnet-trackercontrols.h"
+#include "model_adapters.h"
+#include "deadzone_filter.h"
+#include "preview.h"
+
#include "options/options.hpp"
#include "api/plugin-api.hpp"
#include "cv/video-widget.hpp"
#include "cv/translation-calibrator.hpp"
-#include "cv/numeric.hpp"
#include "compat/timer.hpp"
#include "video/camera.hpp"
#include "cv/affine.hpp"
@@ -25,14 +29,11 @@
#include <memory>
#include <cinttypes>
-
-#include <onnxruntime_cxx_api.h>
+#include <array>
#include <opencv2/core.hpp>
-#include <opencv2/core/types.hpp>
#include <opencv2/imgproc.hpp>
-#include "ui_neuralnet-trackercontrols.h"
namespace neuralnet_tracker_ns
{
@@ -46,11 +47,36 @@ enum fps_choices
fps_default = 0,
fps_30 = 1,
fps_60 = 2,
- fps_MAX = 3
+ fps_75 = 3,
+ fps_125 = 4,
+ fps_200 = 5,
+ fps_50 = 6,
+ fps_100 = 7,
+ fps_120 = 8,
+ fps_300 = 9,
+ fps_250 = 10,
+ fps_MAX = 11,
};
+struct resolution_tuple
+{
+ int width;
+ int height;
+};
+
+static const std::array<resolution_tuple, 7> resolution_choices =
+{{
+ { 320, 240 },
+ { 640, 480 },
+ { 800, 600 },
+ { 1024, 768 },
+ { 1280, 720 },
+ { 1920, 1080},
+ { 0, 0 }
+}};
+
-struct settings : opts {
+struct Settings : opts {
value<int> offset_fwd { b, "offset-fwd", 200 }, // Millimeters
offset_up { b, "offset-up", 0 },
offset_right { b, "offset-right", 0 };
@@ -58,7 +84,15 @@ struct settings : opts {
value<int> fov { b, "field-of-view", 56 };
value<fps_choices> force_fps { b, "force-fps", fps_default };
value<bool> show_network_input { b, "show-network-input", false };
- settings();
+ value<double> roi_filter_alpha{ b, "roi-filter-alpha", 1. };
+ value<double> roi_zoom{ b, "roi-zoom", 1. };
+ value<bool> use_mjpeg { b, "use-mjpeg", false };
+ value<int> num_threads { b, "num-threads", 1 };
+ value<int> resolution { b, "force-resolution", 0 };
+ value<double> deadzone_size { b, "deadzone-size", 1. };
+ value<double> deadzone_hardness { b, "deadzone-hardness", 1.5 };
+ value<QString> posenet_file { b, "posenet-file", "head-pose-0.3-big-quantized.onnx" };
+ Settings();
};
@@ -71,152 +105,107 @@ struct CamIntrinsics
};
-class Localizer
-{
- public:
- Localizer(Ort::MemoryInfo &allocator_info,
- Ort::Session &&session);
-
- // Returns bounding wrt image coordinate of the input image
- // The preceeding float is the score for being a face normalized to [0,1].
- std::pair<float, cv::Rect2f> run(
- const cv::Mat &frame);
-
- private:
- inline static constexpr int input_img_width = 288;
- inline static constexpr int input_img_height = 224;
- Ort::Session session{nullptr};
- // Inputs / outputs
- cv::Mat scaled_frame{}, input_mat{};
- Ort::Value input_val{nullptr}, output_val{nullptr};
- std::array<float, 5> results;
-};
-
-
-class PoseEstimator
-{
- public:
- struct Face
- {
- std::array<float,4> rotation; // Quaternion, (w, x, y, z)
- // The following quantities are defined wrt the image space of the input
- cv::Rect2f box;
- cv::Point2f center;
- float size;
- };
-
- PoseEstimator(Ort::MemoryInfo &allocator_info,
- Ort::Session &&session);
- // Inference
- std::optional<Face> run(const cv::Mat &frame, const cv::Rect &box);
- // Returns an image compatible with the 'frame' image for displaying.
- cv::Mat last_network_input() const;
-
- private:
- // Operates on the private image data members
- int find_input_intensity_90_pct_quantile() const;
-
- inline static constexpr int input_img_width = 129;
- inline static constexpr int input_img_height = 129;
- Ort::Session session{nullptr};
- // Inputs
- cv::Mat scaled_frame{}, input_mat{};
- Ort::Value input_val{nullptr};
- // Outputs
- cv::Vec<float, 3> output_coord{};
- cv::Vec<float, 4> output_quat{};
- cv::Vec<float, 4> output_box{};
- Ort::Value output_val[3] = {
- Ort::Value{nullptr},
- Ort::Value{nullptr},
- Ort::Value{nullptr}};
-};
-
-
-class neuralnet_tracker : protected virtual QThread, public ITracker
+class NeuralNetTracker : protected virtual QThread, public ITracker
{
- Q_OBJECT
+ //Q_OBJECT
public:
- neuralnet_tracker();
- ~neuralnet_tracker() override;
+ NeuralNetTracker();
+ ~NeuralNetTracker() override;
module_status start_tracker(QFrame* frame) override;
void data(double *data) override;
void run() override;
Affine pose();
+ std::tuple<cv::Size, double, double> stats() const;
- QMutex camera_mtx;
- std::unique_ptr<video::impl::camera> camera;
+ QMutex camera_mtx_;
+ std::unique_ptr<video::impl::camera> camera_;
private:
bool detect();
bool open_camera();
void set_intrinsics();
+ cv::Mat prepare_input_image(const video::frame& frame);
bool load_and_initialize_model();
void draw_gizmos(
- cv::Mat frame,
- const PoseEstimator::Face &face,
- const Affine& pose) const;
+ const std::optional<PoseEstimator::Face> &face,
+ const Affine& pose);
void update_fps(double dt);
-
- Affine compute_pose(const PoseEstimator::Face &face) const;
- numeric_types::vec3 image_to_world(float x, float y, float size, float real_size) const;
- numeric_types::vec2 world_to_image(const numeric_types::vec3& p) const;
-
- settings s;
- std::optional<Localizer> localizer;
- std::optional<PoseEstimator> poseestimator;
- Ort::Env env{nullptr};
- Ort::MemoryInfo allocator_info{nullptr};
-
- CamIntrinsics intrinsics{};
- cv::Mat frame, grayscale;
- std::optional<cv::Rect2f> last_localizer_roi;
- std::optional<cv::Rect2f> last_roi;
- static constexpr float head_size_mm = 200.f;
-
- double fps = 0;
- double max_frame_time = 0;
+ // Secretly applies filtering while computing the pose in 3d space.
+ QuatPose compute_filtered_pose(const PoseEstimator::Face &face);
+ // Compute the pose in 3d space taking the network outputs
+ QuatPose transform_to_world_pose(const cv::Quatf &face_rotation, const cv::Point2f& face_xy, const float face_size) const;
+ QString get_posenet_filename() const;
+
+ Settings settings_;
+ std::optional<Localizer> localizer_;
+ std::optional<PoseEstimator> poseestimator_;
+ Ort::Env env_{nullptr};
+ Ort::MemoryInfo allocator_info_{nullptr};
+
+ CamIntrinsics intrinsics_{};
+ cv::Mat grayscale_;
+ std::array<cv::Mat,2> downsized_original_images_ = {}; // Image pyramid
+ std::optional<cv::Rect2f> last_localizer_roi_;
+ std::optional<cv::Rect2f> last_roi_;
+ static constexpr float HEAD_SIZE_MM = 200.f; // In the vertical. Approximately.
+
+ mutable QMutex stats_mtx_;
+ double fps_ = 0;
+ double inference_time_ = 0;
+ cv::Size resolution_ = {};
+
static constexpr double RC = .25;
+ int num_threads_ = 1;
+ bool is_visible_ = true;
- QMutex mtx; // Protects the pose
- Affine pose_;
+ QMutex mtx_ = {}; // Protects the pose
+ std::optional<QuatPose> last_pose_ = {};
+ Affine last_pose_affine_ = {};
- std::unique_ptr<cv_video_widget> videoWidget;
- std::unique_ptr<QHBoxLayout> layout;
+ Preview preview_;
+ std::unique_ptr<cv_video_widget> video_widget_;
+ std::unique_ptr<QHBoxLayout> layout_;
};
-class neuralnet_dialog : public ITrackerDialog
+class NeuralNetDialog : public ITrackerDialog
{
Q_OBJECT
public:
- neuralnet_dialog();
+ NeuralNetDialog();
void register_tracker(ITracker * x) override;
void unregister_tracker() override;
+
+ bool embeddable() noexcept override;
+ void set_buttons_visible(bool x) override;
private:
void make_fps_combobox();
+ void make_resolution_combobox();
- Ui::Form ui;
- settings s;
-
+ Ui::Form ui_;
+ Settings settings_;
// Calibration code mostly taken from point tracker
- QTimer calib_timer;
- TranslationCalibrator trans_calib;
- QMutex calibrator_mutex;
-
- neuralnet_tracker* tracker = nullptr;
+ QTimer calib_timer_;
+ TranslationCalibrator trans_calib_;
+ QMutex calibrator_mutex_;
+ QTimer tracker_status_poll_timer_;
+ NeuralNetTracker* tracker_ = nullptr;
private Q_SLOTS:
+ void save() override;
+ void reload() override;
void doOK();
void doCancel();
void camera_settings();
void update_camera_settings_state(const QString& name);
void startstop_trans_calib(bool start);
void trans_calib_step();
+ void status_poll();
+ void onSelectPoseNetFile();
};
-class neuralnet_metadata : public Metadata
+class NeuralNetMetadata : public Metadata
{
Q_OBJECT
QString name() override { return QString("neuralnet tracker"); }
@@ -226,6 +215,15 @@ class neuralnet_metadata : public Metadata
} // neuralnet_tracker_ns
-using neuralnet_tracker_ns::neuralnet_tracker;
-using neuralnet_tracker_ns::neuralnet_dialog;
-using neuralnet_tracker_ns::neuralnet_metadata;
\ No newline at end of file
+
+namespace neuralnet_tracker_tests
+{
+
+void run();
+
+}
+
+
+using neuralnet_tracker_ns::NeuralNetTracker;
+using neuralnet_tracker_ns::NeuralNetDialog;
+using neuralnet_tracker_ns::NeuralNetMetadata;