Merge pull request #1266 from DaWelter/feature-tracker-neuralnet-prunstable

Add a face alignment based tracker
author: Stanislaw Halik <sthalik@misaki.pl> 2021-05-03 11:04:08 +0000
committer: GitHub <noreply@github.com> 2021-05-03 11:04:08 +0000
commit: c651d84eaa5f295a45e95edab28db9ca05013fe7 (patch)
tree: 394d8ca7c1aedf058208191b19b8e3e9fe4d134c /tracker-neuralnet/ftnoir_tracker_neuralnet.h
parent: 058942f40e17e091b91df5436d771d61203ccc73 (diff)
parent: 7e582247ab0942398225b3e190d9fadafcf19950 (diff)
1 files changed, 231 insertions, 0 deletions
diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.h b/tracker-neuralnet/ftnoir_tracker_neuralnet.h
new file mode 100644
index 00000000..e26689a4
--- /dev/null
+++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.h
@@ -0,0 +1,231 @@
+/* Copyright (c) 2021 Michael Welter <michael@welter-4d.de>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ */
+
+#pragma once
+
+#include "options/options.hpp"
+#include "api/plugin-api.hpp"
+#include "cv/video-widget.hpp"
+#include "cv/translation-calibrator.hpp"
+#include "cv/numeric.hpp"
+#include "compat/timer.hpp"
+#include "video/camera.hpp"
+#include "cv/affine.hpp"
+
+#include <QObject>
+#include <QThread>
+#include <QMutex>
+#include <QHBoxLayout>
+#include <QDialog>
+#include <QTimer>
+
+#include <memory>
+#include <cinttypes>
+
+#include <onnxruntime_cxx_api.h>
+
+#include <opencv2/core.hpp>
+#include <opencv2/core/types.hpp>
+#include <opencv2/imgproc.hpp>
+
+#include "ui_neuralnet-trackercontrols.h"
+
+namespace neuralnet_tracker_ns
+{
+
+
+using namespace options;
+
+
+enum fps_choices
+{
+    fps_default = 0,
+    fps_30      = 1,
+    fps_60      = 2,
+    fps_MAX     = 3
+};
+
+
+struct settings : opts {
+    value<int> offset_fwd { b, "offset-fwd", 200 }, // Millimeters
+               offset_up { b, "offset-up", 0 },
+               offset_right { b, "offset-right", 0 };
+    value<QString> camera_name { b, "camera-name", ""};
+    value<int> fov { b, "field-of-view", 56 };
+    value<fps_choices> force_fps { b, "force-fps", fps_default };
+    value<bool> show_network_input { b, "show-network-input", false };
+    settings();
+};
+
+
+struct CamIntrinsics
+{
+    float focal_length_w;
+    float focal_length_h;
+    float fov_w;
+    float fov_h;
+};
+
+
+class Localizer
+{
+    public:
+        Localizer(Ort::MemoryInfo &allocator_info,
+                    Ort::Session &&session);
+        
+        // Returns bounding wrt image coordinate of the input image
+        // The preceeding float is the score for being a face normalized to [0,1].
+        std::pair<float, cv::Rect2f> run(
+            const cv::Mat &frame);
+
+    private:
+        inline static constexpr int input_img_width = 288;
+        inline static constexpr int input_img_height = 224;
+        Ort::Session session{nullptr};
+        // Inputs / outputs
+        cv::Mat scaled_frame{}, input_mat{};
+        Ort::Value input_val{nullptr}, output_val{nullptr};
+        std::array<float, 5> results;
+};
+
+
+class PoseEstimator
+{
+    public:
+        struct Face
+        {
+            std::array<float,4> rotation; // Quaternion, (w, x, y, z)
+            // The following quantities are defined wrt the image space of the input
+            cv::Rect2f box;
+            cv::Point2f center;
+            float size;
+        };
+
+        PoseEstimator(Ort::MemoryInfo &allocator_info,
+                        Ort::Session &&session);
+        // Inference
+        std::optional<Face> run(const cv::Mat &frame, const cv::Rect &box);
+        // Returns an image compatible with the 'frame' image for displaying.
+        cv::Mat last_network_input() const;
+
+    private:
+        // Operates on the private image data members
+        int find_input_intensity_90_pct_quantile() const;
+
+        inline static constexpr int input_img_width = 129;
+        inline static constexpr int input_img_height = 129;
+        Ort::Session session{nullptr};
+        // Inputs
+        cv::Mat scaled_frame{}, input_mat{};
+        Ort::Value input_val{nullptr};
+        // Outputs
+        cv::Vec<float, 3> output_coord{};
+        cv::Vec<float, 4> output_quat{};
+        cv::Vec<float, 4> output_box{};
+        Ort::Value output_val[3] = {
+            Ort::Value{nullptr}, 
+            Ort::Value{nullptr}, 
+            Ort::Value{nullptr}};
+};
+
+
+class neuralnet_tracker : protected virtual QThread, public ITracker
+{
+    Q_OBJECT
+public:
+    neuralnet_tracker();
+    ~neuralnet_tracker() override;
+    module_status start_tracker(QFrame* frame) override;
+    void data(double *data) override;
+    void run() override;
+    Affine pose();
+
+    QMutex camera_mtx;
+    std::unique_ptr<video::impl::camera> camera;
+
+private:
+    bool detect();
+    bool open_camera();
+    void set_intrinsics();
+    bool load_and_initialize_model();
+    void draw_gizmos(
+        cv::Mat frame,  
+        const PoseEstimator::Face &face,
+        const Affine& pose) const;
+    void update_fps(double dt);
+
+    Affine compute_pose(const PoseEstimator::Face &face) const;
+    numeric_types::vec3 image_to_world(float x, float y, float size, float real_size) const;
+    numeric_types::vec2 world_to_image(const numeric_types::vec3& p) const;
+
+    settings s;
+    std::optional<Localizer> localizer;
+    std::optional<PoseEstimator> poseestimator;
+    Ort::Env env{nullptr};
+    Ort::MemoryInfo allocator_info{nullptr};
+
+    CamIntrinsics intrinsics{};
+    cv::Mat frame, grayscale;
+    std::optional<cv::Rect2f> last_localizer_roi;
+    std::optional<cv::Rect2f> last_roi;
+    static constexpr float head_size_mm = 200.f;
+
+    double fps = 0;
+    double max_frame_time = 0;
+    static constexpr double RC = .25;
+
+    QMutex mtx; // Protects the pose
+    Affine pose_;
+
+    std::unique_ptr<cv_video_widget> videoWidget;
+    std::unique_ptr<QHBoxLayout> layout;
+};
+
+
+class neuralnet_dialog : public ITrackerDialog
+{
+    Q_OBJECT
+public:
+    neuralnet_dialog();
+    void register_tracker(ITracker * x) override;
+    void unregister_tracker() override;
+private:
+    void make_fps_combobox();
+
+    Ui::Form ui;
+    settings s;
+    
+    // Calibration code mostly taken from point tracker
+    QTimer calib_timer;
+    TranslationCalibrator trans_calib;
+    QMutex calibrator_mutex;
+
+    neuralnet_tracker* tracker = nullptr;
+
+private Q_SLOTS:
+    void doOK();
+    void doCancel();
+    void camera_settings();
+    void update_camera_settings_state(const QString& name);
+    void startstop_trans_calib(bool start);
+    void trans_calib_step();
+};
+
+
+class neuralnet_metadata : public Metadata
+{
+    Q_OBJECT
+    QString name() override { return QString("neuralnet tracker"); }
+    QIcon icon() override { return QIcon(":/images/neuralnet.png"); }
+};
+
+
+} // neuralnet_tracker_ns
+
+using neuralnet_tracker_ns::neuralnet_tracker;
+using neuralnet_tracker_ns::neuralnet_dialog;
+using neuralnet_tracker_ns::neuralnet_metadata;
+\ No newline at end of file
author	Stanislaw Halik <sthalik@misaki.pl>	2021-05-03 11:04:08 +0000
committer	GitHub <noreply@github.com>	2021-05-03 11:04:08 +0000
commit	c651d84eaa5f295a45e95edab28db9ca05013fe7 (patch)
tree	394d8ca7c1aedf058208191b19b8e3e9fe4d134c /tracker-neuralnet/ftnoir_tracker_neuralnet.h
parent	058942f40e17e091b91df5436d771d61203ccc73 (diff)
parent	7e582247ab0942398225b3e190d9fadafcf19950 (diff)