/* Copyright (c) 2021 Michael Welter * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. */ #pragma once #include "options/options.hpp" #include "api/plugin-api.hpp" #include "cv/video-widget.hpp" #include "cv/translation-calibrator.hpp" #include "cv/numeric.hpp" #include "compat/timer.hpp" #include "video/camera.hpp" #include "cv/affine.hpp" #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ui_neuralnet-trackercontrols.h" namespace neuralnet_tracker_ns { using namespace options; enum fps_choices { fps_default = 0, fps_30 = 1, fps_60 = 2, fps_MAX = 3 }; struct resolution_tuple { int width; int height; }; static const std::array resolution_choices = {{ { 320, 240 }, { 640, 480 }, { 800, 600 }, { 1024, 768 }, { 1280, 720 }, { 1920, 1080}, { 0, 0 } }}; struct Settings : opts { value offset_fwd { b, "offset-fwd", 200 }, // Millimeters offset_up { b, "offset-up", 0 }, offset_right { b, "offset-right", 0 }; value camera_name { b, "camera-name", ""}; value fov { b, "field-of-view", 56 }; value force_fps { b, "force-fps", fps_default }; value show_network_input { b, "show-network-input", false }; value roi_filter_alpha{ b, "roi-filter-alpha", 1. }; value roi_zoom{ b, "roi-zoom", 1. }; value use_mjpeg { b, "use-mjpeg", false }; value num_threads { b, "num-threads", 1 }; value resolution { b, "force-resolution", 0 }; Settings(); }; struct CamIntrinsics { float focal_length_w; float focal_length_h; float fov_w; float fov_h; }; class Localizer { public: Localizer(Ort::MemoryInfo &allocator_info, Ort::Session &&session); // Returns bounding wrt image coordinate of the input image // The preceeding float is the score for being a face normalized to [0,1]. std::pair run( const cv::Mat &frame); double last_inference_time_millis() const; private: inline static constexpr int input_img_width = 288; inline static constexpr int input_img_height = 224; Ort::Session session{nullptr}; // Inputs / outputs cv::Mat scaled_frame{}, input_mat{}; Ort::Value input_val{nullptr}, output_val{nullptr}; std::array results; double last_inference_time = 0; }; class PoseEstimator { public: struct Face { std::array rotation; // Quaternion, (w, x, y, z) // The following quantities are defined wrt the image space of the input cv::Rect2f box; cv::Point2f center; float size; }; PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&session); // Inference std::optional run(const cv::Mat &frame, const cv::Rect &box); // Returns an image compatible with the 'frame' image for displaying. cv::Mat last_network_input() const; double last_inference_time_millis() const; private: // Operates on the private image data members int find_input_intensity_90_pct_quantile() const; int64_t model_version = 0; Ort::Session session{nullptr}; Ort::Allocator allocator; // Inputs cv::Mat scaled_frame{}, input_mat{}; std::vector input_val; std::vector input_names; // Outputs cv::Vec output_coord{}; cv::Vec output_quat{}; cv::Vec output_box{}; std::vector output_val; std::vector output_names; size_t num_recurrent_states = 0; double last_inference_time = 0; }; class Preview { public: void init(const cv_video_widget& widget); void copy_video_frame(const cv::Mat& frame); void draw_gizmos( const std::optional &face, const Affine& pose, const std::optional& last_roi, const std::optional& last_localizer_roi, const cv::Point2f& neckjoint_position); void overlay_netinput(const cv::Mat& netinput); void draw_fps(double fps, double last_inference_time); void copy_to_widget(cv_video_widget& widget); private: // Transform from camera frame to preview cv::Rect2f transform(const cv::Rect2f& r) const; cv::Point2f transform(const cv::Point2f& p) const; float transform(float s) const; cv::Mat preview_image_; cv::Size preview_size_ = { 0, 0 }; float scale_ = 1.f; cv::Point2f offset_ = { 0.f, 0.f}; }; class neuralnet_tracker : protected virtual QThread, public ITracker { Q_OBJECT public: neuralnet_tracker(); ~neuralnet_tracker() override; module_status start_tracker(QFrame* frame) override; void data(double *data) override; void run() override; Affine pose(); std::tuple stats() const; QMutex camera_mtx; std::unique_ptr camera; private: bool detect(); bool open_camera(); void set_intrinsics(); cv::Mat prepare_input_image(const video::frame& frame); bool load_and_initialize_model(); void draw_gizmos( const std::optional &face, const Affine& pose); void update_fps(double dt); Affine compute_pose(const PoseEstimator::Face &face) const; Settings settings; std::optional localizer; std::optional poseestimator; Ort::Env env{nullptr}; Ort::MemoryInfo allocator_info{nullptr}; CamIntrinsics intrinsics{}; cv::Mat grayscale_; std::array downsized_original_images_ = {}; // Image pyramid std::optional last_localizer_roi; std::optional last_roi; static constexpr float head_size_mm = 200.f; mutable QMutex stats_mtx_; double fps = 0; double inference_time_ = 0; cv::Size resolution_ = {}; static constexpr double RC = .25; int num_threads = 1; bool is_visible_ = true; QMutex mtx; // Protects the pose Affine pose_; Preview preview_; std::unique_ptr videoWidget; std::unique_ptr layout; }; class neuralnet_dialog : public ITrackerDialog { Q_OBJECT public: neuralnet_dialog(); void register_tracker(ITracker * x) override; void unregister_tracker() override; private: void make_fps_combobox(); void make_resolution_combobox(); Ui::Form ui; Settings settings; // Calibration code mostly taken from point tracker QTimer calib_timer; TranslationCalibrator trans_calib; QMutex calibrator_mutex; QTimer tracker_status_poll_timer; neuralnet_tracker* tracker = nullptr; private Q_SLOTS: void doOK(); void doCancel(); void camera_settings(); void update_camera_settings_state(const QString& name); void startstop_trans_calib(bool start); void trans_calib_step(); void status_poll(); }; class neuralnet_metadata : public Metadata { Q_OBJECT QString name() override { return QString("neuralnet tracker"); } QIcon icon() override { return QIcon(":/images/neuralnet.png"); } }; } // neuralnet_tracker_ns using neuralnet_tracker_ns::neuralnet_tracker; using neuralnet_tracker_ns::neuralnet_dialog; using neuralnet_tracker_ns::neuralnet_metadata;