From e1d17f2d413de5f548931eaf9dfed2155e830096 Mon Sep 17 00:00:00 2001 From: Michael Welter Date: Wed, 18 May 2022 06:36:25 +0200 Subject: tracker/nn: Improve preview * The preview image is now generated with the dimensions of the widget * The pose visualization is added afterwards adjusted to the preview size * The fps / inference time readout is moved to the settings dialog * The actual obtained resolution from the camera is also shown * Dialog layout is changed Note: Switching to using underscore to mark class member vars. It's not consistently applied yet. --- tracker-neuralnet/ftnoir_tracker_neuralnet.cpp | 206 +++++-- tracker-neuralnet/ftnoir_tracker_neuralnet.h | 44 +- tracker-neuralnet/lang/nl_NL.ts | 8 + tracker-neuralnet/lang/ru_RU.ts | 8 + tracker-neuralnet/lang/stub.ts | 8 + tracker-neuralnet/lang/zh_CN.ts | 8 + tracker-neuralnet/neuralnet-trackercontrols.ui | 787 ++++++++++++++----------- 7 files changed, 664 insertions(+), 405 deletions(-) diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp index f849f4e1..352baf29 100644 --- a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp +++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp @@ -15,6 +15,7 @@ #include #include #include "compat/timer.hpp" +#include "compat/check-visible.hpp" #include #ifdef _MSC_VER @@ -54,6 +55,18 @@ std::string convert(const QString &s) { return s.toStdString(); } #endif +template +struct OnScopeExit +{ + explicit OnScopeExit(F&& f) : f_{ f } {} + ~OnScopeExit() noexcept + { + f_(); + } + F f_; +}; + + float sigmoid(float x) { return 1.f/(1.f + std::exp(-x)); @@ -88,7 +101,6 @@ cv::Rect make_crop_rect_multiple_of(const cv::Size &size, int multiple) ); } - template cv::Rect_ squarize(const cv::Rect_ &r) { @@ -592,12 +604,20 @@ double PoseEstimator::last_inference_time_millis() const bool neuralnet_tracker::detect() { + double inference_time = 0.; + + OnScopeExit update_inference_time{ [&]() { + + QMutexLocker lck{ &stats_mtx_ }; + inference_time_ = inference_time; + } }; + // Note: BGR colors! if (!last_localizer_roi || !last_roi || iou(*last_localizer_roi,*last_roi)<0.25) { - auto [p, rect] = localizer->run(grayscale); - last_inference_time += localizer->last_inference_time_millis(); + auto [p, rect] = localizer->run(grayscale_); + inference_time += localizer->last_inference_time_millis(); if (p > 0.5 || rect.height < 5 || rect.width < 5) { last_localizer_roi = rect; @@ -612,17 +632,17 @@ bool neuralnet_tracker::detect() if (!last_roi) { - draw_gizmos(frame, {}, {}); + draw_gizmos({}, {}); return false; } - auto face = poseestimator->run(grayscale, *last_roi); - last_inference_time += poseestimator->last_inference_time_millis(); + auto face = poseestimator->run(grayscale_, *last_roi); + inference_time += poseestimator->last_inference_time_millis(); if (!face) { last_roi.reset(); - draw_gizmos(frame, *face, {}); + draw_gizmos(*face, {}); return false; } @@ -646,7 +666,7 @@ bool neuralnet_tracker::detect() Affine pose = compute_pose(*face); - draw_gizmos(frame, *face, pose); + draw_gizmos(*face, pose); { QMutexLocker lck(&mtx); @@ -657,12 +677,31 @@ bool neuralnet_tracker::detect() } +void neuralnet_tracker::draw_gizmos( + const std::optional &face, + const Affine& pose) +{ + if (!is_visible_) + return; + + preview_.draw_gizmos(face, pose, last_roi, last_localizer_roi, world_to_image(pose.t, grayscale_.size(), intrinsics)); + + if (settings.show_network_input) + { + cv::Mat netinput = poseestimator->last_network_input(); + preview_.overlay_netinput(netinput); + } + + //preview_.draw_fps(fps, last_inference_time); +} + + Affine neuralnet_tracker::compute_pose(const PoseEstimator::Face &face) const { // Compute the location the network outputs in 3d space. const mat33 rot_correction = compute_rotation_correction( - normalize(face.center, frame.rows, frame.cols), + normalize(face.center, grayscale_.rows, grayscale_.cols), intrinsics.focal_length_w); const mat33 m = rot_correction * quaternion_to_mat33( @@ -683,16 +722,15 @@ Affine neuralnet_tracker::compute_pose(const PoseEstimator::Face &face) const const vec3 face_world_pos = image_to_world( face.center.x, face.center.y, face.size, head_size_mm, - frame.size(), + grayscale_.size(), intrinsics); // But this is in general not the location of the rotation joint in the neck. // So we need an extra offset. Which we determine by solving // z,y,z-pos = head_joint_loc + R_face * offset - const vec3 pos = face_world_pos + m * vec3{ - static_cast(settings.offset_fwd), + static_cast(settings.offset_fwd), static_cast(settings.offset_up), static_cast(settings.offset_right)}; @@ -700,27 +738,50 @@ Affine neuralnet_tracker::compute_pose(const PoseEstimator::Face &face) const } -void neuralnet_tracker::draw_gizmos( - cv::Mat frame, +void Preview::init(const cv_video_widget& widget) +{ + auto [w,h] = widget.preview_size(); + preview_size_ = { w, h }; +} + + +void Preview::copy_video_frame(const cv::Mat& frame) +{ + cv::Rect roi = make_crop_rect_for_aspect(frame.size(), preview_size_.width, preview_size_.height); + + cv::resize(frame(roi), preview_image_, preview_size_, 0, 0, cv::INTER_NEAREST); + + offset_ = { (float)-roi.x, (float)-roi.y }; + scale_ = float(preview_image_.cols) / float(roi.width); +} + + +void Preview::draw_gizmos( const std::optional &face, - const Affine& pose) const + const Affine& pose, + const std::optional& last_roi, + const std::optional& last_localizer_roi, + const cv::Point2f& neckjoint_position) { + if (preview_image_.empty()) + return; + if (last_roi) { const int col = 255; - cv::rectangle(frame, *last_roi, cv::Scalar(0, col, 0), /*thickness=*/1); + cv::rectangle(preview_image_, transform(*last_roi), cv::Scalar(0, col, 0), /*thickness=*/1); } if (last_localizer_roi) { const int col = 255; - cv::rectangle(frame, *last_localizer_roi, cv::Scalar(col, 0, 255-col), /*thickness=*/1); + cv::rectangle(preview_image_, transform(*last_localizer_roi), cv::Scalar(col, 0, 255-col), /*thickness=*/1); } if (face) { if (face->size>=1.f) - cv::circle(frame, static_cast(face->center), int(face->size), cv::Scalar(255,255,255), 2); - cv::circle(frame, static_cast(face->center), 3, cv::Scalar(255,255,255), -1); + cv::circle(preview_image_, static_cast(transform(face->center)), int(transform(face->size)), cv::Scalar(255,255,255), 2); + cv::circle(preview_image_, static_cast(transform(face->center)), 3, cv::Scalar(255,255,255), -1); auto draw_coord_line = [&](int i, const cv::Scalar& color) { @@ -728,32 +789,57 @@ void neuralnet_tracker::draw_gizmos( const float vy = -pose.R(1,i); static constexpr float len = 100.f; cv::Point q = face->center + len*cv::Point2f{vx, vy}; - cv::line(frame, static_cast(face->center), static_cast(q), color, 2); + cv::line(preview_image_, static_cast(transform(face->center)), static_cast(transform(q)), color, 2); }; draw_coord_line(0, {0, 0, 255}); draw_coord_line(1, {0, 255, 0}); draw_coord_line(2, {255, 0, 0}); // Draw the computed joint position - auto xy = world_to_image(pose.t, frame.size(), intrinsics); - cv::circle(frame, cv::Point(xy[0],xy[1]), 5, cv::Scalar(0,0,255), -1); + auto xy = transform(neckjoint_position); + cv::circle(preview_image_, cv::Point(xy.x,xy.y), 5, cv::Scalar(0,0,255), -1); } +} - if (settings.show_network_input) - { - cv::Mat netinput = poseestimator->last_network_input(); - if (!netinput.empty()) - { - const int w = std::min(netinput.cols, frame.cols); - const int h = std::min(netinput.rows, frame.rows); - cv::Rect roi(0, 0, w, h); - netinput(roi).copyTo(frame(roi)); - } - } +void Preview::overlay_netinput(const cv::Mat& netinput) +{ + if (netinput.empty()) + return; + const int w = std::min(netinput.cols, preview_image_.cols); + const int h = std::min(netinput.rows, preview_image_.rows); + cv::Rect roi(0, 0, w, h); + netinput(roi).copyTo(preview_image_(roi)); +} + +void Preview::draw_fps(double fps, double last_inference_time) +{ char buf[128]; ::snprintf(buf, sizeof(buf), "%d Hz, pose inference: %d ms", std::clamp(int(fps), 0, 9999), int(last_inference_time)); - cv::putText(frame, buf, cv::Point(10, frame.rows-10), cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(0, 255, 0), 1); + cv::putText(preview_image_, buf, cv::Point(10, preview_image_.rows-10), cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(0, 255, 0), 1); +} + + +void Preview::copy_to_widget(cv_video_widget& widget) +{ + if (preview_image_.rows > 0) + widget.update_image(preview_image_); +} + + +cv::Rect2f Preview::transform(const cv::Rect2f& r) const +{ + return { (r.x - offset_.x)*scale_, (r.y - offset_.y)*scale_, r.width*scale_, r.height*scale_ }; +} + +cv::Point2f Preview::transform(const cv::Point2f& p) const +{ + return { (p.x - offset_.x)*scale_ , (p.y - offset_.y)*scale_ }; +} + +float Preview::transform(float s) const +{ + return s * scale_; } @@ -856,13 +942,14 @@ bool neuralnet_tracker::open_camera() qDebug() << "neuralnet tracker: can't open camera"; return false; } + return true; } void neuralnet_tracker::set_intrinsics() { - const int w = grayscale.cols, h = grayscale.rows; + const int w = grayscale_.cols, h = grayscale_.rows; const double diag_fov = settings.fov * M_PI / 180.; const double fov_w = 2.*atan(tan(diag_fov/2.)/sqrt(1. + h/(double)w * h/(double)w)); const double fov_h = 2.*atan(tan(diag_fov/2.)/sqrt(1. + w/(double)h * w/(double)h)); @@ -902,6 +989,8 @@ class GuardedThreadCountSwitch void neuralnet_tracker::run() { + preview_.init(*videoWidget); + GuardedThreadCountSwitch switch_num_threads_to(num_threads); if (!open_camera()) @@ -914,7 +1003,7 @@ void neuralnet_tracker::run() while (!isInterruptionRequested()) { - last_inference_time = 0; + is_visible_ = check_is_visible(); auto t = clk.now(); { QMutexLocker l(&camera_mtx); @@ -928,18 +1017,24 @@ void neuralnet_tracker::run() continue; } + { + QMutexLocker lck{&stats_mtx_}; + resolution_ = { img.width, img.height }; + } + auto color = prepare_input_image(img); - color.copyTo(frame); + if (is_visible_) + preview_.copy_video_frame(color); switch (img.channels) { case 1: - grayscale.create(img.height, img.width, CV_8UC1); - color.copyTo(grayscale); + grayscale_.create(img.height, img.width, CV_8UC1); + color.copyTo(grayscale_); break; case 3: - cv::cvtColor(color, grayscale, cv::COLOR_BGR2GRAY); + cv::cvtColor(color, grayscale_, cv::COLOR_BGR2GRAY); break; default: qDebug() << "Can't handle" << img.channels << "color channels"; @@ -951,8 +1046,8 @@ void neuralnet_tracker::run() detect(); - if (frame.rows > 0) - videoWidget->update_image(frame); + if (is_visible_) + preview_.copy_to_widget(*videoWidget); update_fps( std::chrono::duration_cast( @@ -991,9 +1086,9 @@ cv::Mat neuralnet_tracker::prepare_input_image(const video::frame& frame) void neuralnet_tracker::update_fps(double dt) { const double alpha = dt/(dt + RC); - if (dt > 1e-6) { + QMutexLocker lck{&stats_mtx_}; fps *= 1 - alpha; fps += alpha * 1./dt; } @@ -1035,6 +1130,11 @@ Affine neuralnet_tracker::pose() return pose_; } +std::tuple neuralnet_tracker::stats() const +{ + QMutexLocker lck(&stats_mtx_); + return { resolution_, fps, inference_time_ }; +} void neuralnet_dialog::make_fps_combobox() { @@ -1094,6 +1194,10 @@ neuralnet_dialog::neuralnet_dialog() : connect(&calib_timer, &QTimer::timeout, this, &neuralnet_dialog::trans_calib_step); calib_timer.setInterval(35); connect(ui.tcalib_button,SIGNAL(toggled(bool)), this, SLOT(startstop_trans_calib(bool))); + + connect(&tracker_status_poll_timer, &QTimer::timeout, this, &neuralnet_dialog::status_poll); + tracker_status_poll_timer.setInterval(250); + tracker_status_poll_timer.start(); } @@ -1143,6 +1247,22 @@ void neuralnet_dialog::unregister_tracker() } +void neuralnet_dialog::status_poll() +{ + QString status; + if (!tracker) + { + status = tr("Tracker Offline"); + } + else + { + auto [ res, fps, inference_time ] = tracker->stats(); + status = tr("%1x%2 @ %3 FPS / Inference: %4 ms").arg(res.width).arg(res.height).arg(int(fps)).arg(int(inference_time)); + } + ui.resolution_display->setText(status); +} + + void neuralnet_dialog::trans_calib_step() { if (tracker) diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.h b/tracker-neuralnet/ftnoir_tracker_neuralnet.h index ace16528..00b5f220 100644 --- a/tracker-neuralnet/ftnoir_tracker_neuralnet.h +++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.h @@ -159,6 +159,32 @@ class PoseEstimator }; +class Preview +{ +public: + void init(const cv_video_widget& widget); + void copy_video_frame(const cv::Mat& frame); + void draw_gizmos( + const std::optional &face, + const Affine& pose, + const std::optional& last_roi, + const std::optional& last_localizer_roi, + const cv::Point2f& neckjoint_position); + void overlay_netinput(const cv::Mat& netinput); + void draw_fps(double fps, double last_inference_time); + void copy_to_widget(cv_video_widget& widget); +private: + // Transform from camera frame to preview + cv::Rect2f transform(const cv::Rect2f& r) const; + cv::Point2f transform(const cv::Point2f& p) const; + float transform(float s) const; + + cv::Mat preview_image_; + cv::Size preview_size_ = { 0, 0 }; + float scale_ = 1.f; + cv::Point2f offset_ = { 0.f, 0.f}; +}; + class neuralnet_tracker : protected virtual QThread, public ITracker { @@ -170,6 +196,7 @@ public: void data(double *data) override; void run() override; Affine pose(); + std::tuple stats() const; QMutex camera_mtx; std::unique_ptr camera; @@ -181,11 +208,9 @@ private: cv::Mat prepare_input_image(const video::frame& frame); bool load_and_initialize_model(); void draw_gizmos( - cv::Mat frame, const std::optional &face, - const Affine& pose) const; + const Affine& pose); void update_fps(double dt); - Affine compute_pose(const PoseEstimator::Face &face) const; Settings settings; @@ -195,20 +220,25 @@ private: Ort::MemoryInfo allocator_info{nullptr}; CamIntrinsics intrinsics{}; - cv::Mat frame, grayscale; + cv::Mat grayscale_; std::array downsized_original_images_ = {}; // Image pyramid std::optional last_localizer_roi; std::optional last_roi; static constexpr float head_size_mm = 200.f; + mutable QMutex stats_mtx_; double fps = 0; - double last_inference_time = 0; + double inference_time_ = 0; + cv::Size resolution_ = {}; + static constexpr double RC = .25; int num_threads = 1; + bool is_visible_ = true; QMutex mtx; // Protects the pose Affine pose_; + Preview preview_; std::unique_ptr videoWidget; std::unique_ptr layout; }; @@ -232,8 +262,9 @@ private: QTimer calib_timer; TranslationCalibrator trans_calib; QMutex calibrator_mutex; - + QTimer tracker_status_poll_timer; neuralnet_tracker* tracker = nullptr; + private Q_SLOTS: void doOK(); @@ -242,6 +273,7 @@ private Q_SLOTS: void update_camera_settings_state(const QString& name); void startstop_trans_calib(bool start); void trans_calib_step(); + void status_poll(); }; diff --git a/tracker-neuralnet/lang/nl_NL.ts b/tracker-neuralnet/lang/nl_NL.ts index cb6d1da0..92ad65f1 100644 --- a/tracker-neuralnet/lang/nl_NL.ts +++ b/tracker-neuralnet/lang/nl_NL.ts @@ -139,5 +139,13 @@ Don't roll or change position. Start calibration + + Tracker Offline + + + + %1x%2 @ %3 FPS / Inference: %4 ms + + diff --git a/tracker-neuralnet/lang/ru_RU.ts b/tracker-neuralnet/lang/ru_RU.ts index ed69e9a7..dfa7d042 100644 --- a/tracker-neuralnet/lang/ru_RU.ts +++ b/tracker-neuralnet/lang/ru_RU.ts @@ -139,5 +139,13 @@ Don't roll or change position. Start calibration + + Tracker Offline + + + + %1x%2 @ %3 FPS / Inference: %4 ms + + diff --git a/tracker-neuralnet/lang/stub.ts b/tracker-neuralnet/lang/stub.ts index db45f47a..a74d272f 100644 --- a/tracker-neuralnet/lang/stub.ts +++ b/tracker-neuralnet/lang/stub.ts @@ -139,5 +139,13 @@ Don't roll or change position. Start calibration + + Tracker Offline + + + + %1x%2 @ %3 FPS / Inference: %4 ms + + diff --git a/tracker-neuralnet/lang/zh_CN.ts b/tracker-neuralnet/lang/zh_CN.ts index d13219f0..9c936e5c 100644 --- a/tracker-neuralnet/lang/zh_CN.ts +++ b/tracker-neuralnet/lang/zh_CN.ts @@ -139,5 +139,13 @@ Don't roll or change position. Start calibration + + Tracker Offline + + + + %1x%2 @ %3 FPS / Inference: %4 ms + + diff --git a/tracker-neuralnet/neuralnet-trackercontrols.ui b/tracker-neuralnet/neuralnet-trackercontrols.ui index 43b316e9..750e6ef3 100644 --- a/tracker-neuralnet/neuralnet-trackercontrols.ui +++ b/tracker-neuralnet/neuralnet-trackercontrols.ui @@ -9,387 +9,223 @@ 0 0 - 647 - 305 + 671 + 357 Tracker settings - - - - QDialogButtonBox::Cancel|QDialogButtonBox::Ok + + + + + 0 + 0 + - - - - - - QFrame::StyledPanel + + true - - QFrame::Raised + + Camera Configuration + + + false + + + false - 0 - - - 0 - - - 0 - - - 0 + 10 - 0 + 8 - - - - 0 - 0 - + + + QLayout::SetDefaultConstraint - - Camera Configuration + + 0 - - - - - Resolution - - - - - - - - 0 - 0 - - - - Field of view. Needed to transform the pose to world coordinates. - - - - - - 35 - - - 90 - - - - - - - Frames per second - - - - - - - Diagonal FOV - - - - - - - - 0 - 0 - - - - Requested video frame rate. Actual setting may not be supported by the camera. - - - - - - - - 0 - 0 - - - - Camera settings - - - - - - - The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution. - - - - - - - - 0 - 0 - - - - - - - - Camera name - - - - - - - - 0 - 0 - - - - - - - - - - - MJPEG - - - - - + + 0 + + + 0 + + + 0 + + + 0 + + + 2 + + + + + + 0 + 0 + + + + + + + + Diagonal FOV + + + + + + + Camera name + + + + + + + + 0 + 0 + + + + Field of view. Needed to transform the pose to world coordinates. + + + + + + 35 + + + 90 + + + + + + + + 0 + 0 + + + + The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution. + + + + + + + Resolution + + + + - - - - 0 - 0 - + + + 0 - - Head Center Offset + + 0 - - - - - - 0 - 0 - - - - - 16777215 - 16777215 - - - - QFrame::NoFrame - - - QFrame::Raised - - - - QLayout::SetDefaultConstraint - - - 0 - - - - - - 150 - 16777215 - - - - mm - - - -65535 - - - 65536 - - - - - - - - 0 - 0 - - - - Right - - - - - - - - 150 - 16777215 - - - - mm - - - -65535 - - - 65536 - - - - - - - - 0 - 0 - - - - Forward - - - - - - - - 150 - 16777215 - - - - mm - - - -65535 - - - 65536 - - - - - - - - 0 - 0 - - - - Up - - - - - - - - - - - 0 - 0 - - - - - 260 - 0 - - - - QFrame::NoFrame - - - QFrame::Raised - - - - - - Use only yaw and pitch while calibrating. -Don't roll or change position. - - - Qt::AlignCenter - - - true - - - false - - - - - - - - 0 - 0 - - - - - - - true - - - - - - - false - - - Start calibration - - - true - - - - - - - - + + 0 + + + 0 + + + 0 + + + 2 + + + + + + 0 + 0 + + + + Requested video frame rate. Actual setting may not be supported by the camera. + + + + + + + Frames per second + + + + + + + + 0 + 0 + + + + + 0 + 0 + + + + + + + + + + + MJPEG + + + + + + + + 0 + 0 + + + + Camera settings + + + + + + + + QDialogButtonBox::Cancel|QDialogButtonBox::Ok + + + @@ -404,6 +240,9 @@ Don't roll or change position. 0 + + true + Tuning / Debug @@ -554,6 +393,242 @@ Don't roll or change position. + + + + + 0 + 0 + + + + true + + + Head Center Offset + + + + + + + 0 + 0 + + + + + 16777215 + 16777215 + + + + QFrame::NoFrame + + + QFrame::Raised + + + + QLayout::SetDefaultConstraint + + + 0 + + + + + + 150 + 16777215 + + + + mm + + + -65535 + + + 65536 + + + + + + + + 0 + 0 + + + + Right + + + + + + + + 150 + 16777215 + + + + mm + + + -65535 + + + 65536 + + + + + + + + 0 + 0 + + + + Forward + + + + + + + + 150 + 16777215 + + + + mm + + + -65535 + + + 65536 + + + + + + + + 0 + 0 + + + + Up + + + + + + + + + + + 0 + 0 + + + + + 260 + 0 + + + + QFrame::NoFrame + + + QFrame::Raised + + + + + + Use only yaw and pitch while calibrating. +Don't roll or change position. + + + Qt::AlignCenter + + + true + + + false + + + + + + + + 0 + 0 + + + + QFrame::Panel + + + QFrame::Sunken + + + + + + true + + + + + + + false + + + Start calibration + + + true + + + + + + + + + + + + + true + + + QFrame::Panel + + + QFrame::Sunken + + + + + + -- cgit v1.2.3