From 4d6e80a6c585f6929f6bf4bf319932732160ce84 Mon Sep 17 00:00:00 2001 From: Michael Welter Date: Thu, 12 May 2022 19:54:57 +0200 Subject: tracker/nn: Add support for recurrent model and further tweaks Regarding tweaks: * EWA smoothing of head ROI. Smoothing strength is a UI setting. * Adjustible zoom into the detected face. The predicted ROI is scaled by a factor the user can set. There is a sweet spot somewhere near 1. * Adjustible number of threads * The ROI is no longer taken as model output directly. This was actually not needed. Perhaps as auxiliary training objective for the network. But the tracker implementation now just uses a square area around the head center according to the predicted head size. * Add comment and debug notification on face ROI model output --- tracker-neuralnet/lang/ru_RU.ts | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'tracker-neuralnet/lang/ru_RU.ts') diff --git a/tracker-neuralnet/lang/ru_RU.ts b/tracker-neuralnet/lang/ru_RU.ts index 7e8a9c09..a8252299 100644 --- a/tracker-neuralnet/lang/ru_RU.ts +++ b/tracker-neuralnet/lang/ru_RU.ts @@ -64,6 +64,22 @@ Don't roll or change position. MJPEG + + Tuning / Debug + + + + ROI Smoothing Alpha + + + + ROI Zoom + + + + Thread Count + + neuralnet_tracker_ns::neuralnet_dialog -- cgit v1.2.3 From f788d94a9ea446c7b687b4a8f3d4cfae41a61296 Mon Sep 17 00:00:00 2001 From: Michael Welter Date: Tue, 17 May 2022 07:50:55 +0200 Subject: tracker/nn: Add resolution setting and tooltips Tooltips only for half of the settings or so. When hovering over the actual input boxes. --- tracker-neuralnet/ftnoir_tracker_neuralnet.cpp | 123 ++++++++++++++++-- tracker-neuralnet/ftnoir_tracker_neuralnet.h | 22 ++++ tracker-neuralnet/lang/nl_NL.ts | 32 +++++ tracker-neuralnet/lang/ru_RU.ts | 32 +++++ tracker-neuralnet/lang/stub.ts | 32 +++++ tracker-neuralnet/lang/zh_CN.ts | 32 +++++ tracker-neuralnet/neuralnet-trackercontrols.ui | 169 ++++++++++++++++--------- 7 files changed, 369 insertions(+), 73 deletions(-) (limited to 'tracker-neuralnet/lang/ru_RU.ts') diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp index 00f3f281..f849f4e1 100644 --- a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp +++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp @@ -16,7 +16,6 @@ #include #include "compat/timer.hpp" #include -#include #ifdef _MSC_VER # pragma warning(disable : 4702) @@ -31,6 +30,8 @@ #include #include #include +#include + // Some demo code for onnx // https://github.com/microsoft/onnxruntime/blob/master/csharp/test/Microsoft.ML.OnnxRuntime.EndToEndTests.Capi/C_Api_Sample.cpp @@ -59,6 +60,35 @@ float sigmoid(float x) } +cv::Rect make_crop_rect_for_aspect(const cv::Size &size, int aspect_w, int aspect_h) +{ + auto [w, h] = size; + if ( w*aspect_h > aspect_w*h ) + { + // Image is too wide + const int new_w = (aspect_w*h)/aspect_h; + return cv::Rect((w - new_w)/2, 0, new_w, h); + } + else + { + const int new_h = (aspect_h*w)/aspect_w; + return cv::Rect(0, (h - new_h)/2, w, new_h); + } +} + +cv::Rect make_crop_rect_multiple_of(const cv::Size &size, int multiple) +{ + const int new_w = (size.width / multiple) * multiple; + const int new_h = (size.height / multiple) * multiple; + return cv::Rect( + (size.width-new_w)/2, + (size.height-new_h)/2, + new_w, + new_h + ); +} + + template cv::Rect_ squarize(const cv::Rect_ &r) { @@ -752,7 +782,6 @@ module_status neuralnet_tracker::start_tracker(QFrame* videoframe) videoframe->setLayout(&*layout); videoWidget->show(); num_threads = 
settings.num_threads; - cv::setNumThreads(num_threads); start(); return status_ok(); } @@ -774,8 +803,7 @@ bool neuralnet_tracker::load_and_initialize_model() auto opts = Ort::SessionOptions{}; // Do thread settings here do anything? // There is a warning which says to control number of threads via - // openmp settings. Which is what we do. omp_set_num_threads directly - // before running the inference pass. + // openmp settings. Which is what we do. opts.SetIntraOpNumThreads(num_threads); opts.SetInterOpNumThreads(1); allocator_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); @@ -800,6 +828,8 @@ bool neuralnet_tracker::load_and_initialize_model() bool neuralnet_tracker::open_camera() { + int rint = std::clamp(*settings.resolution, 0, (int)std::size(resolution_choices)-1); + resolution_tuple res = resolution_choices[rint]; int fps = enum_to_fps(settings.force_fps); QMutexLocker l(&camera_mtx); @@ -811,9 +841,11 @@ bool neuralnet_tracker::open_camera() video::impl::camera::info args {}; - args.width = 320; - args.height = 240; - + if (res.width) + { + args.width = res.width; + args.height = res.height; + } if (fps) args.fps = fps; @@ -844,8 +876,34 @@ void neuralnet_tracker::set_intrinsics() } +class GuardedThreadCountSwitch +{ + int old_num_threads_cv = 1; + int old_num_threads_omp = 1; + public: + GuardedThreadCountSwitch(int num_threads) + { + old_num_threads_cv = cv::getNumThreads(); + old_num_threads_omp = omp_get_num_threads(); + omp_set_num_threads(num_threads); + cv::setNumThreads(num_threads); + } + + ~GuardedThreadCountSwitch() + { + omp_set_num_threads(old_num_threads_omp); + cv::setNumThreads(old_num_threads_cv); + } + + GuardedThreadCountSwitch(const GuardedThreadCountSwitch&) = delete; + GuardedThreadCountSwitch& operator=(const GuardedThreadCountSwitch&) = delete; +}; + + void neuralnet_tracker::run() { + GuardedThreadCountSwitch switch_num_threads_to(num_threads); + if (!open_camera()) return; @@ -870,7 +928,8 @@ void neuralnet_tracker::run() continue; } - auto color = cv::Mat(img.height, img.width, CV_8UC(img.channels), (void*)img.data, img.stride); + auto color = prepare_input_image(img); + color.copyTo(frame); switch (img.channels) @@ -890,13 +949,8 @@ void neuralnet_tracker::run() set_intrinsics(); - const auto nt = omp_get_num_threads(); - omp_set_num_threads(num_threads); - detect(); - omp_set_num_threads(nt); - if (frame.rows > 0) videoWidget->update_image(frame); @@ -907,6 +961,33 @@ void neuralnet_tracker::run() } +cv::Mat neuralnet_tracker::prepare_input_image(const video::frame& frame) +{ + auto img = cv::Mat(frame.height, frame.width, CV_8UC(frame.channels), (void*)frame.data, frame.stride); + + // Crop if aspect ratio is not 4:3 + if (img.rows*4 != img.cols*3) + { + img = img(make_crop_rect_for_aspect(img.size(), 4, 3)); + } + + img = img(make_crop_rect_multiple_of(img.size(), 4)); + + if (img.cols > 640) + { + cv::pyrDown(img, downsized_original_images_[0]); + img = downsized_original_images_[0]; + } + if (img.cols > 640) + { + cv::pyrDown(img, downsized_original_images_[1]); + img = downsized_original_images_[1]; + } + + return img; +} + + void neuralnet_tracker::update_fps(double dt) { const double alpha = dt/(dt + RC); @@ -965,6 +1046,18 @@ void neuralnet_dialog::make_fps_combobox() } } +void neuralnet_dialog::make_resolution_combobox() +{ + int k=0; + for (const auto [w, h] : resolution_choices) + { + const QString s = (w == 0) + ? 
tr("Default") + : QString::number(w) + " x " + QString::number(h); + ui.resolution->addItem(s, k++); + } +} + neuralnet_dialog::neuralnet_dialog() : trans_calib(1, 2) @@ -972,7 +1065,7 @@ neuralnet_dialog::neuralnet_dialog() : ui.setupUi(this); make_fps_combobox(); - tie_setting(settings.force_fps, ui.cameraFPS); + make_resolution_combobox(); for (const auto& str : video::camera_names()) ui.cameraName->addItem(str); @@ -987,6 +1080,8 @@ neuralnet_dialog::neuralnet_dialog() : tie_setting(settings.use_mjpeg, ui.use_mjpeg); tie_setting(settings.roi_zoom, ui.roiZoom); tie_setting(settings.num_threads, ui.threadCount); + tie_setting(settings.resolution, ui.resolution); + tie_setting(settings.force_fps, ui.cameraFPS); connect(ui.buttonBox, SIGNAL(accepted()), this, SLOT(doOK())); connect(ui.buttonBox, SIGNAL(rejected()), this, SLOT(doCancel())); diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.h b/tracker-neuralnet/ftnoir_tracker_neuralnet.h index 25f1a0a2..ace16528 100644 --- a/tracker-neuralnet/ftnoir_tracker_neuralnet.h +++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.h @@ -25,6 +25,7 @@ #include #include +#include #include @@ -49,6 +50,23 @@ enum fps_choices fps_MAX = 3 }; +struct resolution_tuple +{ + int width; + int height; +}; + +static const std::array resolution_choices = +{{ + { 320, 240 }, + { 640, 480 }, + { 800, 600 }, + { 1024, 768 }, + { 1280, 720 }, + { 1920, 1080}, + { 0, 0 } +}}; + struct Settings : opts { value offset_fwd { b, "offset-fwd", 200 }, // Millimeters @@ -62,6 +80,7 @@ struct Settings : opts { value roi_zoom{ b, "roi-zoom", 1. }; value use_mjpeg { b, "use-mjpeg", false }; value num_threads { b, "num-threads", 1 }; + value resolution { b, "force-resolution", 0 }; Settings(); }; @@ -159,6 +178,7 @@ private: bool detect(); bool open_camera(); void set_intrinsics(); + cv::Mat prepare_input_image(const video::frame& frame); bool load_and_initialize_model(); void draw_gizmos( cv::Mat frame, @@ -176,6 +196,7 @@ private: CamIntrinsics intrinsics{}; cv::Mat frame, grayscale; + std::array downsized_original_images_ = {}; // Image pyramid std::optional last_localizer_roi; std::optional last_roi; static constexpr float head_size_mm = 200.f; @@ -202,6 +223,7 @@ public: void unregister_tracker() override; private: void make_fps_combobox(); + void make_resolution_combobox(); Ui::Form ui; Settings settings; diff --git a/tracker-neuralnet/lang/nl_NL.ts b/tracker-neuralnet/lang/nl_NL.ts index 95da8f4c..cb6d1da0 100644 --- a/tracker-neuralnet/lang/nl_NL.ts +++ b/tracker-neuralnet/lang/nl_NL.ts @@ -80,6 +80,38 @@ Don't roll or change position. Thread Count + + Resolution + + + + Field of view. Needed to transform the pose to world coordinates. + + + + Requested video frame rate. Actual setting may not be supported by the camera. + + + + The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution. + + + + Number of threads. Can be used to balance the CPU load between the game and the tracker. + + + + Show the image patch that the pose estimation model sees. + + + + Amount of smoothing of the face region coordinates. Can help stabilize the pose. + + + + Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1. 
+ + neuralnet_tracker_ns::neuralnet_dialog diff --git a/tracker-neuralnet/lang/ru_RU.ts b/tracker-neuralnet/lang/ru_RU.ts index a8252299..ed69e9a7 100644 --- a/tracker-neuralnet/lang/ru_RU.ts +++ b/tracker-neuralnet/lang/ru_RU.ts @@ -80,6 +80,38 @@ Don't roll or change position. Thread Count + + Resolution + + + + Field of view. Needed to transform the pose to world coordinates. + + + + Requested video frame rate. Actual setting may not be supported by the camera. + + + + The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution. + + + + Number of threads. Can be used to balance the CPU load between the game and the tracker. + + + + Show the image patch that the pose estimation model sees. + + + + Amount of smoothing of the face region coordinates. Can help stabilize the pose. + + + + Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1. + + neuralnet_tracker_ns::neuralnet_dialog diff --git a/tracker-neuralnet/lang/stub.ts b/tracker-neuralnet/lang/stub.ts index 80103fde..db45f47a 100644 --- a/tracker-neuralnet/lang/stub.ts +++ b/tracker-neuralnet/lang/stub.ts @@ -80,6 +80,38 @@ Don't roll or change position. Thread Count + + Resolution + + + + Field of view. Needed to transform the pose to world coordinates. + + + + Requested video frame rate. Actual setting may not be supported by the camera. + + + + The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution. + + + + Number of threads. Can be used to balance the CPU load between the game and the tracker. + + + + Show the image patch that the pose estimation model sees. + + + + Amount of smoothing of the face region coordinates. Can help stabilize the pose. + + + + Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1. + + neuralnet_tracker_ns::neuralnet_dialog diff --git a/tracker-neuralnet/lang/zh_CN.ts b/tracker-neuralnet/lang/zh_CN.ts index f55c12fc..d13219f0 100644 --- a/tracker-neuralnet/lang/zh_CN.ts +++ b/tracker-neuralnet/lang/zh_CN.ts @@ -80,6 +80,38 @@ Don't roll or change position. Thread Count + + Resolution + + + + Field of view. Needed to transform the pose to world coordinates. + + + + Requested video frame rate. Actual setting may not be supported by the camera. + + + + The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution. + + + + Number of threads. Can be used to balance the CPU load between the game and the tracker. + + + + Show the image patch that the pose estimation model sees. + + + + Amount of smoothing of the face region coordinates. Can help stabilize the pose. + + + + Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1. 
+ + neuralnet_tracker_ns::neuralnet_dialog diff --git a/tracker-neuralnet/neuralnet-trackercontrols.ui b/tracker-neuralnet/neuralnet-trackercontrols.ui index acbfe909..43b316e9 100644 --- a/tracker-neuralnet/neuralnet-trackercontrols.ui +++ b/tracker-neuralnet/neuralnet-trackercontrols.ui @@ -9,8 +9,8 @@ 0 0 - 721 - 277 + 647 + 305 @@ -60,13 +60,10 @@ Camera Configuration - - - - - 0 - 0 - + + + + Resolution @@ -78,6 +75,9 @@ 0 + + Field of view. Needed to transform the pose to world coordinates. + @@ -96,20 +96,6 @@ - - - - MJPEG - - - - - - - Camera name - - - @@ -125,31 +111,65 @@ 0 + + Requested video frame rate. Actual setting may not be supported by the camera. + - - + + - + 0 0 - + Camera settings - + + + The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution. + + + + + - + 0 0 + + + + - Camera settings + Camera name + + + + + + + + 0 + 0 + + + + + + + + + + + MJPEG @@ -189,6 +209,12 @@ QFrame::Raised + + QLayout::SetDefaultConstraint + + + 0 + @@ -382,21 +408,34 @@ Don't roll or change position. Tuning / Debug - - + + Qt::Vertical - - - - Thread Count + + + + Number of threads. Can be used to balance the CPU load between the game and the tracker. + + + 1 + + + 32 - + + + + Qt::Vertical + + + + @@ -409,14 +448,21 @@ Don't roll or change position. - - + + + + ROI Zoom + + + + + Qt::Vertical - + @@ -424,12 +470,15 @@ Don't roll or change position. 0 + + Show the image patch that the pose estimation model sees. + Show Network Input - + @@ -443,6 +492,9 @@ Don't roll or change position. 16777215 + + Amount of smoothing of the face region coordinates. Can help stabilize the pose. + false @@ -460,22 +512,18 @@ Don't roll or change position. - - + + - ROI Zoom - - - - - - - Qt::Vertical + Thread Count - + + + Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1. + 0.100000000000000 @@ -490,15 +538,18 @@ Don't roll or change position. - - - - 1 + + + + Qt::Horizontal - - 32 + + + 40 + 20 + - + -- cgit v1.2.3 From e1d17f2d413de5f548931eaf9dfed2155e830096 Mon Sep 17 00:00:00 2001 From: Michael Welter Date: Wed, 18 May 2022 06:36:25 +0200 Subject: tracker/nn: Improve preview * The preview image is now generated with the dimensions of the widget * The pose visualization is added afterwards adjusted to the preview size * The fps / inference time readout is moved to the settings dialog * The actual obtained resolution from the camera is also shown * Dialog layout is changed Note: Switching to using underscore to mark class member vars. It's not consistently applied yet. 
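[Note on the EWA ROI smoothing introduced in the first commit of this series: the helper behind the new "ROI Smoothing Alpha" setting (ewa_filter, which is called further down in these diffs but whose definition is not part of the files shown here) amounts to a per-component exponential moving average of the face box. The sketch below only illustrates that idea with OpenCV types; ewa_smooth and lerp are names made up for illustration and are not the project's code.]

#include <opencv2/core.hpp>

// Rough sketch of an exponentially weighted average over a face ROI.
// The project's own ewa_filter() is the authoritative version.
static cv::Rect2f ewa_smooth(const cv::Rect2f& previous, const cv::Rect2f& detected, float alpha)
{
    // alpha = 1 follows each new detection immediately, alpha -> 0 freezes the ROI.
    const auto lerp = [alpha](float a, float b) { return a + alpha * (b - a); };
    return { lerp(previous.x,      detected.x),
             lerp(previous.y,      detected.y),
             lerp(previous.width,  detected.width),
             lerp(previous.height, detected.height) };
}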
--- tracker-neuralnet/ftnoir_tracker_neuralnet.cpp | 206 +++++-- tracker-neuralnet/ftnoir_tracker_neuralnet.h | 44 +- tracker-neuralnet/lang/nl_NL.ts | 8 + tracker-neuralnet/lang/ru_RU.ts | 8 + tracker-neuralnet/lang/stub.ts | 8 + tracker-neuralnet/lang/zh_CN.ts | 8 + tracker-neuralnet/neuralnet-trackercontrols.ui | 787 ++++++++++++++----------- 7 files changed, 664 insertions(+), 405 deletions(-) (limited to 'tracker-neuralnet/lang/ru_RU.ts') diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp index f849f4e1..352baf29 100644 --- a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp +++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp @@ -15,6 +15,7 @@ #include #include #include "compat/timer.hpp" +#include "compat/check-visible.hpp" #include #ifdef _MSC_VER @@ -54,6 +55,18 @@ std::string convert(const QString &s) { return s.toStdString(); } #endif +template +struct OnScopeExit +{ + explicit OnScopeExit(F&& f) : f_{ f } {} + ~OnScopeExit() noexcept + { + f_(); + } + F f_; +}; + + float sigmoid(float x) { return 1.f/(1.f + std::exp(-x)); @@ -88,7 +101,6 @@ cv::Rect make_crop_rect_multiple_of(const cv::Size &size, int multiple) ); } - template cv::Rect_ squarize(const cv::Rect_ &r) { @@ -592,12 +604,20 @@ double PoseEstimator::last_inference_time_millis() const bool neuralnet_tracker::detect() { + double inference_time = 0.; + + OnScopeExit update_inference_time{ [&]() { + + QMutexLocker lck{ &stats_mtx_ }; + inference_time_ = inference_time; + } }; + // Note: BGR colors! if (!last_localizer_roi || !last_roi || iou(*last_localizer_roi,*last_roi)<0.25) { - auto [p, rect] = localizer->run(grayscale); - last_inference_time += localizer->last_inference_time_millis(); + auto [p, rect] = localizer->run(grayscale_); + inference_time += localizer->last_inference_time_millis(); if (p > 0.5 || rect.height < 5 || rect.width < 5) { last_localizer_roi = rect; @@ -612,17 +632,17 @@ bool neuralnet_tracker::detect() if (!last_roi) { - draw_gizmos(frame, {}, {}); + draw_gizmos({}, {}); return false; } - auto face = poseestimator->run(grayscale, *last_roi); - last_inference_time += poseestimator->last_inference_time_millis(); + auto face = poseestimator->run(grayscale_, *last_roi); + inference_time += poseestimator->last_inference_time_millis(); if (!face) { last_roi.reset(); - draw_gizmos(frame, *face, {}); + draw_gizmos(*face, {}); return false; } @@ -646,7 +666,7 @@ bool neuralnet_tracker::detect() Affine pose = compute_pose(*face); - draw_gizmos(frame, *face, pose); + draw_gizmos(*face, pose); { QMutexLocker lck(&mtx); @@ -657,12 +677,31 @@ bool neuralnet_tracker::detect() } +void neuralnet_tracker::draw_gizmos( + const std::optional &face, + const Affine& pose) +{ + if (!is_visible_) + return; + + preview_.draw_gizmos(face, pose, last_roi, last_localizer_roi, world_to_image(pose.t, grayscale_.size(), intrinsics)); + + if (settings.show_network_input) + { + cv::Mat netinput = poseestimator->last_network_input(); + preview_.overlay_netinput(netinput); + } + + //preview_.draw_fps(fps, last_inference_time); +} + + Affine neuralnet_tracker::compute_pose(const PoseEstimator::Face &face) const { // Compute the location the network outputs in 3d space. 
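    // (Not stated in the diff, but presumably the point of the correction below:
    //  a face detected away from the image center picks up an apparent rotation
    //  from perspective, and compute_rotation_correction() removes it so the pose
    //  is expressed relative to the camera axes rather than the viewing ray.)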
const mat33 rot_correction = compute_rotation_correction( - normalize(face.center, frame.rows, frame.cols), + normalize(face.center, grayscale_.rows, grayscale_.cols), intrinsics.focal_length_w); const mat33 m = rot_correction * quaternion_to_mat33( @@ -683,16 +722,15 @@ Affine neuralnet_tracker::compute_pose(const PoseEstimator::Face &face) const const vec3 face_world_pos = image_to_world( face.center.x, face.center.y, face.size, head_size_mm, - frame.size(), + grayscale_.size(), intrinsics); // But this is in general not the location of the rotation joint in the neck. // So we need an extra offset. Which we determine by solving // z,y,z-pos = head_joint_loc + R_face * offset - const vec3 pos = face_world_pos + m * vec3{ - static_cast(settings.offset_fwd), + static_cast(settings.offset_fwd), static_cast(settings.offset_up), static_cast(settings.offset_right)}; @@ -700,27 +738,50 @@ Affine neuralnet_tracker::compute_pose(const PoseEstimator::Face &face) const } -void neuralnet_tracker::draw_gizmos( - cv::Mat frame, +void Preview::init(const cv_video_widget& widget) +{ + auto [w,h] = widget.preview_size(); + preview_size_ = { w, h }; +} + + +void Preview::copy_video_frame(const cv::Mat& frame) +{ + cv::Rect roi = make_crop_rect_for_aspect(frame.size(), preview_size_.width, preview_size_.height); + + cv::resize(frame(roi), preview_image_, preview_size_, 0, 0, cv::INTER_NEAREST); + + offset_ = { (float)-roi.x, (float)-roi.y }; + scale_ = float(preview_image_.cols) / float(roi.width); +} + + +void Preview::draw_gizmos( const std::optional &face, - const Affine& pose) const + const Affine& pose, + const std::optional& last_roi, + const std::optional& last_localizer_roi, + const cv::Point2f& neckjoint_position) { + if (preview_image_.empty()) + return; + if (last_roi) { const int col = 255; - cv::rectangle(frame, *last_roi, cv::Scalar(0, col, 0), /*thickness=*/1); + cv::rectangle(preview_image_, transform(*last_roi), cv::Scalar(0, col, 0), /*thickness=*/1); } if (last_localizer_roi) { const int col = 255; - cv::rectangle(frame, *last_localizer_roi, cv::Scalar(col, 0, 255-col), /*thickness=*/1); + cv::rectangle(preview_image_, transform(*last_localizer_roi), cv::Scalar(col, 0, 255-col), /*thickness=*/1); } if (face) { if (face->size>=1.f) - cv::circle(frame, static_cast(face->center), int(face->size), cv::Scalar(255,255,255), 2); - cv::circle(frame, static_cast(face->center), 3, cv::Scalar(255,255,255), -1); + cv::circle(preview_image_, static_cast(transform(face->center)), int(transform(face->size)), cv::Scalar(255,255,255), 2); + cv::circle(preview_image_, static_cast(transform(face->center)), 3, cv::Scalar(255,255,255), -1); auto draw_coord_line = [&](int i, const cv::Scalar& color) { @@ -728,32 +789,57 @@ void neuralnet_tracker::draw_gizmos( const float vy = -pose.R(1,i); static constexpr float len = 100.f; cv::Point q = face->center + len*cv::Point2f{vx, vy}; - cv::line(frame, static_cast(face->center), static_cast(q), color, 2); + cv::line(preview_image_, static_cast(transform(face->center)), static_cast(transform(q)), color, 2); }; draw_coord_line(0, {0, 0, 255}); draw_coord_line(1, {0, 255, 0}); draw_coord_line(2, {255, 0, 0}); // Draw the computed joint position - auto xy = world_to_image(pose.t, frame.size(), intrinsics); - cv::circle(frame, cv::Point(xy[0],xy[1]), 5, cv::Scalar(0,0,255), -1); + auto xy = transform(neckjoint_position); + cv::circle(preview_image_, cv::Point(xy.x,xy.y), 5, cv::Scalar(0,0,255), -1); } +} - if (settings.show_network_input) - { - cv::Mat netinput = 
poseestimator->last_network_input(); - if (!netinput.empty()) - { - const int w = std::min(netinput.cols, frame.cols); - const int h = std::min(netinput.rows, frame.rows); - cv::Rect roi(0, 0, w, h); - netinput(roi).copyTo(frame(roi)); - } - } +void Preview::overlay_netinput(const cv::Mat& netinput) +{ + if (netinput.empty()) + return; + const int w = std::min(netinput.cols, preview_image_.cols); + const int h = std::min(netinput.rows, preview_image_.rows); + cv::Rect roi(0, 0, w, h); + netinput(roi).copyTo(preview_image_(roi)); +} + +void Preview::draw_fps(double fps, double last_inference_time) +{ char buf[128]; ::snprintf(buf, sizeof(buf), "%d Hz, pose inference: %d ms", std::clamp(int(fps), 0, 9999), int(last_inference_time)); - cv::putText(frame, buf, cv::Point(10, frame.rows-10), cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(0, 255, 0), 1); + cv::putText(preview_image_, buf, cv::Point(10, preview_image_.rows-10), cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(0, 255, 0), 1); +} + + +void Preview::copy_to_widget(cv_video_widget& widget) +{ + if (preview_image_.rows > 0) + widget.update_image(preview_image_); +} + + +cv::Rect2f Preview::transform(const cv::Rect2f& r) const +{ + return { (r.x - offset_.x)*scale_, (r.y - offset_.y)*scale_, r.width*scale_, r.height*scale_ }; +} + +cv::Point2f Preview::transform(const cv::Point2f& p) const +{ + return { (p.x - offset_.x)*scale_ , (p.y - offset_.y)*scale_ }; +} + +float Preview::transform(float s) const +{ + return s * scale_; } @@ -856,13 +942,14 @@ bool neuralnet_tracker::open_camera() qDebug() << "neuralnet tracker: can't open camera"; return false; } + return true; } void neuralnet_tracker::set_intrinsics() { - const int w = grayscale.cols, h = grayscale.rows; + const int w = grayscale_.cols, h = grayscale_.rows; const double diag_fov = settings.fov * M_PI / 180.; const double fov_w = 2.*atan(tan(diag_fov/2.)/sqrt(1. + h/(double)w * h/(double)w)); const double fov_h = 2.*atan(tan(diag_fov/2.)/sqrt(1. 
+ w/(double)h * w/(double)h)); @@ -902,6 +989,8 @@ class GuardedThreadCountSwitch void neuralnet_tracker::run() { + preview_.init(*videoWidget); + GuardedThreadCountSwitch switch_num_threads_to(num_threads); if (!open_camera()) @@ -914,7 +1003,7 @@ void neuralnet_tracker::run() while (!isInterruptionRequested()) { - last_inference_time = 0; + is_visible_ = check_is_visible(); auto t = clk.now(); { QMutexLocker l(&camera_mtx); @@ -928,18 +1017,24 @@ void neuralnet_tracker::run() continue; } + { + QMutexLocker lck{&stats_mtx_}; + resolution_ = { img.width, img.height }; + } + auto color = prepare_input_image(img); - color.copyTo(frame); + if (is_visible_) + preview_.copy_video_frame(color); switch (img.channels) { case 1: - grayscale.create(img.height, img.width, CV_8UC1); - color.copyTo(grayscale); + grayscale_.create(img.height, img.width, CV_8UC1); + color.copyTo(grayscale_); break; case 3: - cv::cvtColor(color, grayscale, cv::COLOR_BGR2GRAY); + cv::cvtColor(color, grayscale_, cv::COLOR_BGR2GRAY); break; default: qDebug() << "Can't handle" << img.channels << "color channels"; @@ -951,8 +1046,8 @@ void neuralnet_tracker::run() detect(); - if (frame.rows > 0) - videoWidget->update_image(frame); + if (is_visible_) + preview_.copy_to_widget(*videoWidget); update_fps( std::chrono::duration_cast( @@ -991,9 +1086,9 @@ cv::Mat neuralnet_tracker::prepare_input_image(const video::frame& frame) void neuralnet_tracker::update_fps(double dt) { const double alpha = dt/(dt + RC); - if (dt > 1e-6) { + QMutexLocker lck{&stats_mtx_}; fps *= 1 - alpha; fps += alpha * 1./dt; } @@ -1035,6 +1130,11 @@ Affine neuralnet_tracker::pose() return pose_; } +std::tuple neuralnet_tracker::stats() const +{ + QMutexLocker lck(&stats_mtx_); + return { resolution_, fps, inference_time_ }; +} void neuralnet_dialog::make_fps_combobox() { @@ -1094,6 +1194,10 @@ neuralnet_dialog::neuralnet_dialog() : connect(&calib_timer, &QTimer::timeout, this, &neuralnet_dialog::trans_calib_step); calib_timer.setInterval(35); connect(ui.tcalib_button,SIGNAL(toggled(bool)), this, SLOT(startstop_trans_calib(bool))); + + connect(&tracker_status_poll_timer, &QTimer::timeout, this, &neuralnet_dialog::status_poll); + tracker_status_poll_timer.setInterval(250); + tracker_status_poll_timer.start(); } @@ -1143,6 +1247,22 @@ void neuralnet_dialog::unregister_tracker() } +void neuralnet_dialog::status_poll() +{ + QString status; + if (!tracker) + { + status = tr("Tracker Offline"); + } + else + { + auto [ res, fps, inference_time ] = tracker->stats(); + status = tr("%1x%2 @ %3 FPS / Inference: %4 ms").arg(res.width).arg(res.height).arg(int(fps)).arg(int(inference_time)); + } + ui.resolution_display->setText(status); +} + + void neuralnet_dialog::trans_calib_step() { if (tracker) diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.h b/tracker-neuralnet/ftnoir_tracker_neuralnet.h index ace16528..00b5f220 100644 --- a/tracker-neuralnet/ftnoir_tracker_neuralnet.h +++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.h @@ -159,6 +159,32 @@ class PoseEstimator }; +class Preview +{ +public: + void init(const cv_video_widget& widget); + void copy_video_frame(const cv::Mat& frame); + void draw_gizmos( + const std::optional &face, + const Affine& pose, + const std::optional& last_roi, + const std::optional& last_localizer_roi, + const cv::Point2f& neckjoint_position); + void overlay_netinput(const cv::Mat& netinput); + void draw_fps(double fps, double last_inference_time); + void copy_to_widget(cv_video_widget& widget); +private: + // Transform from camera frame 
to preview + cv::Rect2f transform(const cv::Rect2f& r) const; + cv::Point2f transform(const cv::Point2f& p) const; + float transform(float s) const; + + cv::Mat preview_image_; + cv::Size preview_size_ = { 0, 0 }; + float scale_ = 1.f; + cv::Point2f offset_ = { 0.f, 0.f}; +}; + class neuralnet_tracker : protected virtual QThread, public ITracker { @@ -170,6 +196,7 @@ public: void data(double *data) override; void run() override; Affine pose(); + std::tuple stats() const; QMutex camera_mtx; std::unique_ptr camera; @@ -181,11 +208,9 @@ private: cv::Mat prepare_input_image(const video::frame& frame); bool load_and_initialize_model(); void draw_gizmos( - cv::Mat frame, const std::optional &face, - const Affine& pose) const; + const Affine& pose); void update_fps(double dt); - Affine compute_pose(const PoseEstimator::Face &face) const; Settings settings; @@ -195,20 +220,25 @@ private: Ort::MemoryInfo allocator_info{nullptr}; CamIntrinsics intrinsics{}; - cv::Mat frame, grayscale; + cv::Mat grayscale_; std::array downsized_original_images_ = {}; // Image pyramid std::optional last_localizer_roi; std::optional last_roi; static constexpr float head_size_mm = 200.f; + mutable QMutex stats_mtx_; double fps = 0; - double last_inference_time = 0; + double inference_time_ = 0; + cv::Size resolution_ = {}; + static constexpr double RC = .25; int num_threads = 1; + bool is_visible_ = true; QMutex mtx; // Protects the pose Affine pose_; + Preview preview_; std::unique_ptr videoWidget; std::unique_ptr layout; }; @@ -232,8 +262,9 @@ private: QTimer calib_timer; TranslationCalibrator trans_calib; QMutex calibrator_mutex; - + QTimer tracker_status_poll_timer; neuralnet_tracker* tracker = nullptr; + private Q_SLOTS: void doOK(); @@ -242,6 +273,7 @@ private Q_SLOTS: void update_camera_settings_state(const QString& name); void startstop_trans_calib(bool start); void trans_calib_step(); + void status_poll(); }; diff --git a/tracker-neuralnet/lang/nl_NL.ts b/tracker-neuralnet/lang/nl_NL.ts index cb6d1da0..92ad65f1 100644 --- a/tracker-neuralnet/lang/nl_NL.ts +++ b/tracker-neuralnet/lang/nl_NL.ts @@ -139,5 +139,13 @@ Don't roll or change position. Start calibration + + Tracker Offline + + + + %1x%2 @ %3 FPS / Inference: %4 ms + + diff --git a/tracker-neuralnet/lang/ru_RU.ts b/tracker-neuralnet/lang/ru_RU.ts index ed69e9a7..dfa7d042 100644 --- a/tracker-neuralnet/lang/ru_RU.ts +++ b/tracker-neuralnet/lang/ru_RU.ts @@ -139,5 +139,13 @@ Don't roll or change position. Start calibration + + Tracker Offline + + + + %1x%2 @ %3 FPS / Inference: %4 ms + + diff --git a/tracker-neuralnet/lang/stub.ts b/tracker-neuralnet/lang/stub.ts index db45f47a..a74d272f 100644 --- a/tracker-neuralnet/lang/stub.ts +++ b/tracker-neuralnet/lang/stub.ts @@ -139,5 +139,13 @@ Don't roll or change position. Start calibration + + Tracker Offline + + + + %1x%2 @ %3 FPS / Inference: %4 ms + + diff --git a/tracker-neuralnet/lang/zh_CN.ts b/tracker-neuralnet/lang/zh_CN.ts index d13219f0..9c936e5c 100644 --- a/tracker-neuralnet/lang/zh_CN.ts +++ b/tracker-neuralnet/lang/zh_CN.ts @@ -139,5 +139,13 @@ Don't roll or change position. 
Start calibration + + Tracker Offline + + + + %1x%2 @ %3 FPS / Inference: %4 ms + + diff --git a/tracker-neuralnet/neuralnet-trackercontrols.ui b/tracker-neuralnet/neuralnet-trackercontrols.ui index 43b316e9..750e6ef3 100644 --- a/tracker-neuralnet/neuralnet-trackercontrols.ui +++ b/tracker-neuralnet/neuralnet-trackercontrols.ui @@ -9,387 +9,223 @@ 0 0 - 647 - 305 + 671 + 357 Tracker settings - - - - QDialogButtonBox::Cancel|QDialogButtonBox::Ok + + + + + 0 + 0 + - - - - - - QFrame::StyledPanel + + true - - QFrame::Raised + + Camera Configuration + + + false + + + false - 0 - - - 0 - - - 0 - - - 0 + 10 - 0 + 8 - - - - 0 - 0 - + + + QLayout::SetDefaultConstraint - - Camera Configuration + + 0 - - - - - Resolution - - - - - - - - 0 - 0 - - - - Field of view. Needed to transform the pose to world coordinates. - - - - - - 35 - - - 90 - - - - - - - Frames per second - - - - - - - Diagonal FOV - - - - - - - - 0 - 0 - - - - Requested video frame rate. Actual setting may not be supported by the camera. - - - - - - - - 0 - 0 - - - - Camera settings - - - - - - - The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution. - - - - - - - - 0 - 0 - - - - - - - - Camera name - - - - - - - - 0 - 0 - - - - - - - - - - - MJPEG - - - - - + + 0 + + + 0 + + + 0 + + + 0 + + + 2 + + + + + + 0 + 0 + + + + + + + + Diagonal FOV + + + + + + + Camera name + + + + + + + + 0 + 0 + + + + Field of view. Needed to transform the pose to world coordinates. + + + + + + 35 + + + 90 + + + + + + + + 0 + 0 + + + + The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution. + + + + + + + Resolution + + + + - - - - 0 - 0 - + + + 0 - - Head Center Offset + + 0 - - - - - - 0 - 0 - - - - - 16777215 - 16777215 - - - - QFrame::NoFrame - - - QFrame::Raised - - - - QLayout::SetDefaultConstraint - - - 0 - - - - - - 150 - 16777215 - - - - mm - - - -65535 - - - 65536 - - - - - - - - 0 - 0 - - - - Right - - - - - - - - 150 - 16777215 - - - - mm - - - -65535 - - - 65536 - - - - - - - - 0 - 0 - - - - Forward - - - - - - - - 150 - 16777215 - - - - mm - - - -65535 - - - 65536 - - - - - - - - 0 - 0 - - - - Up - - - - - - - - - - - 0 - 0 - - - - - 260 - 0 - - - - QFrame::NoFrame - - - QFrame::Raised - - - - - - Use only yaw and pitch while calibrating. -Don't roll or change position. - - - Qt::AlignCenter - - - true - - - false - - - - - - - - 0 - 0 - - - - - - - true - - - - - - - false - - - Start calibration - - - true - - - - - - - - + + 0 + + + 0 + + + 0 + + + 2 + + + + + + 0 + 0 + + + + Requested video frame rate. Actual setting may not be supported by the camera. + + + + + + + Frames per second + + + + + + + + 0 + 0 + + + + + 0 + 0 + + + + + + + + + + + MJPEG + + + + + + + + 0 + 0 + + + + Camera settings + + + + + + + + QDialogButtonBox::Cancel|QDialogButtonBox::Ok + + + @@ -404,6 +240,9 @@ Don't roll or change position. 0 + + true + Tuning / Debug @@ -554,6 +393,242 @@ Don't roll or change position. 
+ + + + + 0 + 0 + + + + true + + + Head Center Offset + + + + + + + 0 + 0 + + + + + 16777215 + 16777215 + + + + QFrame::NoFrame + + + QFrame::Raised + + + + QLayout::SetDefaultConstraint + + + 0 + + + + + + 150 + 16777215 + + + + mm + + + -65535 + + + 65536 + + + + + + + + 0 + 0 + + + + Right + + + + + + + + 150 + 16777215 + + + + mm + + + -65535 + + + 65536 + + + + + + + + 0 + 0 + + + + Forward + + + + + + + + 150 + 16777215 + + + + mm + + + -65535 + + + 65536 + + + + + + + + 0 + 0 + + + + Up + + + + + + + + + + + 0 + 0 + + + + + 260 + 0 + + + + QFrame::NoFrame + + + QFrame::Raised + + + + + + Use only yaw and pitch while calibrating. +Don't roll or change position. + + + Qt::AlignCenter + + + true + + + false + + + + + + + + 0 + 0 + + + + QFrame::Panel + + + QFrame::Sunken + + + + + + true + + + + + + + false + + + Start calibration + + + true + + + + + + + + + + + + + true + + + QFrame::Panel + + + QFrame::Sunken + + + + + + -- cgit v1.2.3 From e3de47abc3eba2d1cebc94943a203623c6545f3f Mon Sep 17 00:00:00 2001 From: Michael Welter Date: Wed, 18 May 2022 22:15:02 +0200 Subject: tracker/nn: Use postfix underscore to indicate class member variables --- tracker-neuralnet/ftnoir_tracker_neuralnet.cpp | 491 +++++++++++++------------ tracker-neuralnet/ftnoir_tracker_neuralnet.h | 113 +++--- tracker-neuralnet/lang/nl_NL.ts | 16 +- tracker-neuralnet/lang/ru_RU.ts | 16 +- tracker-neuralnet/lang/stub.ts | 16 +- tracker-neuralnet/lang/zh_CN.ts | 16 +- 6 files changed, 335 insertions(+), 333 deletions(-) (limited to 'tracker-neuralnet/lang/ru_RU.ts') diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp index 352baf29..5439b38e 100644 --- a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp +++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp @@ -320,49 +320,49 @@ int enum_to_fps(int value) Localizer::Localizer(Ort::MemoryInfo &allocator_info, Ort::Session &&session) : - session{std::move(session)}, - scaled_frame(input_img_height, input_img_width, CV_8U), - input_mat(input_img_height, input_img_width, CV_32F) + session_{std::move(session)}, + scaled_frame_(INPUT_IMG_HEIGHT, INPUT_IMG_WIDTH, CV_8U), + input_mat_(INPUT_IMG_HEIGHT, INPUT_IMG_WIDTH, CV_32F) { // Only works when input_mat does not reallocated memory ...which it should not. // Non-owning memory reference to input_mat? 
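    // (For what it's worth: this overload of Ort::Value::CreateTensor wraps the
    //  caller's buffer without copying, so input_val_ stays valid only as long as
    //  input_mat_ keeps this exact allocation; the pointer asserts in run()
    //  double-check exactly that.)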
// Note: shape = (bach x channels x h x w) - const std::int64_t input_shape[4] = { 1, 1, input_img_height, input_img_width }; - input_val = Ort::Value::CreateTensor(allocator_info, input_mat.ptr(0), input_mat.total(), input_shape, 4); + const std::int64_t input_shape[4] = { 1, 1, INPUT_IMG_HEIGHT, INPUT_IMG_WIDTH }; + input_val_ = Ort::Value::CreateTensor(allocator_info, input_mat_.ptr(0), input_mat_.total(), input_shape, 4); const std::int64_t output_shape[2] = { 1, 5 }; - output_val = Ort::Value::CreateTensor(allocator_info, results.data(), results.size(), output_shape, 2); + output_val_ = Ort::Value::CreateTensor(allocator_info, results_.data(), results_.size(), output_shape, 2); } std::pair Localizer::run( const cv::Mat &frame) { - auto p = input_mat.ptr(0); + auto p = input_mat_.ptr(0); - cv::resize(frame, scaled_frame, { input_img_width, input_img_height }, 0, 0, cv::INTER_AREA); - scaled_frame.convertTo(input_mat, CV_32F, 1./255., -0.5); + cv::resize(frame, scaled_frame_, { INPUT_IMG_WIDTH, INPUT_IMG_HEIGHT }, 0, 0, cv::INTER_AREA); + scaled_frame_.convertTo(input_mat_, CV_32F, 1./255., -0.5); - assert (input_mat.ptr(0) == p); - assert (!input_mat.empty() && input_mat.isContinuous()); - assert (input_mat.cols == input_img_width && input_mat.rows == input_img_height); + assert (input_mat_.ptr(0) == p); + assert (!input_mat_.empty() && input_mat_.isContinuous()); + assert (input_mat_.cols == INPUT_IMG_WIDTH && input_mat_.rows == INPUT_IMG_HEIGHT); const char* input_names[] = {"x"}; const char* output_names[] = {"logit_box"}; Timer t; t.start(); - session.Run(Ort::RunOptions{nullptr}, input_names, &input_val, 1, output_names, &output_val, 1); + session_.Run(Ort::RunOptions{nullptr}, input_names, &input_val_, 1, output_names, &output_val_, 1); - last_inference_time = t.elapsed_ms(); + last_inference_time_ = t.elapsed_ms(); const cv::Rect2f roi = unnormalize(cv::Rect2f{ - results[1], - results[2], - results[3]-results[1], // Width - results[4]-results[2] // Height + results_[1], + results_[2], + results_[3]-results_[1], // Width + results_[4]-results_[2] // Height }, frame.rows, frame.cols); - const float score = sigmoid(results[0]); + const float score = sigmoid(results_[0]); return { score, roi }; } @@ -370,90 +370,91 @@ std::pair Localizer::run( double Localizer::last_inference_time_millis() const { - return last_inference_time; + return last_inference_time_; } PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&_session) - : model_version{_session.GetModelMetadata().GetVersion()} - , session{std::move(_session)} - , allocator{session, allocator_info} + : model_version_{_session.GetModelMetadata().GetVersion()} + , session_{std::move(_session)} + , allocator_{session_, allocator_info} { using namespace std::literals::string_literals; - if (session.GetOutputCount() < 2) + if (session_.GetOutputCount() < 2) throw std::runtime_error("Invalid Model: must have at least two outputs"); - // WARNING + // WARNING UB .. but still ... // If the model was saved without meta data, it seems the version field is uninitialized. // In that case reading from it is UB. However, we will just get same arbitrary number // which is hopefully different from the numbers used by models where the version is set. - if (model_version != 2) - model_version = 1; + // I.e., this is what happended in practice so far. 
+ if (model_version_ != 2) + model_version_ = 1; - const cv::Size input_image_shape = get_input_image_shape(session); + const cv::Size input_image_shape = get_input_image_shape(session_); - scaled_frame = cv::Mat(input_image_shape, CV_8U); - input_mat = cv::Mat(input_image_shape, CV_32F); + scaled_frame_ = cv::Mat(input_image_shape, CV_8U); + input_mat_ = cv::Mat(input_image_shape, CV_32F); { const std::int64_t input_shape[4] = { 1, 1, input_image_shape.height, input_image_shape.width }; - input_val.push_back( - Ort::Value::CreateTensor(allocator_info, input_mat.ptr(0), input_mat.total(), input_shape, 4)); + input_val_.push_back( + Ort::Value::CreateTensor(allocator_info, input_mat_.ptr(0), input_mat_.total(), input_shape, 4)); } { const std::int64_t output_shape[2] = { 1, 3 }; - output_val.push_back(Ort::Value::CreateTensor( - allocator_info, &output_coord[0], output_coord.rows, output_shape, 2)); + output_val_.push_back(Ort::Value::CreateTensor( + allocator_info, &output_coord_[0], output_coord_.rows, output_shape, 2)); } { const std::int64_t output_shape[2] = { 1, 4 }; - output_val.push_back(Ort::Value::CreateTensor( - allocator_info, &output_quat[0], output_quat.rows, output_shape, 2)); + output_val_.push_back(Ort::Value::CreateTensor( + allocator_info, &output_quat_[0], output_quat_.rows, output_shape, 2)); } size_t num_regular_outputs = 2; - if (session.GetOutputCount() >= 3 && "box"s == session.GetOutputName(2, allocator)) + if (session_.GetOutputCount() >= 3 && "box"s == session_.GetOutputName(2, allocator_)) { const std::int64_t output_shape[2] = { 1, 4 }; - output_val.push_back(Ort::Value::CreateTensor( - allocator_info, &output_box[0], output_box.rows, output_shape, 2)); + output_val_.push_back(Ort::Value::CreateTensor( + allocator_info, &output_box_[0], output_box_.rows, output_shape, 2)); ++num_regular_outputs; qDebug() << "Note: Legacy model output for face ROI is currently ignored"; } - num_recurrent_states = session.GetInputCount()-1; - if (session.GetOutputCount()-num_regular_outputs != num_recurrent_states) + num_recurrent_states_ = session_.GetInputCount()-1; + if (session_.GetOutputCount()-num_regular_outputs != num_recurrent_states_) throw std::runtime_error("Invalid Model: After regular inputs and outputs the model must have equal number of inputs and outputs for tensors holding hidden states of recurrent layers."); // Create tensors for recurrent state - for (size_t i = 0; i < num_recurrent_states; ++i) + for (size_t i = 0; i < num_recurrent_states_; ++i) { - const auto& input_info = session.GetInputTypeInfo(1+i); - const auto& output_info = session.GetOutputTypeInfo(num_regular_outputs+i); + const auto& input_info = session_.GetInputTypeInfo(1+i); + const auto& output_info = session_.GetOutputTypeInfo(num_regular_outputs+i); if (input_info.GetTensorTypeAndShapeInfo().GetShape() != output_info.GetTensorTypeAndShapeInfo().GetShape()) throw std::runtime_error("Invalid Model: Tensors for recurrent hidden states should have same shape on intput and output"); - input_val.push_back(create_tensor(input_info, allocator)); - output_val.push_back(create_tensor(output_info, allocator)); + input_val_.push_back(create_tensor(input_info, allocator_)); + output_val_.push_back(create_tensor(output_info, allocator_)); } - for (size_t i = 0; i < session.GetInputCount(); ++i) + for (size_t i = 0; i < session_.GetInputCount(); ++i) { - input_names.push_back(session.GetInputName(i, allocator)); + input_names_.push_back(session_.GetInputName(i, allocator_)); } - for (size_t i = 0; i < 
session.GetOutputCount(); ++i) + for (size_t i = 0; i < session_.GetOutputCount(); ++i) { - output_names.push_back(session.GetOutputName(i, allocator)); + output_names_.push_back(session_.GetOutputName(i, allocator_)); } - qDebug() << "Model inputs: " << session.GetInputCount() << ", outputs: " << session.GetOutputCount() << ", recurrent states: " << num_recurrent_states; + qDebug() << "Model inputs: " << session_.GetInputCount() << ", outputs: " << session_.GetOutputCount() << ", recurrent states: " << num_recurrent_states_; - assert (input_names.size() == input_val.size()); - assert (output_names.size() == output_val.size()); + assert (input_names_.size() == input_val_.size()); + assert (output_names_.size() == output_val_.size()); } @@ -464,9 +465,9 @@ int PoseEstimator::find_input_intensity_90_pct_quantile() const float range[] = { 0, 256 }; const float* ranges[] = { range }; cv::Mat hist; - cv::calcHist(&scaled_frame, 1, channels, cv::Mat(), hist, 1, hist_size, ranges, true, false); + cv::calcHist(&scaled_frame_, 1, channels, cv::Mat(), hist, 1, hist_size, ranges, true, false); int gray_level = 0; - const int num_pixels_quantile = scaled_frame.total()*0.9f; + const int num_pixels_quantile = scaled_frame_.total()*0.9f; int num_pixels_accum = 0; for (int i=0; i PoseEstimator::run( if (cropped.rows != patch_size || cropped.cols != patch_size) return {}; - auto p = input_mat.ptr(0); + auto p = input_mat_.ptr(0); - cv::resize(cropped, scaled_frame, scaled_frame.size(), 0, 0, cv::INTER_AREA); + cv::resize(cropped, scaled_frame_, scaled_frame_.size(), 0, 0, cv::INTER_AREA); // Automatic brightness amplification. const int brightness = find_input_intensity_90_pct_quantile(); const double alpha = brightness<127 ? 0.5/std::max(5,brightness) : 1./255; const double beta = -0.5; - scaled_frame.convertTo(input_mat, CV_32F, alpha, beta); + scaled_frame_.convertTo(input_mat_, CV_32F, alpha, beta); - assert (input_mat.ptr(0) == p); - assert (!input_mat.empty() && input_mat.isContinuous()); + assert (input_mat_.ptr(0) == p); + assert (!input_mat_.empty() && input_mat_.isContinuous()); Timer t; t.start(); try { - session.Run( + session_.Run( Ort::RunOptions{ nullptr }, - input_names.data(), - input_val.data(), - input_val.size(), - output_names.data(), - output_val.data(), - output_val.size()); + input_names_.data(), + input_val_.data(), + input_val_.size(), + output_names_.data(), + output_val_.data(), + output_val_.size()); } catch (const Ort::Exception &e) { @@ -532,14 +533,14 @@ std::optional PoseEstimator::run( return {}; } - for (size_t i = 0; i PoseEstimator::run( // issue. The ONNX api suggests that tensor are allocated in an // arena. Does that matter? Maybe the issue is something else? - last_inference_time = t.elapsed_ms(); + last_inference_time_ = t.elapsed_ms(); // Perform coordinate transformation. // From patch-local normalized in [-1,1] to // frame unnormalized pixel coordinatesettings. const cv::Point2f center = patch_center + - (0.5f*patch_size)*cv::Point2f{output_coord[0], output_coord[1]}; + (0.5f*patch_size)*cv::Point2f{output_coord_[0], output_coord_[1]}; - const float size = patch_size*0.5f*output_coord[2]; + const float size = patch_size*0.5f*output_coord_[2]; // Following Eigen which uses quat components in the order w, x, y, z. 
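    // (Concretely: the network emits the quaternion as (x, y, z, w); the indices
    //  below reorder that into Eigen-style (w, x, y, z).)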
quat rotation = { - output_quat[3], - output_quat[0], - output_quat[1], - output_quat[2] }; + output_quat_[3], + output_quat_[0], + output_quat_[1], + output_quat_[2] }; - if (model_version < 2) + if (model_version_ < 2) { // Due to a change in coordinate conventions rotation = world_to_image(rotation); } const cv::Rect2f outbox = { - patch_center.x + (0.5f*patch_size)*output_box[0], - patch_center.y + (0.5f*patch_size)*output_box[1], - 0.5f*patch_size*(output_box[2]-output_box[0]), - 0.5f*patch_size*(output_box[3]-output_box[1]) + patch_center.x + (0.5f*patch_size)*output_box_[0], + patch_center.y + (0.5f*patch_size)*output_box_[1], + 0.5f*patch_size*(output_box_[2]-output_box_[0]), + 0.5f*patch_size*(output_box_[3]-output_box_[1]) }; return std::optional({ @@ -587,9 +588,9 @@ std::optional PoseEstimator::run( cv::Mat PoseEstimator::last_network_input() const { cv::Mat ret; - if (!input_mat.empty()) + if (!input_mat_.empty()) { - input_mat.convertTo(ret, CV_8U, 255., 127.); + input_mat_.convertTo(ret, CV_8U, 255., 127.); cv::cvtColor(ret, ret, cv::COLOR_GRAY2RGB); } return ret; @@ -598,11 +599,11 @@ cv::Mat PoseEstimator::last_network_input() const double PoseEstimator::last_inference_time_millis() const { - return last_inference_time; + return last_inference_time_; } -bool neuralnet_tracker::detect() +bool NeuralNetTracker::detect() { double inference_time = 0.; @@ -613,35 +614,35 @@ bool neuralnet_tracker::detect() } }; // Note: BGR colors! - if (!last_localizer_roi || !last_roi || - iou(*last_localizer_roi,*last_roi)<0.25) + if (!last_localizer_roi_ || !last_roi_ || + iou(*last_localizer_roi_,*last_roi_)<0.25) { - auto [p, rect] = localizer->run(grayscale_); - inference_time += localizer->last_inference_time_millis(); + auto [p, rect] = localizer_->run(grayscale_); + inference_time += localizer_->last_inference_time_millis(); if (p > 0.5 || rect.height < 5 || rect.width < 5) { - last_localizer_roi = rect; - last_roi = rect; + last_localizer_roi_ = rect; + last_roi_ = rect; } else { - last_roi.reset(); - last_localizer_roi.reset(); + last_roi_.reset(); + last_localizer_roi_.reset(); } } - if (!last_roi) + if (!last_roi_) { draw_gizmos({}, {}); return false; } - auto face = poseestimator->run(grayscale_, *last_roi); - inference_time += poseestimator->last_inference_time_millis(); + auto face = poseestimator_->run(grayscale_, *last_roi_); + inference_time += poseestimator_->last_inference_time_millis(); if (!face) { - last_roi.reset(); + last_roi_.reset(); draw_gizmos(*face, {}); return false; } @@ -653,7 +654,7 @@ bool neuralnet_tracker::detect() // been tweaked so that it works pretty well. 
// In old behaviour ROI is taken from the model outputs const vec3 offset = rotate_vec(face->rotation, vec3{0.f, 0.1f*face->size, face->size*0.3f}); - const float halfsize = face->size/float(settings.roi_zoom); + const float halfsize = face->size/float(settings_.roi_zoom); face->box = cv::Rect2f( face->center.x + offset[0] - halfsize, face->center.y + offset[1] - halfsize, @@ -662,14 +663,14 @@ bool neuralnet_tracker::detect() ); } - last_roi = ewa_filter(*last_roi, face->box, float(settings.roi_filter_alpha)); + last_roi_ = ewa_filter(*last_roi_, face->box, float(settings_.roi_filter_alpha)); Affine pose = compute_pose(*face); draw_gizmos(*face, pose); { - QMutexLocker lck(&mtx); + QMutexLocker lck(&mtx_); this->pose_ = pose; } @@ -677,18 +678,18 @@ bool neuralnet_tracker::detect() } -void neuralnet_tracker::draw_gizmos( +void NeuralNetTracker::draw_gizmos( const std::optional &face, const Affine& pose) { if (!is_visible_) return; - preview_.draw_gizmos(face, pose, last_roi, last_localizer_roi, world_to_image(pose.t, grayscale_.size(), intrinsics)); + preview_.draw_gizmos(face, pose, last_roi_, last_localizer_roi_, world_to_image(pose.t, grayscale_.size(), intrinsics_)); - if (settings.show_network_input) + if (settings_.show_network_input) { - cv::Mat netinput = poseestimator->last_network_input(); + cv::Mat netinput = poseestimator_->last_network_input(); preview_.overlay_netinput(netinput); } @@ -696,13 +697,13 @@ void neuralnet_tracker::draw_gizmos( } -Affine neuralnet_tracker::compute_pose(const PoseEstimator::Face &face) const +Affine NeuralNetTracker::compute_pose(const PoseEstimator::Face &face) const { // Compute the location the network outputs in 3d space. const mat33 rot_correction = compute_rotation_correction( normalize(face.center, grayscale_.rows, grayscale_.cols), - intrinsics.focal_length_w); + intrinsics_.focal_length_w); const mat33 m = rot_correction * quaternion_to_mat33( image_to_world(face.rotation)); @@ -721,18 +722,18 @@ Affine neuralnet_tracker::compute_pose(const PoseEstimator::Face &face) const */ const vec3 face_world_pos = image_to_world( - face.center.x, face.center.y, face.size, head_size_mm, + face.center.x, face.center.y, face.size, HEAD_SIZE_MM, grayscale_.size(), - intrinsics); + intrinsics_); // But this is in general not the location of the rotation joint in the neck. // So we need an extra offset. 
Which we determine by solving // z,y,z-pos = head_joint_loc + R_face * offset const vec3 pos = face_world_pos + m * vec3{ - static_cast(settings.offset_fwd), - static_cast(settings.offset_up), - static_cast(settings.offset_right)}; + static_cast(settings_.offset_fwd), + static_cast(settings_.offset_up), + static_cast(settings_.offset_right)}; return { m, pos }; } @@ -843,13 +844,13 @@ float Preview::transform(float s) const } -neuralnet_tracker::neuralnet_tracker() +NeuralNetTracker::NeuralNetTracker() { opencv_init(); } -neuralnet_tracker::~neuralnet_tracker() +NeuralNetTracker::~NeuralNetTracker() { requestInterruption(); wait(); @@ -858,22 +859,22 @@ neuralnet_tracker::~neuralnet_tracker() } -module_status neuralnet_tracker::start_tracker(QFrame* videoframe) +module_status NeuralNetTracker::start_tracker(QFrame* videoframe) { videoframe->show(); - videoWidget = std::make_unique(videoframe); - layout = std::make_unique(); - layout->setContentsMargins(0, 0, 0, 0); - layout->addWidget(&*videoWidget); - videoframe->setLayout(&*layout); - videoWidget->show(); - num_threads = settings.num_threads; + video_widget_ = std::make_unique(videoframe); + layout_ = std::make_unique(); + layout_->setContentsMargins(0, 0, 0, 0); + layout_->addWidget(&*video_widget_); + videoframe->setLayout(&*layout_); + video_widget_->show(); + num_threads_ = settings_.num_threads; start(); return status_ok(); } -bool neuralnet_tracker::load_and_initialize_model() +bool NeuralNetTracker::load_and_initialize_model() { const QString localizer_model_path_enc = OPENTRACK_BASE_PATH+"/" OPENTRACK_LIBRARY_PATH "/models/head-localizer.onnx"; @@ -882,7 +883,7 @@ bool neuralnet_tracker::load_and_initialize_model() try { - env = Ort::Env{ + env_ = Ort::Env{ OrtLoggingLevel::ORT_LOGGING_LEVEL_ERROR, "tracker-neuralnet" }; @@ -890,17 +891,17 @@ bool neuralnet_tracker::load_and_initialize_model() // Do thread settings here do anything? // There is a warning which says to control number of threads via // openmp settings. Which is what we do. 
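        // (Background, as far as the onnxruntime documentation goes: in builds where
        //  intra-op parallelism is backed by OpenMP, SetIntraOpNumThreads() is ignored
        //  and the OpenMP thread count applies instead, which is presumably why the
        //  same value is also pushed through omp_set_num_threads() via
        //  GuardedThreadCountSwitch in run().)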
- opts.SetIntraOpNumThreads(num_threads); + opts.SetIntraOpNumThreads(num_threads_); opts.SetInterOpNumThreads(1); - allocator_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + allocator_info_ = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); - localizer.emplace( - allocator_info, - Ort::Session{env, convert(localizer_model_path_enc).c_str(), opts}); + localizer_.emplace( + allocator_info_, + Ort::Session{env_, convert(localizer_model_path_enc).c_str(), opts}); - poseestimator.emplace( - allocator_info, - Ort::Session{env, convert(poseestimator_model_path_enc).c_str(), opts}); + poseestimator_.emplace( + allocator_info_, + Ort::Session{env_, convert(poseestimator_model_path_enc).c_str(), opts}); } catch (const Ort::Exception &e) { @@ -912,17 +913,17 @@ bool neuralnet_tracker::load_and_initialize_model() } -bool neuralnet_tracker::open_camera() +bool NeuralNetTracker::open_camera() { - int rint = std::clamp(*settings.resolution, 0, (int)std::size(resolution_choices)-1); + int rint = std::clamp(*settings_.resolution, 0, (int)std::size(resolution_choices)-1); resolution_tuple res = resolution_choices[rint]; - int fps = enum_to_fps(settings.force_fps); + int fps = enum_to_fps(settings_.force_fps); - QMutexLocker l(&camera_mtx); + QMutexLocker l(&camera_mtx_); - camera = video::make_camera(settings.camera_name); + camera_ = video::make_camera(settings_.camera_name); - if (!camera) + if (!camera_) return false; video::impl::camera::info args {}; @@ -935,9 +936,9 @@ bool neuralnet_tracker::open_camera() if (fps) args.fps = fps; - args.use_mjpeg = settings.use_mjpeg; + args.use_mjpeg = settings_.use_mjpeg; - if (!camera->start(args)) + if (!camera_->start(args)) { qDebug() << "neuralnet tracker: can't open camera"; return false; @@ -947,39 +948,39 @@ bool neuralnet_tracker::open_camera() } -void neuralnet_tracker::set_intrinsics() +void NeuralNetTracker::set_intrinsics() { const int w = grayscale_.cols, h = grayscale_.rows; - const double diag_fov = settings.fov * M_PI / 180.; + const double diag_fov = settings_.fov * M_PI / 180.; const double fov_w = 2.*atan(tan(diag_fov/2.)/sqrt(1. + h/(double)w * h/(double)w)); const double fov_h = 2.*atan(tan(diag_fov/2.)/sqrt(1. + w/(double)h * w/(double)h)); const double focal_length_w = 1. / tan(.5 * fov_w); const double focal_length_h = 1. 
/ tan(.5 * fov_h); - intrinsics.fov_h = fov_h; - intrinsics.fov_w = fov_w; - intrinsics.focal_length_w = focal_length_w; - intrinsics.focal_length_h = focal_length_h; + intrinsics_.fov_h = fov_h; + intrinsics_.fov_w = fov_w; + intrinsics_.focal_length_w = focal_length_w; + intrinsics_.focal_length_h = focal_length_h; } class GuardedThreadCountSwitch { - int old_num_threads_cv = 1; - int old_num_threads_omp = 1; + int old_num_threads_cv_ = 1; + int old_num_threads_omp_ = 1; public: GuardedThreadCountSwitch(int num_threads) { - old_num_threads_cv = cv::getNumThreads(); - old_num_threads_omp = omp_get_num_threads(); + old_num_threads_cv_ = cv::getNumThreads(); + old_num_threads_omp_ = omp_get_num_threads(); omp_set_num_threads(num_threads); cv::setNumThreads(num_threads); } ~GuardedThreadCountSwitch() { - omp_set_num_threads(old_num_threads_omp); - cv::setNumThreads(old_num_threads_cv); + omp_set_num_threads(old_num_threads_omp_); + cv::setNumThreads(old_num_threads_cv_); } GuardedThreadCountSwitch(const GuardedThreadCountSwitch&) = delete; @@ -987,11 +988,11 @@ class GuardedThreadCountSwitch }; -void neuralnet_tracker::run() +void NeuralNetTracker::run() { - preview_.init(*videoWidget); + preview_.init(*video_widget_); - GuardedThreadCountSwitch switch_num_threads_to(num_threads); + GuardedThreadCountSwitch switch_num_threads_to(num_threads_); if (!open_camera()) return; @@ -1006,9 +1007,9 @@ void neuralnet_tracker::run() is_visible_ = check_is_visible(); auto t = clk.now(); { - QMutexLocker l(&camera_mtx); + QMutexLocker l(&camera_mtx_); - auto [ img, res ] = camera->get_frame(); + auto [ img, res ] = camera_->get_frame(); if (!res) { @@ -1047,7 +1048,7 @@ void neuralnet_tracker::run() detect(); if (is_visible_) - preview_.copy_to_widget(*videoWidget); + preview_.copy_to_widget(*video_widget_); update_fps( std::chrono::duration_cast( @@ -1056,7 +1057,7 @@ void neuralnet_tracker::run() } -cv::Mat neuralnet_tracker::prepare_input_image(const video::frame& frame) +cv::Mat NeuralNetTracker::prepare_input_image(const video::frame& frame) { auto img = cv::Mat(frame.height, frame.width, CV_8UC(frame.channels), (void*)frame.data, frame.stride); @@ -1083,23 +1084,23 @@ cv::Mat neuralnet_tracker::prepare_input_image(const video::frame& frame) } -void neuralnet_tracker::update_fps(double dt) +void NeuralNetTracker::update_fps(double dt) { const double alpha = dt/(dt + RC); if (dt > 1e-6) { QMutexLocker lck{&stats_mtx_}; - fps *= 1 - alpha; - fps += alpha * 1./dt; + fps_ *= 1 - alpha; + fps_ += alpha * 1./dt; } } -void neuralnet_tracker::data(double *data) +void NeuralNetTracker::data(double *data) { Affine tmp = [&]() { - QMutexLocker lck(&mtx); + QMutexLocker lck(&mtx_); return pose_; }(); @@ -1124,29 +1125,29 @@ void neuralnet_tracker::data(double *data) } -Affine neuralnet_tracker::pose() +Affine NeuralNetTracker::pose() { - QMutexLocker lck(&mtx); + QMutexLocker lck(&mtx_); return pose_; } -std::tuple neuralnet_tracker::stats() const +std::tuple NeuralNetTracker::stats() const { QMutexLocker lck(&stats_mtx_); - return { resolution_, fps, inference_time_ }; + return { resolution_, fps_, inference_time_ }; } -void neuralnet_dialog::make_fps_combobox() +void NeuralNetDialog::make_fps_combobox() { for (int k = 0; k < fps_MAX; k++) { const int hz = enum_to_fps(k); const QString name = (hz == 0) ? 
tr("Default") : QString::number(hz); - ui.cameraFPS->addItem(name, k); + ui_.cameraFPS->addItem(name, k); } } -void neuralnet_dialog::make_resolution_combobox() +void NeuralNetDialog::make_resolution_combobox() { int k=0; for (const auto [w, h] : resolution_choices) @@ -1154,125 +1155,125 @@ void neuralnet_dialog::make_resolution_combobox() const QString s = (w == 0) ? tr("Default") : QString::number(w) + " x " + QString::number(h); - ui.resolution->addItem(s, k++); + ui_.resolution->addItem(s, k++); } } -neuralnet_dialog::neuralnet_dialog() : - trans_calib(1, 2) +NeuralNetDialog::NeuralNetDialog() : + trans_calib_(1, 2) { - ui.setupUi(this); + ui_.setupUi(this); make_fps_combobox(); make_resolution_combobox(); for (const auto& str : video::camera_names()) - ui.cameraName->addItem(str); + ui_.cameraName->addItem(str); - tie_setting(settings.camera_name, ui.cameraName); - tie_setting(settings.fov, ui.cameraFOV); - tie_setting(settings.offset_fwd, ui.tx_spin); - tie_setting(settings.offset_up, ui.ty_spin); - tie_setting(settings.offset_right, ui.tz_spin); - tie_setting(settings.show_network_input, ui.showNetworkInput); - tie_setting(settings.roi_filter_alpha, ui.roiFilterAlpha); - tie_setting(settings.use_mjpeg, ui.use_mjpeg); - tie_setting(settings.roi_zoom, ui.roiZoom); - tie_setting(settings.num_threads, ui.threadCount); - tie_setting(settings.resolution, ui.resolution); - tie_setting(settings.force_fps, ui.cameraFPS); + tie_setting(settings_.camera_name, ui_.cameraName); + tie_setting(settings_.fov, ui_.cameraFOV); + tie_setting(settings_.offset_fwd, ui_.tx_spin); + tie_setting(settings_.offset_up, ui_.ty_spin); + tie_setting(settings_.offset_right, ui_.tz_spin); + tie_setting(settings_.show_network_input, ui_.showNetworkInput); + tie_setting(settings_.roi_filter_alpha, ui_.roiFilterAlpha); + tie_setting(settings_.use_mjpeg, ui_.use_mjpeg); + tie_setting(settings_.roi_zoom, ui_.roiZoom); + tie_setting(settings_.num_threads, ui_.threadCount); + tie_setting(settings_.resolution, ui_.resolution); + tie_setting(settings_.force_fps, ui_.cameraFPS); - connect(ui.buttonBox, SIGNAL(accepted()), this, SLOT(doOK())); - connect(ui.buttonBox, SIGNAL(rejected()), this, SLOT(doCancel())); - connect(ui.camera_settings, SIGNAL(clicked()), this, SLOT(camera_settings())); + connect(ui_.buttonBox, SIGNAL(accepted()), this, SLOT(doOK())); + connect(ui_.buttonBox, SIGNAL(rejected()), this, SLOT(doCancel())); + connect(ui_.camera_settings, SIGNAL(clicked()), this, SLOT(camera_settings())); - connect(&settings.camera_name, value_::value_changed(), this, &neuralnet_dialog::update_camera_settings_state); + connect(&settings_.camera_name, value_::value_changed(), this, &NeuralNetDialog::update_camera_settings_state); - update_camera_settings_state(settings.camera_name); + update_camera_settings_state(settings_.camera_name); - connect(&calib_timer, &QTimer::timeout, this, &neuralnet_dialog::trans_calib_step); - calib_timer.setInterval(35); - connect(ui.tcalib_button,SIGNAL(toggled(bool)), this, SLOT(startstop_trans_calib(bool))); + connect(&calib_timer_, &QTimer::timeout, this, &NeuralNetDialog::trans_calib_step); + calib_timer_.setInterval(35); + connect(ui_.tcalib_button,SIGNAL(toggled(bool)), this, SLOT(startstop_trans_calib(bool))); - connect(&tracker_status_poll_timer, &QTimer::timeout, this, &neuralnet_dialog::status_poll); - tracker_status_poll_timer.setInterval(250); - tracker_status_poll_timer.start(); + connect(&tracker_status_poll_timer_, &QTimer::timeout, this, &NeuralNetDialog::status_poll); + 
tracker_status_poll_timer_.setInterval(250); + tracker_status_poll_timer_.start(); } -void neuralnet_dialog::doOK() +void NeuralNetDialog::doOK() { - settings.b->save(); + settings_.b->save(); close(); } -void neuralnet_dialog::doCancel() +void NeuralNetDialog::doCancel() { close(); } -void neuralnet_dialog::camera_settings() +void NeuralNetDialog::camera_settings() { - if (tracker) + if (tracker_) { - QMutexLocker l(&tracker->camera_mtx); - (void)tracker->camera->show_dialog(); + QMutexLocker l(&tracker_->camera_mtx_); + (void)tracker_->camera_->show_dialog(); } else - (void)video::show_dialog(settings.camera_name); + (void)video::show_dialog(settings_.camera_name); } -void neuralnet_dialog::update_camera_settings_state(const QString& name) +void NeuralNetDialog::update_camera_settings_state(const QString& name) { (void)name; - ui.camera_settings->setEnabled(true); + ui_.camera_settings->setEnabled(true); } -void neuralnet_dialog::register_tracker(ITracker * x) +void NeuralNetDialog::register_tracker(ITracker * x) { - tracker = static_cast(x); - ui.tcalib_button->setEnabled(true); + tracker_ = static_cast(x); + ui_.tcalib_button->setEnabled(true); } -void neuralnet_dialog::unregister_tracker() +void NeuralNetDialog::unregister_tracker() { - tracker = nullptr; - ui.tcalib_button->setEnabled(false); + tracker_ = nullptr; + ui_.tcalib_button->setEnabled(false); } -void neuralnet_dialog::status_poll() +void NeuralNetDialog::status_poll() { QString status; - if (!tracker) + if (!tracker_) { status = tr("Tracker Offline"); } else { - auto [ res, fps, inference_time ] = tracker->stats(); + auto [ res, fps, inference_time ] = tracker_->stats(); status = tr("%1x%2 @ %3 FPS / Inference: %4 ms").arg(res.width).arg(res.height).arg(int(fps)).arg(int(inference_time)); } - ui.resolution_display->setText(status); + ui_.resolution_display->setText(status); } -void neuralnet_dialog::trans_calib_step() +void NeuralNetDialog::trans_calib_step() { - if (tracker) + if (tracker_) { const Affine X_CM = [&]() { - QMutexLocker l(&calibrator_mutex); - return tracker->pose(); + QMutexLocker l(&calibrator_mutex_); + return tracker_->pose(); }(); - trans_calib.update(X_CM.R, X_CM.t); - auto [_, nsamples] = trans_calib.get_estimate(); + trans_calib_.update(X_CM.R, X_CM.t); + auto [_, nsamples] = trans_calib_.get_estimate(); constexpr int min_yaw_samples = 15; constexpr int min_pitch_samples = 12; @@ -1291,47 +1292,47 @@ void neuralnet_dialog::trans_calib_step() const int nsamples_total = nsamples[0] + nsamples[1]; sample_feedback = tr("%1 samples. Over %2, good!").arg(nsamples_total).arg(min_samples); } - ui.sample_count_display->setText(sample_feedback); + ui_.sample_count_display->setText(sample_feedback); } else startstop_trans_calib(false); } -void neuralnet_dialog::startstop_trans_calib(bool start) +void NeuralNetDialog::startstop_trans_calib(bool start) { - QMutexLocker l(&calibrator_mutex); + QMutexLocker l(&calibrator_mutex_); // FIXME: does not work ... if (start) { qDebug() << "pt: starting translation calibration"; - calib_timer.start(); - trans_calib.reset(); - ui.sample_count_display->setText(QString()); + calib_timer_.start(); + trans_calib_.reset(); + ui_.sample_count_display->setText(QString()); // Tracker must run with zero'ed offset for calibration. 
- settings.offset_fwd = 0; - settings.offset_up = 0; - settings.offset_right = 0; + settings_.offset_fwd = 0; + settings_.offset_up = 0; + settings_.offset_right = 0; } else { - calib_timer.stop(); + calib_timer_.stop(); qDebug() << "pt: stopping translation calibration"; { - auto [tmp, nsamples] = trans_calib.get_estimate(); - settings.offset_fwd = int(tmp[0]); - settings.offset_up = int(tmp[1]); - settings.offset_right = int(tmp[2]); + auto [tmp, nsamples] = trans_calib_.get_estimate(); + settings_.offset_fwd = int(tmp[0]); + settings_.offset_up = int(tmp[1]); + settings_.offset_right = int(tmp[2]); } } - ui.tx_spin->setEnabled(!start); - ui.ty_spin->setEnabled(!start); - ui.tz_spin->setEnabled(!start); + ui_.tx_spin->setEnabled(!start); + ui_.ty_spin->setEnabled(!start); + ui_.tz_spin->setEnabled(!start); if (start) - ui.tcalib_button->setText(tr("Stop calibration")); + ui_.tcalib_button->setText(tr("Stop calibration")); else - ui.tcalib_button->setText(tr("Start calibration")); + ui_.tcalib_button->setText(tr("Start calibration")); } @@ -1339,4 +1340,4 @@ Settings::Settings() : opts("neuralnet-tracker") {} } // neuralnet_tracker_ns -OPENTRACK_DECLARE_TRACKER(neuralnet_tracker, neuralnet_dialog, neuralnet_metadata) +OPENTRACK_DECLARE_TRACKER(NeuralNetTracker, NeuralNetDialog, NeuralNetMetadata) diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.h b/tracker-neuralnet/ftnoir_tracker_neuralnet.h index 00b5f220..9b481186 100644 --- a/tracker-neuralnet/ftnoir_tracker_neuralnet.h +++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.h @@ -107,14 +107,14 @@ class Localizer double last_inference_time_millis() const; private: - inline static constexpr int input_img_width = 288; - inline static constexpr int input_img_height = 224; - Ort::Session session{nullptr}; + inline static constexpr int INPUT_IMG_WIDTH = 288; + inline static constexpr int INPUT_IMG_HEIGHT = 224; + Ort::Session session_{nullptr}; // Inputs / outputs - cv::Mat scaled_frame{}, input_mat{}; - Ort::Value input_val{nullptr}, output_val{nullptr}; - std::array results; - double last_inference_time = 0; + cv::Mat scaled_frame_{}, input_mat_{}; + Ort::Value input_val_{nullptr}, output_val_{nullptr}; + std::array results_; + double last_inference_time_ = 0; }; @@ -124,7 +124,6 @@ class PoseEstimator struct Face { std::array rotation; // Quaternion, (w, x, y, z) - // The following quantities are defined wrt the image space of the input cv::Rect2f box; cv::Point2f center; float size; @@ -132,7 +131,11 @@ class PoseEstimator PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&session); - // Inference + /** Inference + * + * Coordinates are defined wrt. the image space of the input `frame`. + * X goes right, Z (depth) into the image, Y points down (like pixel coordinates values increase from top to bottom) + */ std::optional run(const cv::Mat &frame, const cv::Rect &box); // Returns an image compatible with the 'frame' image for displaying. 
cv::Mat last_network_input() const; @@ -141,21 +144,21 @@ class PoseEstimator // Operates on the private image data members int find_input_intensity_90_pct_quantile() const; - int64_t model_version = 0; - Ort::Session session{nullptr}; - Ort::Allocator allocator; + int64_t model_version_ = 0; // Queried meta data from the ONNX file + Ort::Session session_{nullptr}; // ONNX's runtime context for running the model + Ort::Allocator allocator_; // Memory allocator for tensors // Inputs - cv::Mat scaled_frame{}, input_mat{}; - std::vector input_val; - std::vector input_names; + cv::Mat scaled_frame_{}, input_mat_{}; // Input. One is the original crop, the other is rescaled (?) + std::vector input_val_; // Tensors to put into the model + std::vector input_names_; // Refers to the names in the onnx model. // Outputs - cv::Vec output_coord{}; - cv::Vec output_quat{}; - cv::Vec output_box{}; - std::vector output_val; - std::vector output_names; - size_t num_recurrent_states = 0; - double last_inference_time = 0; + cv::Vec output_coord_{}; // 2d Coordinate and head size output. + cv::Vec output_quat_{}; // Quaternion output + cv::Vec output_box_{}; // Bounding box output + std::vector output_val_; // Tensors to put the model outputs in. + std::vector output_names_; // Refers to the names in the onnx model. + size_t num_recurrent_states_ = 0; + double last_inference_time_ = 0; }; @@ -186,20 +189,20 @@ private: }; -class neuralnet_tracker : protected virtual QThread, public ITracker +class NeuralNetTracker : protected virtual QThread, public ITracker { Q_OBJECT public: - neuralnet_tracker(); - ~neuralnet_tracker() override; + NeuralNetTracker(); + ~NeuralNetTracker() override; module_status start_tracker(QFrame* frame) override; void data(double *data) override; void run() override; Affine pose(); std::tuple stats() const; - QMutex camera_mtx; - std::unique_ptr camera; + QMutex camera_mtx_; + std::unique_ptr camera_; private: bool detect(); @@ -213,58 +216,56 @@ private: void update_fps(double dt); Affine compute_pose(const PoseEstimator::Face &face) const; - Settings settings; - std::optional localizer; - std::optional poseestimator; - Ort::Env env{nullptr}; - Ort::MemoryInfo allocator_info{nullptr}; + Settings settings_; + std::optional localizer_; + std::optional poseestimator_; + Ort::Env env_{nullptr}; + Ort::MemoryInfo allocator_info_{nullptr}; - CamIntrinsics intrinsics{}; + CamIntrinsics intrinsics_{}; cv::Mat grayscale_; std::array downsized_original_images_ = {}; // Image pyramid - std::optional last_localizer_roi; - std::optional last_roi; - static constexpr float head_size_mm = 200.f; + std::optional last_localizer_roi_; + std::optional last_roi_; + static constexpr float HEAD_SIZE_MM = 200.f; mutable QMutex stats_mtx_; - double fps = 0; + double fps_ = 0; double inference_time_ = 0; cv::Size resolution_ = {}; static constexpr double RC = .25; - int num_threads = 1; + int num_threads_ = 1; bool is_visible_ = true; - QMutex mtx; // Protects the pose + QMutex mtx_; // Protects the pose Affine pose_; Preview preview_; - std::unique_ptr videoWidget; - std::unique_ptr layout; + std::unique_ptr video_widget_; + std::unique_ptr layout_; }; -class neuralnet_dialog : public ITrackerDialog +class NeuralNetDialog : public ITrackerDialog { Q_OBJECT public: - neuralnet_dialog(); + NeuralNetDialog(); void register_tracker(ITracker * x) override; void unregister_tracker() override; private: void make_fps_combobox(); void make_resolution_combobox(); - Ui::Form ui; - Settings settings; - + Ui::Form ui_; + 
Settings settings_; // Calibration code mostly taken from point tracker - QTimer calib_timer; - TranslationCalibrator trans_calib; - QMutex calibrator_mutex; - QTimer tracker_status_poll_timer; - neuralnet_tracker* tracker = nullptr; - + QTimer calib_timer_; + TranslationCalibrator trans_calib_; + QMutex calibrator_mutex_; + QTimer tracker_status_poll_timer_; + NeuralNetTracker* tracker_ = nullptr; private Q_SLOTS: void doOK(); @@ -277,7 +278,7 @@ private Q_SLOTS: }; -class neuralnet_metadata : public Metadata +class NeuralNetMetadata : public Metadata { Q_OBJECT QString name() override { return QString("neuralnet tracker"); } @@ -287,6 +288,6 @@ class neuralnet_metadata : public Metadata } // neuralnet_tracker_ns -using neuralnet_tracker_ns::neuralnet_tracker; -using neuralnet_tracker_ns::neuralnet_dialog; -using neuralnet_tracker_ns::neuralnet_metadata; +using neuralnet_tracker_ns::NeuralNetTracker; +using neuralnet_tracker_ns::NeuralNetDialog; +using neuralnet_tracker_ns::NeuralNetMetadata; diff --git a/tracker-neuralnet/lang/nl_NL.ts b/tracker-neuralnet/lang/nl_NL.ts index 92ad65f1..dbcd3c8c 100644 --- a/tracker-neuralnet/lang/nl_NL.ts +++ b/tracker-neuralnet/lang/nl_NL.ts @@ -114,37 +114,37 @@ Don't roll or change position. - neuralnet_tracker_ns::neuralnet_dialog + neuralnet_tracker_ns::NeuralNetDialog Default Standaard - %1 yaw samples. Yaw more to %2 samples for stable calibration. + Tracker Offline - %1 pitch samples. Pitch more to %2 samples for stable calibration. + %1x%2 @ %3 FPS / Inference: %4 ms - %1 samples. Over %2, good! + %1 yaw samples. Yaw more to %2 samples for stable calibration. - Stop calibration + %1 pitch samples. Pitch more to %2 samples for stable calibration. - Start calibration + %1 samples. Over %2, good! - Tracker Offline + Stop calibration - %1x%2 @ %3 FPS / Inference: %4 ms + Start calibration diff --git a/tracker-neuralnet/lang/ru_RU.ts b/tracker-neuralnet/lang/ru_RU.ts index dfa7d042..a80c7e3d 100644 --- a/tracker-neuralnet/lang/ru_RU.ts +++ b/tracker-neuralnet/lang/ru_RU.ts @@ -114,37 +114,37 @@ Don't roll or change position. - neuralnet_tracker_ns::neuralnet_dialog + neuralnet_tracker_ns::NeuralNetDialog Default - %1 yaw samples. Yaw more to %2 samples for stable calibration. + Tracker Offline - %1 pitch samples. Pitch more to %2 samples for stable calibration. + %1x%2 @ %3 FPS / Inference: %4 ms - %1 samples. Over %2, good! + %1 yaw samples. Yaw more to %2 samples for stable calibration. - Stop calibration + %1 pitch samples. Pitch more to %2 samples for stable calibration. - Start calibration + %1 samples. Over %2, good! - Tracker Offline + Stop calibration - %1x%2 @ %3 FPS / Inference: %4 ms + Start calibration diff --git a/tracker-neuralnet/lang/stub.ts b/tracker-neuralnet/lang/stub.ts index a74d272f..4cde86a9 100644 --- a/tracker-neuralnet/lang/stub.ts +++ b/tracker-neuralnet/lang/stub.ts @@ -114,37 +114,37 @@ Don't roll or change position. - neuralnet_tracker_ns::neuralnet_dialog + neuralnet_tracker_ns::NeuralNetDialog Default - %1 yaw samples. Yaw more to %2 samples for stable calibration. + Tracker Offline - %1 pitch samples. Pitch more to %2 samples for stable calibration. + %1x%2 @ %3 FPS / Inference: %4 ms - %1 samples. Over %2, good! + %1 yaw samples. Yaw more to %2 samples for stable calibration. - Stop calibration + %1 pitch samples. Pitch more to %2 samples for stable calibration. - Start calibration + %1 samples. Over %2, good! 
- Tracker Offline + Stop calibration - %1x%2 @ %3 FPS / Inference: %4 ms + Start calibration diff --git a/tracker-neuralnet/lang/zh_CN.ts b/tracker-neuralnet/lang/zh_CN.ts index 9c936e5c..c3a91211 100644 --- a/tracker-neuralnet/lang/zh_CN.ts +++ b/tracker-neuralnet/lang/zh_CN.ts @@ -114,37 +114,37 @@ Don't roll or change position. - neuralnet_tracker_ns::neuralnet_dialog + neuralnet_tracker_ns::NeuralNetDialog Default - %1 yaw samples. Yaw more to %2 samples for stable calibration. + Tracker Offline - %1 pitch samples. Pitch more to %2 samples for stable calibration. + %1x%2 @ %3 FPS / Inference: %4 ms - %1 samples. Over %2, good! + %1 yaw samples. Yaw more to %2 samples for stable calibration. - Stop calibration + %1 pitch samples. Pitch more to %2 samples for stable calibration. - Start calibration + %1 samples. Over %2, good! - Tracker Offline + Stop calibration - %1x%2 @ %3 FPS / Inference: %4 ms + Start calibration -- cgit v1.2.3
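
Note on the FPS readout touched by this patch: NeuralNetTracker::update_fps() keeps an exponentially weighted moving average of the frame rate. Each new sample 1/dt is blended in with weight alpha = dt/(dt + RC), where RC (0.25 s in the patch) is the smoothing time constant; larger RC means heavier smoothing. The following is a minimal standalone sketch of that filter only, outside the tracker class; the struct and member names here (FpsEstimate, rc, update) are illustrative and not part of the patch.

    #include <iostream>

    // Sketch of the EWA filter used by NeuralNetTracker::update_fps().
    // rc is the smoothing time constant in seconds; larger values smooth more.
    struct FpsEstimate
    {
        double rc  = 0.25; // same constant the patch uses (RC = .25)
        double fps = 0.;   // current smoothed estimate

        void update(double dt) // dt = seconds since the previous frame
        {
            if (dt <= 1e-6)
                return; // guard against division by zero, as in the patch
            const double alpha = dt / (dt + rc);
            fps = (1. - alpha) * fps + alpha * (1. / dt);
        }
    };

    int main()
    {
        FpsEstimate est;
        for (int i = 0; i < 100; ++i)
            est.update(1. / 60.);        // feed a steady 60 Hz frame interval
        std::cout << est.fps << "\n";    // converges toward ~60
    }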