path: root/tracker-neuralnet/preview.cpp
author    Michael Welter <michael@welter-4d.de>  2022-09-11 20:55:26 +0200
committer Stanislaw Halik <sthalik@misaki.pl>    2022-11-01 13:51:35 +0100
commit    08f1fcad1c74e25f97641a0ccbd229b267ec528c (patch)
tree      000b1b276bc7df4a74fd493dab05bcce68801de8 /tracker-neuralnet/preview.cpp
parent    77d6abaf53dbe2ee6334bd59b112e25d694a2f65 (diff)
tracker/nn: Tweaks, refactoring, deadzone filtering and support for uncertainty estimation
* Add rudimentary tests for two functions, maybe more in the future
* Fix the rotation correction from vertical translation
* Move the preview class to new files
* Move the neural network model adapters to new files
* Add utility functions for OpenCV
* Query the model inputs/outputs by name to see what is available
* Support outputs for the standard deviation of the data distribution,
  i.e. what you get if you let the model output the full parameters of a
  Gaussian distribution (depending on the inputs) and fit it with the
  negative log-likelihood loss
* Disable support for sequence models
* Add support for eye open/close classification. Scale the uncertainty
  estimate up when the eyes are closed
* Add a deadzone filter which activates if the model supports uncertainty
  quantification. The deadzone becomes larger the more uncertain the
  model/data are. This is mostly meant to suppress large estimation errors
  when the user blinks (see the sketch after this list)
* Fix distance being twice what it should have been
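The uncertainty outputs mentioned above are what you get when the network predicts (mu, sigma) per pose parameter and is trained with the Gaussian negative log-likelihood, 0.5*log(2*pi*sigma^2) + (x - mu)^2 / (2*sigma^2). A minimal sketch of the uncertainty-scaled deadzone idea follows; the struct name, the scale factor, and the overall structure are illustrative assumptions, not the actual filter added by this commit (which lives outside preview.cpp):

// Illustrative sketch only; names and constants are made up.
#include <cmath>

struct DeadzoneFilter
{
    float last_output = 0.f;

    // `value` is one pose parameter (e.g. yaw); `sigma` is the model's
    // predicted standard deviation for it. The larger the uncertainty,
    // the larger the radius inside which changes are ignored.
    float filter(float value, float sigma)
    {
        const float deadzone_radius = 2.f * sigma;  // scale factor is an assumption
        const float delta = value - last_output;
        if (std::abs(delta) > deadzone_radius)
            last_output = value - std::copysign(deadzone_radius, delta);
        return last_output;
    }
};

When the eyes are detected as closed, scaling sigma up before calling such a filter widens the deadzone, so the brief pose glitches caused by blinking are swallowed rather than passed on.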
Diffstat (limited to 'tracker-neuralnet/preview.cpp')
-rw-r--r--  tracker-neuralnet/preview.cpp  135
1 file changed, 135 insertions, 0 deletions
diff --git a/tracker-neuralnet/preview.cpp b/tracker-neuralnet/preview.cpp
new file mode 100644
index 00000000..76a6bbc0
--- /dev/null
+++ b/tracker-neuralnet/preview.cpp
@@ -0,0 +1,135 @@
+#include "preview.h"
+
+
+namespace neuralnet_tracker_ns
+{
+
+
+cv::Rect make_crop_rect_for_aspect(const cv::Size &size, int aspect_w, int aspect_h)
+{
+ auto [w, h] = size;
+ if ( w*aspect_h > aspect_w*h )
+ {
+ // Image is too wide
+ const int new_w = (aspect_w*h)/aspect_h;
+ return cv::Rect((w - new_w)/2, 0, new_w, h);
+ }
+ else
+ {
+ const int new_h = (aspect_h*w)/aspect_w;
+ return cv::Rect(0, (h - new_h)/2, w, new_h);
+ }
+}
+
+
+
+
+void Preview::init(const cv_video_widget& widget)
+{
+ auto [w,h] = widget.preview_size();
+ preview_size_ = { w, h };
+}
+
+
+void Preview::copy_video_frame(const cv::Mat& frame)
+{
+ cv::Rect roi = make_crop_rect_for_aspect(frame.size(), preview_size_.width, preview_size_.height);
+
+ cv::resize(frame(roi), preview_image_, preview_size_, 0, 0, cv::INTER_NEAREST);
+
+ offset_ = { (float)-roi.x, (float)-roi.y };
+ scale_ = float(preview_image_.cols) / float(roi.width);
+}
+
+
+void Preview::draw_gizmos(
+ const std::optional<PoseEstimator::Face> &face,
+ const std::optional<cv::Rect2f>& last_roi,
+ const std::optional<cv::Rect2f>& last_localizer_roi,
+ const cv::Point2f& neckjoint_position)
+{
+ if (preview_image_.empty())
+ return;
+
+ if (last_roi)
+ {
+ const int col = 255;
+ cv::rectangle(preview_image_, transform(*last_roi), cv::Scalar(0, col, 0), /*thickness=*/1);
+ }
+ if (last_localizer_roi)
+ {
+ const int col = 255;
+ cv::rectangle(preview_image_, transform(*last_localizer_roi), cv::Scalar(col, 0, 255-col), /*thickness=*/1);
+ }
+
+ if (face)
+ {
+ if (face->size>=1.f)
+ cv::circle(preview_image_, static_cast<cv::Point>(transform(face->center)), int(transform(face->size)), cv::Scalar(255,255,255), 2);
+ cv::circle(preview_image_, static_cast<cv::Point>(transform(face->center)), 3, cv::Scalar(255,255,255), -1);
+
+ const cv::Matx33f R = face->rotation.toRotMat3x3(cv::QUAT_ASSUME_UNIT);
+
+ auto draw_coord_line = [&](int i, const cv::Scalar& color)
+ {
+ const float vx = R(0,i);
+ const float vy = R(1,i);
+ static constexpr float len = 100.f;
+ cv::Point q = face->center + len*cv::Point2f{vx, vy};
+ cv::line(preview_image_, static_cast<cv::Point>(transform(face->center)), static_cast<cv::Point>(transform(q)), color, 2);
+ };
+ draw_coord_line(0, {0, 0, 255});
+ draw_coord_line(1, {0, 255, 0});
+ draw_coord_line(2, {255, 0, 0});
+
+ // Draw the computed joint position
+ auto xy = transform(neckjoint_position);
+ cv::circle(preview_image_, cv::Point(xy.x,xy.y), 5, cv::Scalar(0,0,255), -1);
+ }
+
+
+}
+
+void Preview::overlay_netinput(const cv::Mat& netinput)
+{
+ if (netinput.empty())
+ return;
+
+ const int w = std::min(netinput.cols, preview_image_.cols);
+ const int h = std::min(netinput.rows, preview_image_.rows);
+ cv::Rect roi(0, 0, w, h);
+ netinput(roi).copyTo(preview_image_(roi));
+}
+
+void Preview::draw_fps(double fps, double last_inference_time)
+{
+ char buf[128];
+ ::snprintf(buf, sizeof(buf), "%d Hz, pose inference: %d ms", std::clamp(int(fps), 0, 9999), int(last_inference_time));
+ cv::putText(preview_image_, buf, cv::Point(10, preview_image_.rows-10), cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(0, 255, 0), 1);
+}
+
+
+void Preview::copy_to_widget(cv_video_widget& widget)
+{
+ if (preview_image_.rows > 0)
+ widget.update_image(preview_image_);
+}
+
+
+cv::Rect2f Preview::transform(const cv::Rect2f& r) const
+{
+ return { (r.x - offset_.x)*scale_, (r.y - offset_.y)*scale_, r.width*scale_, r.height*scale_ };
+}
+
+cv::Point2f Preview::transform(const cv::Point2f& p) const
+{
+ return { (p.x - offset_.x)*scale_ , (p.y - offset_.y)*scale_ };
+}
+
+float Preview::transform(float s) const
+{
+ return s * scale_;
+}
+
+
+}
\ No newline at end of file
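
For context, a plausible per-frame call sequence for the Preview class, inferred only from the functions defined in this file: the actual call sites are in the tracker code, which is not part of this diff, and the helper name update_preview is hypothetical.

// Hypothetical usage sketch; only the Preview member functions shown above
// are assumed to exist, everything else is inferred.
#include "preview.h"

void update_preview(neuralnet_tracker_ns::Preview& preview,
                    cv_video_widget& widget,
                    const cv::Mat& camera_frame,
                    const cv::Mat& netinput,
                    const std::optional<neuralnet_tracker_ns::PoseEstimator::Face>& face,
                    const std::optional<cv::Rect2f>& last_roi,
                    const std::optional<cv::Rect2f>& last_localizer_roi,
                    const cv::Point2f& neckjoint_position,
                    double fps, double inference_ms)
{
    preview.init(widget);                    // pick up the widget's preview size
    preview.copy_video_frame(camera_frame);  // crop + resize, remember offset/scale
    preview.overlay_netinput(netinput);      // paste the network input into the corner
    preview.draw_gizmos(face, last_roi, last_localizer_roi, neckjoint_position);
    preview.draw_fps(fps, inference_ms);
    preview.copy_to_widget(widget);          // hand the composed image to the UI
}

Note that copy_video_frame stores the crop offset and scale, so the transform() overloads used by draw_gizmos map coordinates from the original camera frame into the resized preview image.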