Diffstat (limited to 'tracker-neuralnet/ftnoir_tracker_neuralnet.cpp')
-rw-r--r--  tracker-neuralnet/ftnoir_tracker_neuralnet.cpp  976
1 file changed, 976 insertions, 0 deletions
diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp
new file mode 100644
index 00000000..c55ddf0c
--- /dev/null
+++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp
@@ -0,0 +1,976 @@
+/* Copyright (c) 2021 Michael Welter <michael@welter-4d.de>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ */
+
+#include "ftnoir_tracker_neuralnet.h"
+#include "deadzone_filter.h"
+#include "opencv_contrib.h"
+
+#include "compat/sleep.hpp"
+#include "compat/math-imports.hpp"
+#include "compat/timer.hpp"
+#include "compat/check-visible.hpp"
+#include "cv/init.hpp"
+
+#include <omp.h>
+#include <onnxruntime_cxx_api.h>
+#include <opencv2/core.hpp>
+#include <opencv2/core/quaternion.hpp>
+
+#ifdef _MSC_VER
+# pragma warning(disable : 4702)
+#endif
+
+#include <QMutexLocker>
+#include <QDebug>
+#include <QFile>
+#include <QFileDialog>
+#include <QFileInfo>
+
+#include <cstdio>
+#include <cmath>
+#include <algorithm>
+#include <chrono>
+#include <string>
+#include <stdexcept>
+#include <unordered_map>
+
+// Some demo code for ONNX Runtime:
+// https://github.com/microsoft/onnxruntime/blob/master/csharp/test/Microsoft.ML.OnnxRuntime.EndToEndTests.Capi/C_Api_Sample.cpp
+// https://github.com/leimao/ONNX-Runtime-Inference/blob/main/src/inference.cpp
+
+namespace neuralnet_tracker_ns
+{
+
+using namespace cvcontrib;
+
+using numeric_types::vec3;
+using numeric_types::vec2;
+using numeric_types::mat33;
+
+#ifdef _MSC_VER
+std::wstring convert(const QString &s) { return s.toStdWString(); }
+#else
+std::string convert(const QString &s) { return s.toStdString(); }
+#endif
+
+
+QDir get_default_model_directory()
+{
+ return QDir(OPENTRACK_BASE_PATH+ "/" OPENTRACK_LIBRARY_PATH "models");
+}
+
+
+int enum_to_fps(int value)
+{
+ int fps = 0;
+
+ switch (value)
+ {
+ default: eval_once(qDebug() << "neuralnet tracker: invalid fps enum value");
+ [[fallthrough]];
+ case fps_default: fps = 0; break;
+ case fps_30: fps = 30; break;
+ case fps_60: fps = 60; break;
+ case fps_75: fps = 75; break;
+ case fps_125: fps = 125; break;
+ case fps_200: fps = 200; break;
+ case fps_50: fps = 50; break;
+ case fps_100: fps = 100; break;
+ case fps_120: fps = 120; break;
+ case fps_300: fps = 300; break;
+ case fps_250: fps = 250; break;
+ }
+
+ return fps;
+}
+
+
+template<class F>
+struct OnScopeExit
+{
+ explicit OnScopeExit(F&& f) : f_{ f } {}
+ ~OnScopeExit() noexcept
+ {
+ f_();
+ }
+ F f_;
+};
+
+
+CamIntrinsics make_intrinsics(const cv::Mat& img, const Settings& settings)
+{
+ const int w = img.cols, h = img.rows;
+ const double diag_fov = settings.fov * M_PI / 180.;
+ const double fov_w = 2.*atan(tan(diag_fov/2.)/sqrt(1. + h/(double)w * h/(double)w));
+ const double fov_h = 2.*atan(tan(diag_fov/2.)/sqrt(1. + w/(double)h * w/(double)h));
+ const double focal_length_w = 1. / tan(.5 * fov_w);
+ const double focal_length_h = 1. / tan(.5 * fov_h);
+ /* a
+ ______ <--- here is sensor area
+ | /
+ | /
+ f | /
+ | / 2 x angle is the fov
+ |/
+ <--- here is the hole of the pinhole camera
+
+ So, a / f = tan(fov / 2)
+ => f = a/tan(fov/2)
+       What is a? It is 1 if we define f in clip-space units where the image plane
+       spans -1 to 1, because a is then the half-width of the image plane.
+ */
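+
+    // Illustrative numbers: with a 640x480 frame and a 60 degree diagonal fov,
+    // tan(diag_fov/2) ~= 0.577 and sqrt(1 + (480/640)^2) = 1.25, so
+    // fov_w = 2*atan(0.577/1.25) ~= 49.6 deg and focal_length_w = 1/tan(fov_w/2) ~= 2.17.
+    // Likewise fov_h ~= 38.2 deg and focal_length_h ~= 2.89, both in clip-space units.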
+
+ return {
+ (float)focal_length_w,
+ (float)focal_length_h,
+ (float)fov_w,
+ (float)fov_h
+ };
+}
+
+
+cv::Rect make_crop_rect_multiple_of(const cv::Size &size, int multiple)
+{
+ const int new_w = (size.width / multiple) * multiple;
+ const int new_h = (size.height / multiple) * multiple;
+ return cv::Rect(
+ (size.width-new_w)/2,
+ (size.height-new_h)/2,
+ new_w,
+ new_h
+ );
+}
+
+template<class T>
+cv::Rect_<T> squarize(const cv::Rect_<T> &r)
+{
+ cv::Point_<T> c{r.x + r.width/T(2), r.y + r.height/T(2)};
+ const T sz = std::max(r.height, r.width);
+ return {c.x - sz/T(2), c.y - sz/T(2), sz, sz};
+}
+
+
+template<class T>
+cv::Rect_<T> expand(const cv::Rect_<T>& r, T factor)
+{
+ // xnew = l+.5*w - w*f*0.5 = l + .5*(w - new_w)
+ const cv::Size_<T> new_size = { r.width * factor, r.height * factor };
+ const cv::Point_<T> new_tl = r.tl() + (as_point(r.size()) - as_point(new_size)) / T(2);
+ return cv::Rect_<T>(new_tl, new_size);
+}
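+// Example for expand(): enlarging a 100x100 rect at (10,10) by factor 1.2
+// yields a 120x120 rect at (0,0); the center (60,60) is unchanged.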
+
+
+template<class T>
+cv::Rect_<T> ewa_filter(const cv::Rect_<T>& last, const cv::Rect_<T>& current, T alpha)
+{
+ const auto last_center = T(0.5) * (last.tl() + last.br());
+ const auto cur_center = T(0.5) * (current.tl() + current.br());
+ const cv::Point_<T> new_size = as_point(last.size()) + alpha * (as_point(current.size()) - as_point(last.size()));
+ const cv::Point_<T> new_center = last_center + alpha * (cur_center - last_center);
+ return cv::Rect_<T>(new_center - T(0.5) * new_size, as_size(new_size));
+}
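+// Note: ewa_filter() linearly blends the previous ROI toward the current one;
+// alpha = 1 jumps straight to the new detection, while a small alpha makes the
+// crop window follow the detected face only slowly.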
+
+
+cv::Vec3f image_to_world(float x, float y, float size, float reference_size_in_mm, const cv::Size2i& image_size, const CamIntrinsics& intrinsics)
+{
+ /*
+ Compute the location the network outputs in 3d space.
+
+ hhhhhh <- head size (meters)
+ \ | -----------------------
+ \ | \
+ \ | |
+ \ | |- x (meters)
+ ____ <- face.size / width |
+ \ | | |
+ \| |- focal length /
+ ------------------------
+ ------------------------------------------------>> z direction
+ z/x = zi / f
+ zi = image position
+ z = world position
+ f = focal length
+
+ We can also do deltas:
+ dz / x = dzi / f
+ => x = dz / dzi * f
+ which means we can compute x from the head size (dzi) if we assume some reference size (dz).
+ */
+ const float head_size_vertical = 2.f*size; // Size from the model is more like half the real vertical size of a human head.
+ const float xpos = -(intrinsics.focal_length_w * image_size.width * 0.5f) / head_size_vertical * reference_size_in_mm;
+ const float zpos = (x / image_size.width * 2.f - 1.f) * xpos / intrinsics.focal_length_w;
+ const float ypos = (y / image_size.height * 2.f - 1.f) * xpos / intrinsics.focal_length_h;
+ return {xpos, ypos, zpos};
+}
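+// Illustrative numbers (assumed inputs): with focal_length_w ~= 2.17, a 640 px
+// wide image, a detected face size of 80 px (head_size_vertical = 160 px) and
+// reference_size_in_mm = 200, xpos ~= -(2.17*640*0.5)/160 * 200 ~= -868 mm,
+// i.e. the head sits roughly 0.87 m in front of the camera along the -x axis.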
+
+
+vec2 world_to_image(const cv::Vec3f& pos, const cv::Size2i& image_size, const CamIntrinsics& intrinsics)
+{
+ const float xscr = pos[2] / pos[0] * intrinsics.focal_length_w;
+ const float yscr = pos[1] / pos[0] * intrinsics.focal_length_h;
+ const float x = (xscr+1.)*0.5f*image_size.width;
+ const float y = (yscr+1.)*0.5f*image_size.height;
+ return {x, y};
+}
+
+
+cv::Quatf image_to_world(cv::Quatf q)
+{
+ std::swap(q[1], q[3]);
+ q[1] = -q[1];
+ q[2] = -q[2];
+ q[3] = -q[3];
+ return q;
+}
+
+
+cv::Point2f normalize(const cv::Point2f &p, int h, int w)
+{
+ return {
+ p.x/w*2.f-1.f,
+ p.y/h*2.f-1.f
+ };
+}
+
+
+cv::Quatf rotation_from_two_vectors(const vec3 &a, const vec3 &b)
+{
+ // |axis| = |a| * |b| * sin(alpha)
+ const vec3 axis = a.cross(b);
+ // dot = |a|*|b|*cos(alpha)
+ const float dot = a.dot(b);
+ const float len = cv::norm(axis);
+ vec3 normed_axis = axis / len;
+ float angle = std::atan2(len, dot);
+ if (!(std::isfinite(normed_axis[0]) && std::isfinite(normed_axis[1]) && std::isfinite(normed_axis[2])))
+ {
+ angle = 0.f;
+ normed_axis = vec3{1.,0.,0.};
+ }
+ return cv::Quatf::createFromAngleAxis(angle, normed_axis);
+}
+
+
+// Computes correction due to head being off screen center.
+cv::Quatf compute_rotation_correction(const cv::Point3f& p)
+{
+ return rotation_from_two_vectors(
+ {-1.f,0.f,0.f}, p);
+}
+
+
+// Intersection over union. A value between 0 and 1 which measures the match between the bounding boxes.
+template<class T>
+T iou(const cv::Rect_<T> &a, const cv::Rect_<T> &b)
+{
+ auto i = a & b;
+ return double{i.area()} / (a.area()+b.area()-i.area());
+}
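+// Example: two 100x100 boxes offset by 50 px horizontally share a 50x100
+// intersection, giving iou = 5000 / (10000 + 10000 - 5000) ~= 0.33, which is
+// above the 0.25 threshold used in detect() to decide whether the localizer
+// output and the tracked ROI still refer to the same face.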
+
+
+class GuardedThreadCountSwitch
+{
+ int old_num_threads_cv_ = 1;
+ int old_num_threads_omp_ = 1;
+ public:
+ GuardedThreadCountSwitch(int num_threads)
+ {
+ old_num_threads_cv_ = cv::getNumThreads();
+ old_num_threads_omp_ = omp_get_num_threads();
+ omp_set_num_threads(num_threads);
+ cv::setNumThreads(num_threads);
+ }
+
+ ~GuardedThreadCountSwitch()
+ {
+ omp_set_num_threads(old_num_threads_omp_);
+ cv::setNumThreads(old_num_threads_cv_);
+ }
+
+ GuardedThreadCountSwitch(const GuardedThreadCountSwitch&) = delete;
+ GuardedThreadCountSwitch& operator=(const GuardedThreadCountSwitch&) = delete;
+};
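+// RAII guard: the constructor caps the OpenCV and OpenMP thread counts and the
+// destructor restores the previous values; run() below uses it to confine
+// inference to the configured number of threads.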
+
+
+bool NeuralNetTracker::detect()
+{
+ double inference_time = 0.;
+
+ OnScopeExit update_inference_time{ [&]() {
+
+ QMutexLocker lck{ &stats_mtx_ };
+ inference_time_ = inference_time;
+ } };
+
+    // If there is no past ROI from the localizer, or if the match between its
+    // output and the current ROI is too poor, we have to run it again. This
+    // causes a latency spike of maybe an additional 50%, but it only occurs
+    // when the user moves their head far enough - or when tracking is lost.
+ if (!last_localizer_roi_ || !last_roi_ ||
+ iou(*last_localizer_roi_,*last_roi_)<0.25)
+ {
+ auto [p, rect] = localizer_->run(grayscale_);
+ inference_time += localizer_->last_inference_time_millis();
+
+ if (last_roi_ && iou(rect,*last_roi_)>=0.25 && p > 0.5)
+ {
+            // The new ROI matches the result from tracking, so the user is
+            // still there. To avoid disturbing potentially recurrent models,
+            // we only update the localizer ROI and keep the tracking ROI.
+ last_localizer_roi_ = rect;
+ }
+ else if (p > 0.5 && rect.height > 32 && rect.width > 32)
+ {
+            // Tracking probably got lost since the ROIs don't match, but the
+            // localizer still finds a face, so we use the ROI from the localizer.
+ last_localizer_roi_ = rect;
+ last_roi_ = rect;
+ }
+ else
+ {
+ // Tracking lost and no localization result. The user probably can't be seen.
+ last_roi_.reset();
+ last_localizer_roi_.reset();
+ }
+ }
+
+ if (!last_roi_)
+ {
+        // In the last iteration the tracker failed to generate a trustworthy
+        // ROI and the localizer also cannot find a face.
+ draw_gizmos({}, {});
+ return false;
+ }
+
+ auto face = poseestimator_->run(grayscale_, *last_roi_);
+ inference_time += poseestimator_->last_inference_time_millis();
+
+ if (!face)
+ {
+ last_roi_.reset();
+ draw_gizmos({}, {});
+ return false;
+ }
+
+ cv::Rect2f roi = expand(face->box, (float)settings_.roi_zoom);
+
+ last_roi_ = ewa_filter(*last_roi_, roi, float(settings_.roi_filter_alpha));
+
+ QuatPose pose = compute_filtered_pose(*face);
+ last_pose_ = pose;
+
+ Affine pose_affine = {
+ pose.rot.toRotMat3x3(cv::QUAT_ASSUME_UNIT),
+ pose.pos };
+
+ {
+ QMutexLocker lck(&mtx_);
+ last_pose_affine_ = pose_affine;
+ }
+
+ draw_gizmos(*face, last_pose_affine_);
+
+ return true;
+}
+
+
+void NeuralNetTracker::draw_gizmos(
+ const std::optional<PoseEstimator::Face> &face,
+ const Affine& pose)
+{
+ if (!is_visible_)
+ return;
+
+ preview_.draw_gizmos(
+ face,
+ last_roi_,
+ last_localizer_roi_,
+ world_to_image(pose.t, grayscale_.size(), intrinsics_));
+
+ if (settings_.show_network_input)
+ {
+ cv::Mat netinput = poseestimator_->last_network_input();
+ preview_.overlay_netinput(netinput);
+ }
+}
+
+
+QuatPose NeuralNetTracker::transform_to_world_pose(const cv::Quatf &face_rotation, const cv::Point2f& face_xy, const float face_size) const
+{
+ const vec3 face_world_pos = image_to_world(
+ face_xy.x, face_xy.y, face_size, HEAD_SIZE_MM,
+ grayscale_.size(),
+ intrinsics_);
+
+ const cv::Quatf rot_correction = compute_rotation_correction(
+ face_world_pos);
+
+ cv::Quatf rot = rot_correction * image_to_world(face_rotation);
+
+    // But this is in general not the location of the rotation joint in the neck,
+    // so we need an extra offset, which we determine by computing
+    // x,y,z-pos = head_joint_loc + R_face * offset.
+ const vec3 local_offset = vec3{
+ static_cast<float>(settings_.offset_fwd),
+ static_cast<float>(settings_.offset_up),
+ static_cast<float>(settings_.offset_right)};
+ const vec3 offset = rotate(rot, local_offset);
+ const vec3 pos = face_world_pos + offset;
+
+ return { rot, pos };
+}
+
+
+QuatPose NeuralNetTracker::compute_filtered_pose(const PoseEstimator::Face &face)
+{
+ if (fps_ > 0.001 && last_pose_ && poseestimator_->has_uncertainty())
+ {
+ auto image2world = [this](const cv::Quatf &face_rotation, const cv::Point2f& face_xy, const float face_size) {
+ return this->transform_to_world_pose(face_rotation, face_xy, face_size); };
+
+ return apply_filter(
+ face,
+ *last_pose_,
+ 1./fps_,
+ std::move(image2world),
+ FiltParams{
+ float(settings_.deadzone_hardness),
+ float(settings_.deadzone_size)
+ });
+ }
+ else
+ {
+ return transform_to_world_pose(face.rotation, face.center, face.size);
+ }
+}
+
+
+NeuralNetTracker::NeuralNetTracker()
+{
+ opencv_init();
+ neuralnet_tracker_tests::run();
+}
+
+
+NeuralNetTracker::~NeuralNetTracker()
+{
+ requestInterruption();
+ wait();
+ // fast start/stop causes breakage
+ portable::sleep(1000);
+}
+
+
+module_status NeuralNetTracker::start_tracker(QFrame* videoframe)
+{
+ videoframe->show();
+ video_widget_ = std::make_unique<cv_video_widget>(videoframe);
+ layout_ = std::make_unique<QHBoxLayout>();
+ layout_->setContentsMargins(0, 0, 0, 0);
+ layout_->addWidget(&*video_widget_);
+ videoframe->setLayout(&*layout_);
+ video_widget_->show();
+ num_threads_ = settings_.num_threads;
+ start();
+ return status_ok();
+}
+
+
+bool NeuralNetTracker::load_and_initialize_model()
+{
+ const QString localizer_model_path_enc =
+ OPENTRACK_BASE_PATH+"/" OPENTRACK_LIBRARY_PATH "/models/head-localizer.onnx";
+ const QString poseestimator_model_path_enc = get_posenet_filename();
+
+ try
+ {
+ env_ = Ort::Env{
+ OrtLoggingLevel::ORT_LOGGING_LEVEL_ERROR,
+ "tracker-neuralnet"
+ };
+ auto opts = Ort::SessionOptions{};
+        // Do the thread settings here do anything?
+        // There is a warning which says to control the number of threads via
+        // OpenMP settings, which is what we do.
+ opts.SetIntraOpNumThreads(num_threads_);
+ opts.SetInterOpNumThreads(1);
+ allocator_info_ = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
+
+ localizer_.emplace(
+ allocator_info_,
+ Ort::Session{env_, convert(localizer_model_path_enc).c_str(), opts});
+
+ qDebug() << "Loading pose net " << poseestimator_model_path_enc;
+ poseestimator_.emplace(
+ allocator_info_,
+ Ort::Session{env_, convert(poseestimator_model_path_enc).c_str(), opts});
+ }
+ catch (const Ort::Exception &e)
+ {
+ qDebug() << "Failed to initialize the neural network models. ONNX error message: "
+ << e.what();
+ return false;
+ }
+ catch (const std::exception &e)
+ {
+ qDebug() << "Failed to initialize the neural network models. Error message: " << e.what();
+ return false;
+ }
+
+ return true;
+}
+
+
+bool NeuralNetTracker::open_camera()
+{
+ int rint = std::clamp(*settings_.resolution, 0, (int)std::size(resolution_choices)-1);
+ resolution_tuple res = resolution_choices[rint];
+ int fps = enum_to_fps(settings_.force_fps);
+
+ QMutexLocker l(&camera_mtx_);
+
+ camera_ = video::make_camera(settings_.camera_name);
+
+ if (!camera_)
+ return false;
+
+ video::impl::camera::info args {};
+
+ if (res.width)
+ {
+ args.width = res.width;
+ args.height = res.height;
+ }
+ if (fps)
+ args.fps = fps;
+
+ args.use_mjpeg = settings_.use_mjpeg;
+
+ if (!camera_->start(args))
+ {
+ qDebug() << "neuralnet tracker: can't open camera";
+ return false;
+ }
+
+ return true;
+}
+
+
+void NeuralNetTracker::run()
+{
+ preview_.init(*video_widget_);
+
+ GuardedThreadCountSwitch switch_num_threads_to(num_threads_);
+
+ if (!open_camera())
+ return;
+
+ if (!load_and_initialize_model())
+ return;
+
+ std::chrono::high_resolution_clock clk;
+
+ while (!isInterruptionRequested())
+ {
+ is_visible_ = check_is_visible();
+ auto t = clk.now();
+ {
+ QMutexLocker l(&camera_mtx_);
+
+ auto [ img, res ] = camera_->get_frame();
+
+ if (!res)
+ {
+ l.unlock();
+ portable::sleep(100);
+ continue;
+ }
+
+ {
+ QMutexLocker lck{&stats_mtx_};
+ resolution_ = { img.width, img.height };
+ }
+
+ auto color = prepare_input_image(img);
+
+ if (is_visible_)
+ preview_.copy_video_frame(color);
+
+ switch (img.channels)
+ {
+ case 1:
+ grayscale_.create(img.height, img.width, CV_8UC1);
+ color.copyTo(grayscale_);
+ break;
+ case 3:
+ cv::cvtColor(color, grayscale_, cv::COLOR_BGR2GRAY);
+ break;
+ default:
+ qDebug() << "Can't handle" << img.channels << "color channels";
+ return;
+ }
+ }
+
+ intrinsics_ = make_intrinsics(grayscale_, settings_);
+
+ detect();
+
+ if (is_visible_)
+ preview_.copy_to_widget(*video_widget_);
+
+ update_fps(
+ std::chrono::duration_cast<std::chrono::milliseconds>(
+ clk.now() - t).count()*1.e-3);
+ }
+}
+
+
+cv::Mat NeuralNetTracker::prepare_input_image(const video::frame& frame)
+{
+ auto img = cv::Mat(frame.height, frame.width, CV_8UC(frame.channels), (void*)frame.data, frame.stride);
+
+ // Crop if aspect ratio is not 4:3
+ if (img.rows*4 != img.cols*3)
+ {
+ img = img(make_crop_rect_for_aspect(img.size(), 4, 3));
+ }
+
+ img = img(make_crop_rect_multiple_of(img.size(), 4));
+
+ if (img.cols > 640)
+ {
+ cv::pyrDown(img, downsized_original_images_[0]);
+ img = downsized_original_images_[0];
+ }
+ if (img.cols > 640)
+ {
+ cv::pyrDown(img, downsized_original_images_[1]);
+ img = downsized_original_images_[1];
+ }
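+
+    // Example: a 1920x1080 frame becomes a centered 1440x1080 crop (4:3,
+    // assuming make_crop_rect_for_aspect() crops the wider dimension), which is
+    // then pyrDown'ed twice to 720x540 and finally 360x270.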
+
+ return img;
+}
+
+
+void NeuralNetTracker::update_fps(double dt)
+{
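+    // First-order low-pass (exponential moving average) with time constant RC:
+    // fps_new = fps_old + alpha * (1/dt - fps_old).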
+ const double alpha = dt/(dt + RC);
+ if (dt > 1e-6)
+ {
+ QMutexLocker lck{&stats_mtx_};
+ fps_ *= 1 - alpha;
+ fps_ += alpha * 1./dt;
+ }
+}
+
+
+void NeuralNetTracker::data(double *data)
+{
+ Affine tmp = [&]()
+ {
+ QMutexLocker lck(&mtx_);
+ return last_pose_affine_;
+ }();
+
+ const auto& mx = tmp.R.col(0);
+ const auto& my = tmp.R.col(1);
+ const auto& mz = tmp.R.col(2);
+
+    // For reference: https://en.wikipedia.org/wiki/Euler_angles, section "Rotation matrix". The relevant matrix is
+    // under "Tait-Bryan angles", the row "Y_alpha Z_beta X_gamma = ...", because for the NN tracker x is forward
+    // and y is up. The first column (the rotated x axis) is independent of roll, so the yaw and pitch angles
+    // (alpha and beta) are easy to recover from it.
+ const float yaw = std::atan2(mx(2), mx(0));
+ const float pitch = -std::atan2(-mx(1), std::sqrt(mx(2)*mx(2)+mx(0)*mx(0)));
+ // For the roll angle we recognize that the matrix entries in the second row contain cos(pitch)*cos(roll), and
+ // cos(pitch)*sin(roll). Using atan2 eliminates the common pitch factor and we obtain the roll angle.
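+    // (Concretely, for R = Y_alpha * Z_beta * X_gamma that second row is
+    // (sin(beta), cos(beta)*cos(gamma), -cos(beta)*sin(gamma)) with beta = pitch
+    // and gamma = roll, hence roll = atan2(-R(1,2), R(1,1)).)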
+ const float roll = std::atan2(-mz(1), my(1));
+ {
+ constexpr double rad2deg = 180/M_PI;
+ data[Yaw] = rad2deg * yaw;
+ data[Pitch] = rad2deg * pitch;
+ data[Roll] = -rad2deg * roll;
+
+ // convert to cm
+ data[TX] = -tmp.t[2] * 0.1;
+ data[TY] = tmp.t[1] * 0.1;
+ data[TZ] = -tmp.t[0] * 0.1;
+ }
+}
+
+
+Affine NeuralNetTracker::pose()
+{
+ QMutexLocker lck(&mtx_);
+ return last_pose_affine_;
+}
+
+
+std::tuple<cv::Size,double, double> NeuralNetTracker::stats() const
+{
+ QMutexLocker lck(&stats_mtx_);
+ return { resolution_, fps_, inference_time_ };
+}
+
+
+QString NeuralNetTracker::get_posenet_filename() const
+{
+ QString filename = settings_.posenet_file;
+ if (QFileInfo(filename).isRelative())
+ filename = get_default_model_directory().absoluteFilePath(filename);
+ return filename;
+}
+
+
+void NeuralNetDialog::make_fps_combobox()
+{
+ for (int k = 0; k < fps_MAX; k++)
+ {
+ const int hz = enum_to_fps(k);
+ const QString name = (hz == 0) ? tr("Default") : QString::number(hz);
+ ui_.cameraFPS->addItem(name, k);
+ }
+}
+
+void NeuralNetDialog::make_resolution_combobox()
+{
+ int k=0;
+ for (const auto [w, h] : resolution_choices)
+ {
+ const QString s = (w == 0)
+ ? tr("Default")
+ : QString::number(w) + " x " + QString::number(h);
+ ui_.resolution->addItem(s, k++);
+ }
+}
+
+
+NeuralNetDialog::NeuralNetDialog() :
+ trans_calib_(1, 2)
+{
+ ui_.setupUi(this);
+
+ make_fps_combobox();
+ make_resolution_combobox();
+
+ for (const auto& str : video::camera_names())
+ ui_.cameraName->addItem(str);
+
+ tie_setting(settings_.camera_name, ui_.cameraName);
+ tie_setting(settings_.fov, ui_.cameraFOV);
+ tie_setting(settings_.offset_fwd, ui_.tx_spin);
+ tie_setting(settings_.offset_up, ui_.ty_spin);
+ tie_setting(settings_.offset_right, ui_.tz_spin);
+ tie_setting(settings_.show_network_input, ui_.showNetworkInput);
+ tie_setting(settings_.roi_filter_alpha, ui_.roiFilterAlpha);
+ tie_setting(settings_.use_mjpeg, ui_.use_mjpeg);
+ tie_setting(settings_.roi_zoom, ui_.roiZoom);
+ tie_setting(settings_.num_threads, ui_.threadCount);
+ tie_setting(settings_.resolution, ui_.resolution);
+ tie_setting(settings_.force_fps, ui_.cameraFPS);
+ tie_setting(settings_.posenet_file, ui_.posenetFileDisplay);
+
+ connect(ui_.buttonBox, SIGNAL(accepted()), this, SLOT(doOK()));
+ connect(ui_.buttonBox, SIGNAL(rejected()), this, SLOT(doCancel()));
+ connect(ui_.camera_settings, SIGNAL(clicked()), this, SLOT(camera_settings()));
+ connect(ui_.posenetSelectButton, SIGNAL(clicked()), this, SLOT(onSelectPoseNetFile()));
+ connect(&settings_.camera_name, value_::value_changed<QString>(), this, &NeuralNetDialog::update_camera_settings_state);
+
+ update_camera_settings_state(settings_.camera_name);
+
+ connect(&calib_timer_, &QTimer::timeout, this, &NeuralNetDialog::trans_calib_step);
+ calib_timer_.setInterval(35);
+ connect(ui_.tcalib_button,SIGNAL(toggled(bool)), this, SLOT(startstop_trans_calib(bool)));
+
+ connect(&tracker_status_poll_timer_, &QTimer::timeout, this, &NeuralNetDialog::status_poll);
+ tracker_status_poll_timer_.setInterval(250);
+ tracker_status_poll_timer_.start();
+}
+
+void NeuralNetDialog::save()
+{
+ settings_.b->save();
+}
+
+void NeuralNetDialog::reload()
+{
+ settings_.b->reload();
+}
+
+void NeuralNetDialog::doOK()
+{
+ save();
+ close();
+}
+
+
+void NeuralNetDialog::doCancel()
+{
+ close();
+}
+
+
+void NeuralNetDialog::camera_settings()
+{
+ if (tracker_)
+ {
+ QMutexLocker l(&tracker_->camera_mtx_);
+ (void)tracker_->camera_->show_dialog();
+ }
+ else
+ (void)video::show_dialog(settings_.camera_name);
+}
+
+
+void NeuralNetDialog::update_camera_settings_state(const QString& name)
+{
+ (void)name;
+ ui_.camera_settings->setEnabled(true);
+}
+
+
+void NeuralNetDialog::register_tracker(ITracker * x)
+{
+ tracker_ = static_cast<NeuralNetTracker*>(x);
+ ui_.tcalib_button->setEnabled(true);
+}
+
+
+void NeuralNetDialog::unregister_tracker()
+{
+ tracker_ = nullptr;
+ ui_.tcalib_button->setEnabled(false);
+}
+
+bool NeuralNetDialog::embeddable() noexcept
+{
+ return true;
+}
+
+void NeuralNetDialog::set_buttons_visible(bool x)
+{
+ ui_.buttonBox->setVisible(x);
+}
+
+void NeuralNetDialog::status_poll()
+{
+ QString status;
+ if (!tracker_)
+ {
+ status = tr("Tracker Offline");
+ }
+ else
+ {
+ auto [ res, fps, inference_time ] = tracker_->stats();
+ status = tr("%1x%2 @ %3 FPS / Inference: %4 ms").arg(res.width).arg(res.height).arg(int(fps)).arg(inference_time, 0, 'f', 1);
+ }
+ ui_.resolution_display->setText(status);
+}
+
+
+void NeuralNetDialog::trans_calib_step()
+{
+ if (tracker_)
+ {
+ const Affine X_CM = [&]() {
+ QMutexLocker l(&calibrator_mutex_);
+ return tracker_->pose();
+ }();
+ trans_calib_.update(X_CM.R, X_CM.t);
+ auto [_, nsamples] = trans_calib_.get_estimate();
+
+ constexpr int min_yaw_samples = 15;
+ constexpr int min_pitch_samples = 12;
+ constexpr int min_samples = min_yaw_samples+min_pitch_samples;
+
+ // Don't bother counting roll samples. Roll calibration is hard enough
+ // that it's a hidden unsupported feature anyway.
+
+ QString sample_feedback;
+ if (nsamples[0] < min_yaw_samples)
+ sample_feedback = tr("%1 yaw samples. Yaw more to %2 samples for stable calibration.").arg(nsamples[0]).arg(min_yaw_samples);
+ else if (nsamples[1] < min_pitch_samples)
+ sample_feedback = tr("%1 pitch samples. Pitch more to %2 samples for stable calibration.").arg(nsamples[1]).arg(min_pitch_samples);
+ else
+ {
+ const int nsamples_total = nsamples[0] + nsamples[1];
+ sample_feedback = tr("%1 samples. Over %2, good!").arg(nsamples_total).arg(min_samples);
+ }
+ ui_.sample_count_display->setText(sample_feedback);
+ }
+ else
+ startstop_trans_calib(false);
+}
+
+
+void NeuralNetDialog::startstop_trans_calib(bool start)
+{
+ QMutexLocker l(&calibrator_mutex_);
+ // FIXME: does not work ...
+ if (start)
+ {
+ qDebug() << "pt: starting translation calibration";
+ calib_timer_.start();
+ trans_calib_.reset();
+ ui_.sample_count_display->setText(QString());
+        // The tracker must run with zeroed offsets for calibration.
+ settings_.offset_fwd = 0;
+ settings_.offset_up = 0;
+ settings_.offset_right = 0;
+ }
+ else
+ {
+ calib_timer_.stop();
+ qDebug() << "pt: stopping translation calibration";
+ {
+ auto [tmp, nsamples] = trans_calib_.get_estimate();
+ settings_.offset_fwd = int(tmp[0]);
+ settings_.offset_up = int(tmp[1]);
+ settings_.offset_right = int(tmp[2]);
+ }
+ }
+ ui_.tx_spin->setEnabled(!start);
+ ui_.ty_spin->setEnabled(!start);
+ ui_.tz_spin->setEnabled(!start);
+
+ if (start)
+ ui_.tcalib_button->setText(tr("Stop calibration"));
+ else
+ ui_.tcalib_button->setText(tr("Start calibration"));
+}
+
+
+void NeuralNetDialog::onSelectPoseNetFile()
+{
+ const auto root = get_default_model_directory();
+ // Start with the current setting
+ QString filename = settings_.posenet_file;
+ // If the filename is relative then assume that the file is located under the
+ // model directory. Under regular use this should always be the case.
+ if (QFileInfo(filename).isRelative())
+ filename = root.absoluteFilePath(filename);
+ filename = QFileDialog::getOpenFileName(this,
+ tr("Select Pose Net ONNX"), filename, tr("ONNX Files (*.onnx)"));
+ // In case the user aborted.
+ if (filename.isEmpty())
+ return;
+    // When a file under the model directory is selected, we can drop the
+    // directory prefix. This is more robust than storing absolute paths, e.g.
+    // in case the user moves the opentrack install folder or reuses old settings.
+    // When the file is not in the model directory, we have to use the absolute
+    // path, which is also fine as a developer feature.
+ if (filename.startsWith(root.absolutePath()))
+ filename = root.relativeFilePath(filename);
+ settings_.posenet_file = filename;
+}
+
+
+Settings::Settings() : opts("neuralnet-tracker") {}
+
+} // neuralnet_tracker_ns
+
+OPENTRACK_DECLARE_TRACKER(NeuralNetTracker, NeuralNetDialog, NeuralNetMetadata)