Diffstat (limited to 'tracker-neuralnet/ftnoir_tracker_neuralnet.cpp')
-rw-r--r--  tracker-neuralnet/ftnoir_tracker_neuralnet.cpp | 1106
1 file changed, 584 insertions(+), 522 deletions(-)
diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp
index 2fad17aa..1fd50a94 100644
--- a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp
+++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp
@@ -6,16 +6,19 @@
*/
#include "ftnoir_tracker_neuralnet.h"
+#include "deadzone_filter.h"
+#include "opencv_contrib.h"
+
#include "compat/sleep.hpp"
#include "compat/math-imports.hpp"
-#include "cv/init.hpp"
-#include <opencv2/core.hpp>
-#include <opencv2/core/hal/interface.h>
-#include <opencv2/core/types.hpp>
-#include <opencv2/calib3d.hpp>
-#include <opencv2/imgcodecs.hpp>
#include "compat/timer.hpp"
+#include "compat/check-visible.hpp"
+#include "cv/init.hpp"
+
#include <omp.h>
+#include <onnxruntime_cxx_api.h>
+#include <opencv2/core.hpp>
+#include <opencv2/core/quaternion.hpp>
#ifdef _MSC_VER
# pragma warning(disable : 4702)
@@ -24,26 +27,32 @@
#include <QMutexLocker>
#include <QDebug>
#include <QFile>
+#include <QFileDialog>
+#include <QFileInfo>
#include <cstdio>
#include <cmath>
#include <algorithm>
#include <chrono>
+#include <string>
+#include <stdexcept>
+#include <unordered_map>
// Some demo code for onnx
// https://github.com/microsoft/onnxruntime/blob/master/csharp/test/Microsoft.ML.OnnxRuntime.EndToEndTests.Capi/C_Api_Sample.cpp
// https://github.com/leimao/ONNX-Runtime-Inference/blob/main/src/inference.cpp
-namespace
+namespace neuralnet_tracker_ns
{
-using numeric_types::vec3;
-using numeric_types::vec2;
-using numeric_types::mat33;
-
-// Minimal difference if at all going from 1 to 2 threads.
-static constexpr int num_threads = 1;
+using namespace cvcontrib;
+using f = float;
+template<int n> using vec = cv::Vec<f, n>;
+template<int y, int x> using mat = cv::Matx<f, y, x>;
+using vec2 = vec<2>;
+using vec3 = vec<3>;
+using mat33 = mat<3, 3>;
#if _MSC_VER
std::wstring convert(const QString &s) { return s.toStdWString(); }
@@ -52,467 +61,405 @@ std::string convert(const QString &s) { return s.toStdString(); }
#endif
-float sigmoid(float x)
+QDir get_default_model_directory()
{
- return 1.f/(1.f + std::exp(-x));
+ return QDir(OPENTRACK_BASE_PATH+ "/" OPENTRACK_LIBRARY_PATH "models");
}
-template<class T>
-cv::Rect_<T> squarize(const cv::Rect_<T> &r)
+int enum_to_fps(int value)
{
- cv::Point_<T> c{r.x + r.width/T(2), r.y + r.height/T(2)};
- const T sz = std::max(r.height, r.width);
- return {c.x - sz/T(2), c.y - sz/T(2), sz, sz};
-}
+ int fps = 0;
+ switch (value)
+ {
+ default: eval_once(qDebug() << "neuralnet tracker: invalid fps enum value");
+ [[fallthrough]];
+ case fps_default: fps = 0; break;
+ case fps_30: fps = 30; break;
+ case fps_60: fps = 60; break;
+ case fps_75: fps = 75; break;
+ case fps_125: fps = 125; break;
+ case fps_200: fps = 200; break;
+ case fps_50: fps = 50; break;
+ case fps_100: fps = 100; break;
+ case fps_120: fps = 120; break;
+ case fps_300: fps = 300; break;
+ case fps_250: fps = 250; break;
+ }
-int compute_padding(const cv::Rect &r, int w, int h)
-{
- using std::max;
- return max({
- max(-r.x, 0),
- max(-r.y, 0),
- max(r.x+r.width-w, 0),
- max(r.y+r.height-h, 0)
- });
+ return fps;
}
-cv::Rect2f unnormalize(const cv::Rect2f &r, int h, int w)
+template<class F>
+struct OnScopeExit
{
- auto unnorm = [](float x) -> float { return 0.5*(x+1); };
- auto tl = r.tl();
- auto br = r.br();
- auto x0 = unnorm(tl.x)*w;
- auto y0 = unnorm(tl.y)*h;
- auto x1 = unnorm(br.x)*w;
- auto y1 = unnorm(br.y)*h;
- return {
- x0, y0, x1-x0, y1-y0
- };
-}
+ explicit OnScopeExit(F&& f) : f_{ f } {}
+ ~OnScopeExit() noexcept
+ {
+ f_();
+ }
+ F f_;
+};
-cv::Point2f normalize(const cv::Point2f &p, int h, int w)
+
+CamIntrinsics make_intrinsics(const cv::Mat& img, const Settings& settings)
{
+ const int w = img.cols, h = img.rows;
+ const double diag_fov = settings.fov * M_PI / 180.;
+ const double fov_w = 2.*atan(tan(diag_fov/2.)/sqrt(1. + h/(double)w * h/(double)w));
+ const double fov_h = 2.*atan(tan(diag_fov/2.)/sqrt(1. + w/(double)h * w/(double)h));
+ const double focal_length_w = 1. / tan(.5 * fov_w);
+ const double focal_length_h = 1. / tan(.5 * fov_h);
+    /* a
+      ______  <--- here is sensor area
+      |    /
+      |   /
+    f |  /
+      | /   2 x angle is the fov
+      |/
+        <--- here is the hole of the pinhole camera
+
+      So, a / f = tan(fov / 2)
+      => f = a/tan(fov/2)
+      What is a?
+      1 if we define f in terms of clip space where the image plane goes from -1 to 1. Because a is the half-width.
+    */
+
return {
- p.x/w*2.f-1.f,
- p.y/h*2.f-1.f
+ (float)focal_length_w,
+ (float)focal_length_h,
+ (float)fov_w,
+ (float)fov_h
};
}
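+// Worked example (illustrative numbers, not from the original code): for a 4:3
+// frame and a diagonal FOV of 60 deg, tan(30 deg) ~= 0.577, so
+//   tan(fov_w/2) = 0.577 / sqrt(1 + (3/4)^2) ~= 0.462  ->  fov_w ~= 49.6 deg, focal_length_w ~= 2.17
+//   tan(fov_h/2) = 0.577 / sqrt(1 + (4/3)^2) ~= 0.346  ->  fov_h ~= 38.2 deg, focal_length_h ~= 2.89
+// i.e. the returned focal lengths are in the clip-space units described above.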
-mat33 rotation_from_two_vectors(const vec3 &a, const vec3 &b)
+cv::Rect make_crop_rect_multiple_of(const cv::Size &size, int multiple)
{
- vec3 axis = a.cross(b);
- const float len_a = cv::norm(a);
- const float len_b = cv::norm(b);
- const float len_axis = cv::norm(axis);
- const float sin_angle = std::clamp(len_axis / (len_a * len_b), -1.f, 1.f);
- const float angle = std::asin(sin_angle);
- axis *= angle/(1.e-12 + len_axis);
- mat33 out;
- cv::Rodrigues(axis, out);
- return out;
+ const int new_w = (size.width / multiple) * multiple;
+ const int new_h = (size.height / multiple) * multiple;
+ return cv::Rect(
+ (size.width-new_w)/2,
+ (size.height-new_h)/2,
+ new_w,
+ new_h
+ );
}
-
-/* Computes correction due to head being off screen center.
- x, y: In screen space, i.e. in [-1,1]
- focal_length_x: In screen space
-*/
-mat33 compute_rotation_correction(const cv::Point2f &p, float focal_length_x)
+template<class T>
+cv::Rect_<T> squarize(const cv::Rect_<T> &r)
{
- return rotation_from_two_vectors(
- {1.f,0.f,0.f},
- {focal_length_x, p.y, p.x});
+ cv::Point_<T> c{r.x + r.width/T(2), r.y + r.height/T(2)};
+ const T sz = std::max(r.height, r.width);
+ return {c.x - sz/T(2), c.y - sz/T(2), sz, sz};
}
-mat33 quaternion_to_mat33(const std::array<float,4> quat)
+template<class T>
+cv::Rect_<T> expand(const cv::Rect_<T>& r, T factor)
{
- mat33 m;
- const float w = quat[0];
- const float i = quat[1];
- const float j = quat[2];
- const float k = quat[3];
- m(0,0) = 1.f - 2.f*(j*j + k*k);
- m(1,0) = 2.f*(i*j + k*w);
- m(2,0) = 2.f*(i*k - j*w);
- m(0,1) = 2.f*(i*j - k*w);
- m(1,1) = 1.f - 2.f*(i*i + k*k);
- m(2,1) = 2.f*(j*k + i*w);
- m(0,2) = 2.f*(i*k + j*w);
- m(1,2) = 2.f*(j*k - i*w);
- m(2,2) = 1.f - 2.f*(i*i + j*j);
- return m;
+ // xnew = l+.5*w - w*f*0.5 = l + .5*(w - new_w)
+ const cv::Size_<T> new_size = { r.width * factor, r.height * factor };
+ const cv::Point_<T> new_tl = r.tl() + (as_point(r.size()) - as_point(new_size)) / T(2);
+ return cv::Rect_<T>(new_tl, new_size);
}
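+// Worked example (assumed values): expand({0, 0, 100, 100}, 1.2f) keeps the center
+// at (50, 50) and yields {-10, -10, 120, 120}; a factor below 1 shrinks the
+// rectangle around the same center.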
template<class T>
-T iou(const cv::Rect_<T> &a, const cv::Rect_<T> &b)
+cv::Rect_<T> ewa_filter(const cv::Rect_<T>& last, const cv::Rect_<T>& current, T alpha)
{
- auto i = a & b;
- return double{i.area()} / (a.area()+b.area()-i.area());
+ const auto last_center = T(0.5) * (last.tl() + last.br());
+ const auto cur_center = T(0.5) * (current.tl() + current.br());
+ const cv::Point_<T> new_size = as_point(last.size()) + alpha * (as_point(current.size()) - as_point(last.size()));
+ const cv::Point_<T> new_center = last_center + alpha * (cur_center - last_center);
+ return cv::Rect_<T>(new_center - T(0.5) * new_size, as_size(new_size));
}
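+// Note (sketch of the intended behaviour): this is an exponentially weighted moving
+// average of the ROI. With alpha = 0.1 the box moves only 10% of the way from `last`
+// towards `current` per frame, damping jitter; alpha = 1 simply returns `current`.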
-} // namespace
-
-
-namespace neuralnet_tracker_ns
+cv::Vec3f image_to_world(float x, float y, float size, float reference_size_in_mm, const cv::Size2i& image_size, const CamIntrinsics& intrinsics)
{
+ /*
+ Compute the location the network outputs in 3d space.
-
-int enum_to_fps(int value)
-{
- switch (value)
- {
- case fps_30: return 30;
- case fps_60: return 60;
- default: [[fallthrough]];
- case fps_default: return 0;
- }
+       hhhhhh  <- head size (meters)
+      \      |    -----------------------
+       \     |                           \
+        \    |                            |
+         \   |                            |-  x (meters)
+          ____  <- face.size / width      |
+           \  |                           |
+            \ |   |- focal length        /
+             \|   -----------------------
+              ------------------------------------------------>> z direction
+       z/x = zi / f
+       zi = image position
+       z  = world position
+       f  = focal length
+
+       We can also do deltas:
+       dz / x = dzi / f
+       => x = dz / dzi * f
+       which means we can compute x from the head size (dzi) if we assume some reference size (dz).
+    */
+ const float head_size_vertical = 2.f*size; // Size from the model is more like half the real vertical size of a human head.
+ const float xpos = -(intrinsics.focal_length_w * image_size.width * 0.5f) / head_size_vertical * reference_size_in_mm;
+ const float zpos = (x / image_size.width * 2.f - 1.f) * xpos / intrinsics.focal_length_w;
+ const float ypos = (y / image_size.height * 2.f - 1.f) * xpos / intrinsics.focal_length_h;
+ return {xpos, ypos, zpos};
}
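+// Illustrative numbers (assumed, not from the original code): with focal_length_w
+// ~= 2.17, an image width of 640 px and a network "size" of 80 px (so
+// head_size_vertical = 160 px), |xpos| = 2.17 * 320 / 160 * reference_size_in_mm
+// ~= 4.3 * reference_size_in_mm, i.e. the head is roughly 4.3 head-heights away.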
-Localizer::Localizer(Ort::MemoryInfo &allocator_info, Ort::Session &&session) :
- session{std::move(session)},
- scaled_frame(input_img_height, input_img_width, CV_8U),
- input_mat(input_img_height, input_img_width, CV_32F)
+vec2 world_to_image(const cv::Vec3f& pos, const cv::Size2i& image_size, const CamIntrinsics& intrinsics)
{
- // Only works when input_mat does not reallocated memory ...which it should not.
- // Non-owning memory reference to input_mat?
- // Note: shape = (bach x channels x h x w)
- const std::int64_t input_shape[4] = { 1, 1, input_img_height, input_img_width };
- input_val = Ort::Value::CreateTensor<float>(allocator_info, input_mat.ptr<float>(0), input_mat.total(), input_shape, 4);
-
- const std::int64_t output_shape[2] = { 1, 5 };
- output_val = Ort::Value::CreateTensor<float>(allocator_info, results.data(), results.size(), output_shape, 2);
+ const float xscr = pos[2] / pos[0] * intrinsics.focal_length_w;
+ const float yscr = pos[1] / pos[0] * intrinsics.focal_length_h;
+ const float x = (xscr+1.)*0.5f*image_size.width;
+ const float y = (yscr+1.)*0.5f*image_size.height;
+ return {x, y};
}
-std::pair<float, cv::Rect2f> Localizer::run(
- const cv::Mat &frame)
+cv::Quatf image_to_world(cv::Quatf q)
{
- auto p = input_mat.ptr(0);
-
- cv::resize(frame, scaled_frame, { input_img_width, input_img_height }, 0, 0, cv::INTER_AREA);
- scaled_frame.convertTo(input_mat, CV_32F, 1./255., -0.5);
-
- assert (input_mat.ptr(0) == p);
- assert (!input_mat.empty() && input_mat.isContinuous());
- assert (input_mat.cols == input_img_width && input_mat.rows == input_img_height);
-
- const char* input_names[] = {"x"};
- const char* output_names[] = {"logit_box"};
-
- //Timer t_; t_.start();
-
- const auto nt = omp_get_num_threads();
- omp_set_num_threads(num_threads);
- session.Run(Ort::RunOptions{nullptr}, input_names, &input_val, 1, output_names, &output_val, 1);
- omp_set_num_threads(nt);
-
- //qDebug() << "localizer: " << t_.elapsed_ms() << " ms\n";
-
- const cv::Rect2f roi = unnormalize(cv::Rect2f{
- results[1],
- results[2],
- results[3]-results[1], // Width
- results[4]-results[2] // Height
- }, frame.rows, frame.cols);
- const float score = sigmoid(results[0]);
-
- return { score, roi };
+ std::swap(q[1], q[3]);
+ q[1] = -q[1];
+ q[2] = -q[2];
+ q[3] = -q[3];
+ return q;
}
-PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&session) :
- session{std::move(session)},
- scaled_frame(input_img_height, input_img_width, CV_8U),
- input_mat(input_img_height, input_img_width, CV_32F)
+cv::Point2f normalize(const cv::Point2f &p, int h, int w)
{
- {
- const std::int64_t input_shape[4] = { 1, 1, input_img_height, input_img_width };
- input_val = Ort::Value::CreateTensor<float>(allocator_info, input_mat.ptr<float>(0), input_mat.total(), input_shape, 4);
- }
-
- {
- const std::int64_t output_shape[2] = { 1, 3 };
- output_val[0] = Ort::Value::CreateTensor<float>(
- allocator_info, &output_coord[0], output_coord.rows, output_shape, 2);
- }
-
- {
- const std::int64_t output_shape[2] = { 1, 4 };
- output_val[1] = Ort::Value::CreateTensor<float>(
- allocator_info, &output_quat[0], output_quat.rows, output_shape, 2);
- }
-
- {
- const std::int64_t output_shape[2] = { 1, 4 };
- output_val[2] = Ort::Value::CreateTensor<float>(
- allocator_info, &output_box[0], output_box.rows, output_shape, 2);
- }
+ return {
+ p.x/w*2.f-1.f,
+ p.y/h*2.f-1.f
+ };
}
-int PoseEstimator::find_input_intensity_90_pct_quantile() const
+cv::Quatf rotation_from_two_vectors(const vec3 &a, const vec3 &b)
{
- const int channels[] = { 0 };
- const int hist_size[] = { 255 };
- float range[] = { 0, 256 };
- const float* ranges[] = { range };
- cv::Mat hist;
- cv::calcHist(&scaled_frame, 1, channels, cv::Mat(), hist, 1, hist_size, ranges, true, false);
- int gray_level = 0;
- const int num_pixels_quantile = scaled_frame.total()*0.9f;
- int num_pixels_accum = 0;
- for (int i=0; i<hist_size[0]; ++i)
+ // |axis| = |a| * |b| * sin(alpha)
+ const vec3 axis = a.cross(b);
+ // dot = |a|*|b|*cos(alpha)
+ const float dot = a.dot(b);
+ const float len = cv::norm(axis);
+ vec3 normed_axis = axis / len;
+ float angle = std::atan2(len, dot);
+ if (!(std::isfinite(normed_axis[0]) && std::isfinite(normed_axis[1]) && std::isfinite(normed_axis[2])))
{
- num_pixels_accum += hist.at<float>(i);
- if (num_pixels_accum > num_pixels_quantile)
- {
- gray_level = i;
- break;
- }
+ angle = 0.f;
+ normed_axis = vec3{1.,0.,0.};
}
- return gray_level;
+ return cv::Quatf::createFromAngleAxis(angle, normed_axis);
}
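+// Worked out: |a x b| = |a||b| sin(theta) and a.b = |a||b| cos(theta), so
+// atan2(|a x b|, a.b) = theta in [0, pi] independent of the vector lengths.
+// For (anti)parallel vectors the axis is undefined and the code above falls
+// back to the identity rotation.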
-std::optional<PoseEstimator::Face> PoseEstimator::run(
- const cv::Mat &frame, const cv::Rect &box)
+// Computes correction due to head being off screen center.
+cv::Quatf compute_rotation_correction(const cv::Point3f& p)
{
- cv::Mat cropped;
-
- const int patch_size = std::max(box.width, box.height)*1.05;
- const cv::Point2f patch_center = {
- std::clamp<float>(box.x + 0.5f*box.width, 0.f, frame.cols),
- std::clamp<float>(box.y + 0.5f*box.height, 0.f, frame.rows)
- };
- cv::getRectSubPix(frame, {patch_size, patch_size}, patch_center, cropped);
-
- // Will get failure if patch_center is outside image boundaries.
- // Have to catch this case.
- if (cropped.rows != patch_size || cropped.cols != patch_size)
- return {};
-
- auto p = input_mat.ptr(0);
-
- cv::resize(cropped, scaled_frame, { input_img_width, input_img_height }, 0, 0, cv::INTER_AREA);
-
- // Automatic brightness amplification.
- const int brightness = find_input_intensity_90_pct_quantile();
- const double alpha = brightness<127 ? 0.5/std::max(5,brightness) : 1./255;
- const double beta = -0.5;
-
- scaled_frame.convertTo(input_mat, CV_32F, alpha, beta);
-
- assert (input_mat.ptr(0) == p);
- assert (!input_mat.empty() && input_mat.isContinuous());
- assert (input_mat.cols == input_img_width && input_mat.rows == input_img_height);
-
- const char* input_names[] = {"x"};
- const char* output_names[] = {"pos_size", "quat", "box"};
-
- //Timer t_; t_.start();
+ return rotation_from_two_vectors(
+ {-1.f,0.f,0.f}, p);
+}
- const auto nt = omp_get_num_threads();
- omp_set_num_threads(num_threads);
- session.Run(Ort::RunOptions{nullptr}, input_names, &input_val, 1, output_names, output_val, 3);
- omp_set_num_threads(nt);
- // FIXME: Execution time fluctuates wildly. 19 to 26 ms. Why???
- // The instructions are always the same. Maybe a memory allocation
- // issue. The ONNX api suggests that tensor are allocated in an
- // arena. Does that matter? Maybe the issue is something else?
+// Intersection over union. A value between 0 and 1 which measures the match between the bounding boxes.
+template<class T>
+T iou(const cv::Rect_<T> &a, const cv::Rect_<T> &b)
+{
+ auto i = a & b;
+ return double{i.area()} / (a.area()+b.area()-i.area());
+}
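+// Worked example (assumed boxes): a = {0, 0, 100, 100}, b = {50, 0, 100, 100} gives
+// intersection 5000 and union 15000, so iou ~= 0.33 - above the 0.25 threshold
+// used in detect() below.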
- //qDebug() << "pose net: " << t_.elapsed_ms() << " ms\n";
- // Perform coordinate transformation.
- // From patch-local normalized in [-1,1] to
- // frame unnormalized pixel coordinates.
+class GuardedThreadCountSwitch
+{
+ int old_num_threads_cv_ = 1;
+ int old_num_threads_omp_ = 1;
+ public:
+ GuardedThreadCountSwitch(int num_threads)
+ {
+ old_num_threads_cv_ = cv::getNumThreads();
+ old_num_threads_omp_ = omp_get_num_threads();
+ omp_set_num_threads(num_threads);
+ cv::setNumThreads(num_threads);
+ }
- const cv::Point2f center = patch_center +
- (0.5f*patch_size)*cv::Point2f{output_coord[0], output_coord[1]};
+ ~GuardedThreadCountSwitch()
+ {
+ omp_set_num_threads(old_num_threads_omp_);
+ cv::setNumThreads(old_num_threads_cv_);
+ }
- const float size = patch_size*0.5f*output_coord[2];
+ GuardedThreadCountSwitch(const GuardedThreadCountSwitch&) = delete;
+ GuardedThreadCountSwitch& operator=(const GuardedThreadCountSwitch&) = delete;
+};
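+// Usage sketch (illustrative): pins OpenCV's and OpenMP's thread counts to the
+// requested value for the enclosing scope and restores the previously read values
+// on destruction, e.g.
+//   { GuardedThreadCountSwitch switch_threads(1); /* run inference single-threaded */ }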
- // Following Eigen which uses quat components in the order w, x, y, z.
- const std::array<float,4> rotation = {
- output_quat[3],
- output_quat[0],
- output_quat[1],
- output_quat[2] };
- const cv::Rect2f outbox = {
- patch_center.x + (0.5f*patch_size)*output_box[0],
- patch_center.y + (0.5f*patch_size)*output_box[1],
- 0.5f*patch_size*(output_box[2]-output_box[0]),
- 0.5f*patch_size*(output_box[3]-output_box[1])
- };
+bool NeuralNetTracker::detect()
+{
+ double inference_time = 0.;
- return std::optional<Face>({
- rotation, outbox, center, size
- });
-}
+ OnScopeExit update_inference_time{ [&]() {
+ QMutexLocker lck{ &stats_mtx_ };
+ inference_time_ = inference_time;
+ } };
-cv::Mat PoseEstimator::last_network_input() const
-{
- cv::Mat ret;
- if (!input_mat.empty())
+    // If there is no past ROI from the localizer, or if the match of its output
+    // with the current ROI is too poor, we have to run it again. This causes a
+    // latency spike of maybe an additional 50%. But it only occurs when the user
+    // moves their head far enough - or when tracking is lost ...
+ if (!last_localizer_roi_ || !last_roi_ ||
+ iou(*last_localizer_roi_,*last_roi_)<0.25)
{
- input_mat.convertTo(ret, CV_8U, 255., 127.);
- cv::cvtColor(ret, ret, cv::COLOR_GRAY2RGB);
- }
- return ret;
-}
+ auto [p, rect] = localizer_->run(grayscale_);
+ inference_time += localizer_->last_inference_time_millis();
-
-bool neuralnet_tracker::detect()
-{
- // Note: BGR colors!
- if (!last_localizer_roi || !last_roi ||
- iou(*last_localizer_roi,*last_roi)<0.25)
- {
- auto [p, rect] = localizer->run(grayscale);
- if (p > 0.5)
+ if (last_roi_ && iou(rect,*last_roi_)>=0.25 && p > 0.5)
+ {
+            // The new ROI matches the result from tracking, so the user is
+            // still there. To avoid disturbing recurrent models, we only update
+            // the localizer's ROI and leave the tracking ROI untouched.
+ last_localizer_roi_ = rect;
+ }
+ else if (p > 0.5 && rect.height > 32 && rect.width > 32)
{
- last_localizer_roi = rect;
- last_roi = rect;
+            // Tracking probably got lost since the ROIs don't match, but the
+            // localizer still finds a face, so we use the ROI from the localizer.
+ last_localizer_roi_ = rect;
+ last_roi_ = rect;
+ }
+ else
+ {
+ // Tracking lost and no localization result. The user probably can't be seen.
+ last_roi_.reset();
+ last_localizer_roi_.reset();
}
}
- if (!last_roi)
+ if (!last_roi_)
+ {
+        // In the last iteration the tracker failed to generate a trustworthy
+        // ROI and the localizer also cannot find a face.
+ draw_gizmos({}, {});
return false;
+ }
+
+ auto face = poseestimator_->run(grayscale_, *last_roi_);
+ inference_time += poseestimator_->last_inference_time_millis();
- auto face = poseestimator->run(grayscale, *last_roi);
-
if (!face)
{
- last_roi.reset();
+ last_roi_.reset();
+ draw_gizmos({}, {});
return false;
}
- last_roi = face->box;
+ cv::Rect2f roi = expand(face->box, (float)settings_.roi_zoom);
- Affine pose = compute_pose(*face);
+ last_roi_ = ewa_filter(*last_roi_, roi, float(settings_.roi_filter_alpha));
- draw_gizmos(frame, *face, pose);
+ QuatPose pose = compute_filtered_pose(*face);
+ last_pose_ = pose;
+
+ Affine pose_affine = {
+ pose.rot.toRotMat3x3(cv::QUAT_ASSUME_UNIT),
+ pose.pos };
{
- QMutexLocker lck(&mtx);
- this->pose_ = pose;
+ QMutexLocker lck(&mtx_);
+ last_pose_affine_ = pose_affine;
}
+ draw_gizmos(*face, last_pose_affine_);
+
return true;
}
-Affine neuralnet_tracker::compute_pose(const PoseEstimator::Face &face) const
+void NeuralNetTracker::draw_gizmos(
+ const std::optional<PoseEstimator::Face> &face,
+ const Affine& pose)
{
- const mat33 rot_correction = compute_rotation_correction(
- normalize(face.center, frame.rows, frame.cols),
- intrinsics.focal_length_w);
+ if (!is_visible_)
+ return;
- const mat33 m = rot_correction * quaternion_to_mat33(face.rotation);
+ preview_.draw_gizmos(
+ face,
+ last_roi_,
+ last_localizer_roi_,
+ world_to_image(pose.t, grayscale_.size(), intrinsics_));
- /*
-
- hhhhhh <- head size (meters)
- \ | -----------------------
- \ | \
- \ | |
- \ | |- tz (meters)
- ____ <- face.size / width |
- \ | | |
- \| |- focal length /
- ------------------------
- */
+ if (settings_.show_network_input)
+ {
+ cv::Mat netinput = poseestimator_->last_network_input();
+ preview_.overlay_netinput(netinput);
+ }
+}
+
+
+QuatPose NeuralNetTracker::transform_to_world_pose(const cv::Quatf &face_rotation, const cv::Point2f& face_xy, const float face_size) const
+{
+ const vec3 face_world_pos = image_to_world(
+ face_xy.x, face_xy.y, face_size, HEAD_SIZE_MM,
+ grayscale_.size(),
+ intrinsics_);
+
+ const cv::Quatf rot_correction = compute_rotation_correction(
+ face_world_pos);
- // Compute the location the network outputs in 3d space.
- const vec3 face_world_pos = image_to_world(face.center.x, face.center.y, face.size, head_size_mm);
+ cv::Quatf rot = rot_correction * image_to_world(face_rotation);
// But this is in general not the location of the rotation joint in the neck.
- // So we need an extra offset. Which we determine by solving
+    // So we need an extra offset, which we determine by computing
+    // x,y,z-pos = head_joint_loc + R_face * offset
+ const vec3 local_offset = vec3{
+ static_cast<float>(settings_.offset_fwd),
+ static_cast<float>(settings_.offset_up),
+ static_cast<float>(settings_.offset_right)};
+ const vec3 offset = rotate(rot, local_offset);
+ const vec3 pos = face_world_pos + offset;
- const vec3 pos = face_world_pos
- + m * vec3{
- static_cast<float>(s.offset_fwd),
- static_cast<float>(s.offset_up),
- static_cast<float>(s.offset_right)};
-
- return { m, pos };
+ return { rot, pos };
}
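+// Summarizing the code above: with R = rot_correction * R_face and the configured
+// neck offset o = (offset_fwd, offset_up, offset_right), the reported pose is
+// pos = face_world_pos + R * o, i.e. the offset is specified in head-local
+// coordinates and rotated into the world frame before being added.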
-void neuralnet_tracker::draw_gizmos(
- cv::Mat frame,
- const PoseEstimator::Face &face,
- const Affine& pose) const
+QuatPose NeuralNetTracker::compute_filtered_pose(const PoseEstimator::Face &face)
{
- if (last_roi)
- {
- const int col = 255;
- cv::rectangle(frame, *last_roi, cv::Scalar(0, 255, 0), /*thickness=*/1);
- }
- if (last_localizer_roi)
- {
- const int col = 255;
- cv::rectangle(frame, *last_localizer_roi, cv::Scalar(col, 0, 255-col), /*thickness=*/1);
- }
-
- if (face.size>=1.f)
- cv::circle(frame, static_cast<cv::Point>(face.center), int(face.size), cv::Scalar(255,255,255), 2);
- cv::circle(frame, static_cast<cv::Point>(face.center), 3, cv::Scalar(255,255,255), -1);
-
- auto draw_coord_line = [&](int i, const cv::Scalar& color)
- {
- const float vx = -pose.R(2,i);
- const float vy = -pose.R(1,i);
- static constexpr float len = 100.f;
- cv::Point q = face.center + len*cv::Point2f{vx, vy};
- cv::line(frame, static_cast<cv::Point>(face.center), static_cast<cv::Point>(q), color, 2);
- };
- draw_coord_line(0, {0, 0, 255});
- draw_coord_line(1, {0, 255, 0});
- draw_coord_line(2, {255, 0, 0});
-
- if (s.show_network_input)
+ if (fps_ > 0.001 && last_pose_ && poseestimator_->has_uncertainty())
{
- cv::Mat netinput = poseestimator->last_network_input();
- if (!netinput.empty())
- {
- const int w = std::min(netinput.cols, frame.cols);
- const int h = std::min(netinput.rows, frame.rows);
- cv::Rect roi(0, 0, w, h);
- netinput(roi).copyTo(frame(roi));
- }
+ auto image2world = [this](const cv::Quatf &face_rotation, const cv::Point2f& face_xy, const float face_size) {
+ return this->transform_to_world_pose(face_rotation, face_xy, face_size); };
+
+ return apply_filter(
+ face,
+ *last_pose_,
+ 1./fps_,
+ std::move(image2world),
+ FiltParams{
+ float(settings_.deadzone_hardness),
+ float(settings_.deadzone_size)
+ });
}
+ else
{
- // Draw the computed joint position
- auto xy = world_to_image(pose.t);
- cv::circle(frame, cv::Point(xy[0],xy[1]), 5, cv::Scalar(0,0,255), -1);
+ return transform_to_world_pose(face.rotation, face.center, face.size);
}
-
- char buf[128];
- ::snprintf(buf, sizeof(buf), "%d Hz, Max: %d ms", clamp(int(fps), 0, 9999), int(max_frame_time*1000.));
- cv::putText(frame, buf, cv::Point(10, frame.rows-10), cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(0, 255, 0), 1);
}
-neuralnet_tracker::neuralnet_tracker()
+NeuralNetTracker::NeuralNetTracker()
{
opencv_init();
- cv::setNumThreads(num_threads);
+ neuralnet_tracker_tests::run();
}
-neuralnet_tracker::~neuralnet_tracker()
+NeuralNetTracker::~NeuralNetTracker()
{
requestInterruption();
wait();
@@ -521,130 +468,107 @@ neuralnet_tracker::~neuralnet_tracker()
}
-module_status neuralnet_tracker::start_tracker(QFrame* videoframe)
+module_status NeuralNetTracker::start_tracker(QFrame* videoframe)
{
videoframe->show();
- videoWidget = std::make_unique<cv_video_widget>(videoframe);
- layout = std::make_unique<QHBoxLayout>();
- layout->setContentsMargins(0, 0, 0, 0);
- layout->addWidget(videoWidget.get());
- videoframe->setLayout(layout.get());
- videoWidget->show();
+ video_widget_ = std::make_unique<cv_video_widget>(videoframe);
+ layout_ = std::make_unique<QHBoxLayout>();
+ layout_->setContentsMargins(0, 0, 0, 0);
+ layout_->addWidget(&*video_widget_);
+ videoframe->setLayout(&*layout_);
+ video_widget_->show();
+ num_threads_ = settings_.num_threads;
start();
return status_ok();
}
-bool neuralnet_tracker::load_and_initialize_model()
+bool NeuralNetTracker::load_and_initialize_model()
{
const QString localizer_model_path_enc =
OPENTRACK_BASE_PATH+"/" OPENTRACK_LIBRARY_PATH "/models/head-localizer.onnx";
- const QString poseestimator_model_path_enc =
- OPENTRACK_BASE_PATH+"/" OPENTRACK_LIBRARY_PATH "/models/head-pose.onnx";
+ const QString poseestimator_model_path_enc = get_posenet_filename();
try
{
- env = Ort::Env{
+ env_ = Ort::Env{
OrtLoggingLevel::ORT_LOGGING_LEVEL_ERROR,
"tracker-neuralnet"
};
auto opts = Ort::SessionOptions{};
// Do thread settings here do anything?
// There is a warning which says to control number of threads via
- // openmp settings. Which is what we do. omp_set_num_threads directly
- // before running the inference pass.
- opts.SetIntraOpNumThreads(num_threads);
- opts.SetInterOpNumThreads(num_threads);
- opts.SetGraphOptimizationLevel(
- GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
-
- opts.EnableCpuMemArena();
- allocator_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
-
- localizer.emplace(
- allocator_info,
- Ort::Session{env, convert(localizer_model_path_enc).c_str(), opts});
-
- poseestimator.emplace(
- allocator_info,
- Ort::Session{env, convert(poseestimator_model_path_enc).c_str(), opts});
+ // openmp settings. Which is what we do.
+ opts.SetIntraOpNumThreads(num_threads_);
+ opts.SetInterOpNumThreads(1);
+ allocator_info_ = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
+
+ localizer_.emplace(
+ allocator_info_,
+ Ort::Session{env_, convert(localizer_model_path_enc).c_str(), opts});
+
+ qDebug() << "Loading pose net " << poseestimator_model_path_enc;
+ poseestimator_.emplace(
+ allocator_info_,
+ Ort::Session{env_, convert(poseestimator_model_path_enc).c_str(), opts});
}
catch (const Ort::Exception &e)
{
- qDebug() << "Failed to initialize the neural network models. ONNX error message: "
+ qDebug() << "Failed to initialize the neural network models. ONNX error message: "
<< e.what();
return false;
}
+ catch (const std::exception &e)
+ {
+ qDebug() << "Failed to initialize the neural network models. Error message: " << e.what();
+ return false;
+ }
+
return true;
}
-bool neuralnet_tracker::open_camera()
+bool NeuralNetTracker::open_camera()
{
- int fps = enum_to_fps(s.force_fps);
+ int rint = std::clamp(*settings_.resolution, 0, (int)std::size(resolution_choices)-1);
+ resolution_tuple res = resolution_choices[rint];
+ int fps = enum_to_fps(settings_.force_fps);
- QMutexLocker l(&camera_mtx);
+ QMutexLocker l(&camera_mtx_);
- camera = video::make_camera(s.camera_name);
+ camera_ = video::make_camera(settings_.camera_name);
- if (!camera)
+ if (!camera_)
return false;
video::impl::camera::info args {};
- args.width = 320;
- args.height = 240;
-
+ if (res.width)
+ {
+ args.width = res.width;
+ args.height = res.height;
+ }
if (fps)
args.fps = fps;
- if (!camera->start(args))
+ args.use_mjpeg = settings_.use_mjpeg;
+
+ if (!camera_->start(args))
{
qDebug() << "neuralnet tracker: can't open camera";
return false;
}
- return true;
-}
-
-
-void neuralnet_tracker::set_intrinsics()
-{
- const int w = grayscale.cols, h = grayscale.rows;
- const double diag_fov = s.fov * M_PI / 180.;
- const double fov_w = 2.*atan(tan(diag_fov/2.)/sqrt(1. + h/(double)w * h/(double)w));
- const double fov_h = 2.*atan(tan(diag_fov/2.)/sqrt(1. + w/(double)h * w/(double)h));
- const double focal_length_w = 1. / tan(.5 * fov_w);
- const double focal_length_h = 1. / tan(.5 * fov_h);
- intrinsics.fov_h = fov_h;
- intrinsics.fov_w = fov_w;
- intrinsics.focal_length_w = focal_length_w;
- intrinsics.focal_length_h = focal_length_h;
+ return true;
}
-vec3 neuralnet_tracker::image_to_world(float x, float y, float size, float real_size) const
+void NeuralNetTracker::run()
{
- // Compute the location the network outputs in 3d space.
- const float xpos = -(intrinsics.focal_length_w * frame.cols * 0.5f) / size * real_size;
- const float zpos = (x / frame.cols * 2.f - 1.f) * xpos / intrinsics.focal_length_w;
- const float ypos = (y / frame.rows * 2.f - 1.f) * xpos / intrinsics.focal_length_h;
- return {xpos, ypos, zpos};
-}
+ preview_.init(*video_widget_);
+ GuardedThreadCountSwitch switch_num_threads_to(num_threads_);
-vec2 neuralnet_tracker::world_to_image(const vec3& pos) const
-{
- const float xscr = pos[2] / pos[0] * intrinsics.focal_length_w;
- const float yscr = pos[1] / pos[0] * intrinsics.focal_length_h;
- const float x = (xscr+1.)*0.5f*frame.cols;
- const float y = (yscr+1.)*0.5f*frame.rows;
- return {x, y};
-}
-
-
-void neuralnet_tracker::run()
-{
if (!open_camera())
return;
@@ -655,11 +579,12 @@ void neuralnet_tracker::run()
while (!isInterruptionRequested())
{
+ is_visible_ = check_is_visible();
auto t = clk.now();
{
- QMutexLocker l(&camera_mtx);
+ QMutexLocker l(&camera_mtx_);
- auto [ img, res ] = camera->get_frame();
+ auto [ img, res ] = camera_->get_frame();
if (!res)
{
@@ -668,16 +593,24 @@ void neuralnet_tracker::run()
continue;
}
- auto color = cv::Mat(img.height, img.width, CV_8UC(img.channels), (void*)img.data, img.stride);
- color.copyTo(frame);
+ {
+ QMutexLocker lck{&stats_mtx_};
+ resolution_ = { img.width, img.height };
+ }
+
+ auto color = prepare_input_image(img);
+
+ if (is_visible_)
+ preview_.copy_video_frame(color);
switch (img.channels)
{
case 1:
- grayscale.setTo(color);
+ grayscale_.create(img.height, img.width, CV_8UC1);
+ color.copyTo(grayscale_);
break;
case 3:
- cv::cvtColor(color, grayscale, cv::COLOR_BGR2GRAY);
+ cv::cvtColor(color, grayscale_, cv::COLOR_BGR2GRAY);
break;
default:
qDebug() << "Can't handle" << img.channels << "color channels";
@@ -685,13 +618,13 @@ void neuralnet_tracker::run()
}
}
- set_intrinsics();
+ intrinsics_ = make_intrinsics(grayscale_, settings_);
detect();
- if (frame.rows > 0)
- videoWidget->update_image(frame);
-
+ if (is_visible_)
+ preview_.copy_to_widget(*video_widget_);
+
update_fps(
std::chrono::duration_cast<std::chrono::milliseconds>(
clk.now() - t).count()*1.e-3);
@@ -699,40 +632,71 @@ void neuralnet_tracker::run()
}
-void neuralnet_tracker::update_fps(double dt)
+cv::Mat NeuralNetTracker::prepare_input_image(const video::frame& frame)
{
- const double alpha = dt/(dt + RC);
+ auto img = cv::Mat(frame.height, frame.width, CV_8UC(frame.channels), (void*)frame.data, frame.stride);
- if (dt > 1e-6)
+ // Crop if aspect ratio is not 4:3
+ if (img.rows*4 != img.cols*3)
+ {
+ img = img(make_crop_rect_for_aspect(img.size(), 4, 3));
+ }
+
+ img = img(make_crop_rect_multiple_of(img.size(), 4));
+
+ if (img.cols > 640)
+ {
+ cv::pyrDown(img, downsized_original_images_[0]);
+ img = downsized_original_images_[0];
+ }
+ if (img.cols > 640)
{
- fps *= 1 - alpha;
- fps += alpha * 1./dt;
+ cv::pyrDown(img, downsized_original_images_[1]);
+ img = downsized_original_images_[1];
}
- max_frame_time = std::max(max_frame_time, dt);
+ return img;
+}
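+// Illustrative example (assumed input): a 1920x1080 frame is cropped to 4:3
+// (1440x1080), left unchanged by the multiple-of-4 crop, and then pyrDown'ed twice
+// (720x540, then 360x270) because both intermediate widths exceed 640 px.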
+
+
+void NeuralNetTracker::update_fps(double dt)
+{
+ const double alpha = dt/(dt + RC);
+ if (dt > 1e-6)
+ {
+ QMutexLocker lck{&stats_mtx_};
+ fps_ *= 1 - alpha;
+ fps_ += alpha * 1./dt;
+ }
}
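+// This is the usual exponential moving average for irregular frame intervals:
+// alpha = dt/(dt + RC) makes fps_ track 1/dt with time constant RC (defined
+// elsewhere); e.g. dt = 10 ms and RC = 0.1 s give alpha ~= 0.09 per update.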
-void neuralnet_tracker::data(double *data)
+void NeuralNetTracker::data(double *data)
{
Affine tmp = [&]()
{
- QMutexLocker lck(&mtx);
- return pose_;
+ QMutexLocker lck(&mtx_);
+ return last_pose_affine_;
}();
const auto& mx = tmp.R.col(0);
const auto& my = tmp.R.col(1);
- const auto& mz = -tmp.R.col(2);
+ const auto& mz = tmp.R.col(2);
+ // For reference: https://en.wikipedia.org/wiki/Euler_angles. Section "Rotation matrix". The relevant matrix is
+ // under "Tait-Bryan angles", row with "Y_alpha Z_beta X_gamma = ...".
+    // For the NN tracker, x is forward and y is up, so the x axis is independent of roll. Thus it
+    // is relatively easy to figure out the yaw and pitch angles (alpha and beta).
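+    // Concretely, for R = R_y(alpha) * R_z(beta) * R_x(gamma) the first column is
+    // (cos(alpha)cos(beta), sin(beta), -sin(alpha)cos(beta)) and the second row is
+    // (sin(beta), cos(beta)cos(gamma), -cos(beta)sin(gamma)); these are the entries
+    // the atan2 expressions below recover the angles from.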
const float yaw = std::atan2(mx(2), mx(0));
const float pitch = -std::atan2(-mx(1), std::sqrt(mx(2)*mx(2)+mx(0)*mx(0)));
- const float roll = std::atan2(-my(2), mz(2));
+ // For the roll angle we recognize that the matrix entries in the second row contain cos(pitch)*cos(roll), and
+ // cos(pitch)*sin(roll). Using atan2 eliminates the common pitch factor and we obtain the roll angle.
+ const float roll = std::atan2(-mz(1), my(1));
{
constexpr double rad2deg = 180/M_PI;
data[Yaw] = rad2deg * yaw;
data[Pitch] = rad2deg * pitch;
- data[Roll] = rad2deg * roll;
+ data[Roll] = -rad2deg * roll;
// convert to cm
data[TX] = -tmp.t[2] * 0.1;
@@ -742,112 +706,185 @@ void neuralnet_tracker::data(double *data)
}
-Affine neuralnet_tracker::pose()
+Affine NeuralNetTracker::pose()
+{
+ QMutexLocker lck(&mtx_);
+ return last_pose_affine_;
+}
+
+
+std::tuple<cv::Size,double, double> NeuralNetTracker::stats() const
{
- QMutexLocker lck(&mtx);
- return pose_;
+ QMutexLocker lck(&stats_mtx_);
+ return { resolution_, fps_, inference_time_ };
}
-void neuralnet_dialog::make_fps_combobox()
+QString NeuralNetTracker::get_posenet_filename() const
+{
+ QString filename = settings_.posenet_file;
+ if (QFileInfo(filename).isRelative())
+ filename = get_default_model_directory().absoluteFilePath(filename);
+ return filename;
+}
+
+
+void NeuralNetDialog::make_fps_combobox()
{
for (int k = 0; k < fps_MAX; k++)
{
const int hz = enum_to_fps(k);
const QString name = (hz == 0) ? tr("Default") : QString::number(hz);
- ui.cameraFPS->addItem(name, k);
+ ui_.cameraFPS->addItem(name, k);
+ }
+}
+
+void NeuralNetDialog::make_resolution_combobox()
+{
+ int k=0;
+ for (const auto [w, h] : resolution_choices)
+ {
+ const QString s = (w == 0)
+ ? tr("Default")
+ : QString::number(w) + " x " + QString::number(h);
+ ui_.resolution->addItem(s, k++);
}
}
-neuralnet_dialog::neuralnet_dialog() :
- trans_calib(1, 2)
+NeuralNetDialog::NeuralNetDialog() :
+ trans_calib_(1, 2)
{
- ui.setupUi(this);
+ ui_.setupUi(this);
make_fps_combobox();
- tie_setting(s.force_fps, ui.cameraFPS);
+ make_resolution_combobox();
for (const auto& str : video::camera_names())
- ui.cameraName->addItem(str);
+ ui_.cameraName->addItem(str);
- tie_setting(s.camera_name, ui.cameraName);
- tie_setting(s.fov, ui.cameraFOV);
- tie_setting(s.offset_fwd, ui.tx_spin);
- tie_setting(s.offset_up, ui.ty_spin);
- tie_setting(s.offset_right, ui.tz_spin);
- tie_setting(s.show_network_input, ui.showNetworkInput);
+ tie_setting(settings_.camera_name, ui_.cameraName);
+ tie_setting(settings_.fov, ui_.cameraFOV);
+ tie_setting(settings_.offset_fwd, ui_.tx_spin);
+ tie_setting(settings_.offset_up, ui_.ty_spin);
+ tie_setting(settings_.offset_right, ui_.tz_spin);
+ tie_setting(settings_.show_network_input, ui_.showNetworkInput);
+ tie_setting(settings_.roi_filter_alpha, ui_.roiFilterAlpha);
+ tie_setting(settings_.use_mjpeg, ui_.use_mjpeg);
+ tie_setting(settings_.roi_zoom, ui_.roiZoom);
+ tie_setting(settings_.num_threads, ui_.threadCount);
+ tie_setting(settings_.resolution, ui_.resolution);
+ tie_setting(settings_.force_fps, ui_.cameraFPS);
+ tie_setting(settings_.posenet_file, ui_.posenetFileDisplay);
- connect(ui.buttonBox, SIGNAL(accepted()), this, SLOT(doOK()));
- connect(ui.buttonBox, SIGNAL(rejected()), this, SLOT(doCancel()));
- connect(ui.camera_settings, SIGNAL(clicked()), this, SLOT(camera_settings()));
+ connect(ui_.buttonBox, SIGNAL(accepted()), this, SLOT(doOK()));
+ connect(ui_.buttonBox, SIGNAL(rejected()), this, SLOT(doCancel()));
+ connect(ui_.camera_settings, SIGNAL(clicked()), this, SLOT(camera_settings()));
+ connect(ui_.posenetSelectButton, SIGNAL(clicked()), this, SLOT(onSelectPoseNetFile()));
+ connect(&settings_.camera_name, value_::value_changed<QString>(), this, &NeuralNetDialog::update_camera_settings_state);
- connect(&s.camera_name, value_::value_changed<QString>(), this, &neuralnet_dialog::update_camera_settings_state);
+ update_camera_settings_state(settings_.camera_name);
- update_camera_settings_state(s.camera_name);
+ connect(&calib_timer_, &QTimer::timeout, this, &NeuralNetDialog::trans_calib_step);
+ calib_timer_.setInterval(35);
+ connect(ui_.tcalib_button,SIGNAL(toggled(bool)), this, SLOT(startstop_trans_calib(bool)));
+
+ connect(&tracker_status_poll_timer_, &QTimer::timeout, this, &NeuralNetDialog::status_poll);
+ tracker_status_poll_timer_.setInterval(250);
+ tracker_status_poll_timer_.start();
+}
- connect(&calib_timer, &QTimer::timeout, this, &neuralnet_dialog::trans_calib_step);
- calib_timer.setInterval(35);
- connect(ui.tcalib_button,SIGNAL(toggled(bool)), this, SLOT(startstop_trans_calib(bool)));
+void NeuralNetDialog::save()
+{
+ settings_.b->save();
}
+void NeuralNetDialog::reload()
+{
+ settings_.b->reload();
+}
-void neuralnet_dialog::doOK()
+void NeuralNetDialog::doOK()
{
- s.b->save();
+ save();
close();
}
-void neuralnet_dialog::doCancel()
+void NeuralNetDialog::doCancel()
{
close();
}
-void neuralnet_dialog::camera_settings()
+void NeuralNetDialog::camera_settings()
{
- if (tracker)
+ if (tracker_)
{
- QMutexLocker l(&tracker->camera_mtx);
- (void)tracker->camera->show_dialog();
+ QMutexLocker l(&tracker_->camera_mtx_);
+ (void)tracker_->camera_->show_dialog();
}
else
- (void)video::show_dialog(s.camera_name);
+ (void)video::show_dialog(settings_.camera_name);
}
-void neuralnet_dialog::update_camera_settings_state(const QString& name)
+void NeuralNetDialog::update_camera_settings_state(const QString& name)
{
(void)name;
- ui.camera_settings->setEnabled(true);
+ ui_.camera_settings->setEnabled(true);
}
-void neuralnet_dialog::register_tracker(ITracker * x)
+void NeuralNetDialog::register_tracker(ITracker * x)
{
- tracker = static_cast<neuralnet_tracker*>(x);
- ui.tcalib_button->setEnabled(true);
+ tracker_ = static_cast<NeuralNetTracker*>(x);
+ ui_.tcalib_button->setEnabled(true);
}
-void neuralnet_dialog::unregister_tracker()
+void NeuralNetDialog::unregister_tracker()
{
- tracker = nullptr;
- ui.tcalib_button->setEnabled(false);
+ tracker_ = nullptr;
+ ui_.tcalib_button->setEnabled(false);
}
+bool NeuralNetDialog::embeddable() noexcept
+{
+ return true;
+}
-void neuralnet_dialog::trans_calib_step()
+void NeuralNetDialog::set_buttons_visible(bool x)
{
- if (tracker)
+ ui_.buttonBox->setVisible(x);
+}
+
+void NeuralNetDialog::status_poll()
+{
+ QString status;
+ if (!tracker_)
+ {
+ status = tr("Tracker Offline");
+ }
+ else
{
- const Affine X_CM = [&]() {
- QMutexLocker l(&calibrator_mutex);
- return tracker->pose();
+ auto [ res, fps, inference_time ] = tracker_->stats();
+ status = tr("%1x%2 @ %3 FPS / Inference: %4 ms").arg(res.width).arg(res.height).arg(int(fps)).arg(inference_time, 0, 'f', 1);
+ }
+ ui_.resolution_display->setText(status);
+}
+
+
+void NeuralNetDialog::trans_calib_step()
+{
+ if (tracker_)
+ {
+ const Affine X_CM = [&]() {
+ QMutexLocker l(&calibrator_mutex_);
+ return tracker_->pose();
}();
- trans_calib.update(X_CM.R, X_CM.t);
- auto [_, nsamples] = trans_calib.get_estimate();
+ trans_calib_.update(X_CM.R, X_CM.t);
+ auto [_, nsamples] = trans_calib_.get_estimate();
constexpr int min_yaw_samples = 15;
constexpr int min_pitch_samples = 12;
@@ -866,52 +903,77 @@ void neuralnet_dialog::trans_calib_step()
const int nsamples_total = nsamples[0] + nsamples[1];
sample_feedback = tr("%1 samples. Over %2, good!").arg(nsamples_total).arg(min_samples);
}
- ui.sample_count_display->setText(sample_feedback);
+ ui_.sample_count_display->setText(sample_feedback);
}
else
startstop_trans_calib(false);
}
-void neuralnet_dialog::startstop_trans_calib(bool start)
+void NeuralNetDialog::startstop_trans_calib(bool start)
{
- QMutexLocker l(&calibrator_mutex);
- // FIXME: does not work ...
+ QMutexLocker l(&calibrator_mutex_);
+ // FIXME: does not work ...
if (start)
{
qDebug() << "pt: starting translation calibration";
- calib_timer.start();
- trans_calib.reset();
- ui.sample_count_display->setText(QString());
+ calib_timer_.start();
+ trans_calib_.reset();
+ ui_.sample_count_display->setText(QString());
// Tracker must run with zero'ed offset for calibration.
- s.offset_fwd = 0;
- s.offset_up = 0;
- s.offset_right = 0;
+ settings_.offset_fwd = 0;
+ settings_.offset_up = 0;
+ settings_.offset_right = 0;
}
else
{
- calib_timer.stop();
+ calib_timer_.stop();
qDebug() << "pt: stopping translation calibration";
{
- auto [tmp, nsamples] = trans_calib.get_estimate();
- s.offset_fwd = int(tmp[0]);
- s.offset_up = int(tmp[1]);
- s.offset_right = int(tmp[2]);
+ auto [tmp, nsamples] = trans_calib_.get_estimate();
+ settings_.offset_fwd = int(tmp[0]);
+ settings_.offset_up = int(tmp[1]);
+ settings_.offset_right = int(tmp[2]);
}
}
- ui.tx_spin->setEnabled(!start);
- ui.ty_spin->setEnabled(!start);
- ui.tz_spin->setEnabled(!start);
+ ui_.tx_spin->setEnabled(!start);
+ ui_.ty_spin->setEnabled(!start);
+ ui_.tz_spin->setEnabled(!start);
if (start)
- ui.tcalib_button->setText(tr("Stop calibration"));
+ ui_.tcalib_button->setText(tr("Stop calibration"));
else
- ui.tcalib_button->setText(tr("Start calibration"));
+ ui_.tcalib_button->setText(tr("Start calibration"));
+}
+
+
+void NeuralNetDialog::onSelectPoseNetFile()
+{
+ const auto root = get_default_model_directory();
+ // Start with the current setting
+ QString filename = settings_.posenet_file;
+ // If the filename is relative then assume that the file is located under the
+ // model directory. Under regular use this should always be the case.
+ if (QFileInfo(filename).isRelative())
+ filename = root.absoluteFilePath(filename);
+ filename = QFileDialog::getOpenFileName(this,
+ tr("Select Pose Net ONNX"), filename, tr("ONNX Files (*.onnx)"));
+ // In case the user aborted.
+ if (filename.isEmpty())
+ return;
+ // When a file under the model directory was selected we can get rid of the
+ // directory prefix. This is more robust than storing absolute paths, e.g.
+ // in case the user moves the opentrack install folder / reuses old settings.
+ // When the file is not in the model directory, we have to use the absolute path,
+    // which is also fine as a developer feature.
+ if (filename.startsWith(root.absolutePath()))
+ filename = root.relativeFilePath(filename);
+ settings_.posenet_file = filename;
}
-settings::settings() : opts("neuralnet-tracker") {}
+Settings::Settings() : opts("neuralnet-tracker") {}
} // neuralnet_tracker_ns
-OPENTRACK_DECLARE_TRACKER(neuralnet_tracker, neuralnet_dialog, neuralnet_metadata)
+OPENTRACK_DECLARE_TRACKER(NeuralNetTracker, NeuralNetDialog, NeuralNetMetadata)