| -rw-r--r-- | tracker-neuralnet/BUILD.md | 34 |
| -rw-r--r-- | tracker-neuralnet/CMakeLists.txt | 23 |
| -rw-r--r-- | tracker-neuralnet/ftnoir_tracker_neuralnet.cpp | 917 |
| -rw-r--r-- | tracker-neuralnet/ftnoir_tracker_neuralnet.h | 231 |
| -rw-r--r-- | tracker-neuralnet/images/neuralnet.png | bin | 0 -> 595 bytes |
| -rw-r--r-- | tracker-neuralnet/lang/nl_NL.ts | 91 |
| -rw-r--r-- | tracker-neuralnet/lang/ru_RU.ts | 91 |
| -rw-r--r-- | tracker-neuralnet/lang/stub.ts | 91 |
| -rw-r--r-- | tracker-neuralnet/lang/zh_CN.ts | 91 |
| -rw-r--r-- | tracker-neuralnet/models/head-localizer.onnx | bin | 0 -> 279403 bytes |
| -rw-r--r-- | tracker-neuralnet/models/head-pose.onnx | bin | 0 -> 13047683 bytes |
| -rw-r--r-- | tracker-neuralnet/neuralnet-tracker.qrc | 5 |
| -rw-r--r-- | tracker-neuralnet/neuralnet-trackercontrols.ui | 375 |
13 files changed, 1949 insertions, 0 deletions
diff --git a/tracker-neuralnet/BUILD.md b/tracker-neuralnet/BUILD.md new file mode 100644 index 00000000..8bb694dd --- /dev/null +++ b/tracker-neuralnet/BUILD.md @@ -0,0 +1,34 @@ +ONNX Runtime +------------ + +Recommended approach on Windows: Build a shared library from source. Use the static MSVC +runtime library. The v1.6.0 branch should work fine. + +Source location: https://github.com/microsoft/onnxruntime + +In order to build, execute `build.bat` as follows: + +``` +$ build.bat --config RelWithDebInfo --x86 --build_dir .\buildx86\ \ + --enable_msvc_static_runtime --build_shared_lib --skip_tests \ + --cmake_generator "Visual Studio 15 2017" +``` + +Replace the argument for `--cmake_generator` if needed. + +The result is a messy directory `buildx86\RelWithDebInfo\RelWithDebInfo`, +but not a proper distribution. However, only a few files are needed. They can +be copied manually and are listed below, under their respective folders: + +``` +onnxruntime-x86-release/include: +cpu_provider_factory.h                 onnxruntime_cxx_api.h +experimental_onnxruntime_cxx_api.h     onnxruntime_cxx_inline.h +experimental_onnxruntime_cxx_inline.h  onnxruntime_session_options_config_keys.h +onnxruntime_c_api.h + +onnxruntime-x86-release/lib: +onnxruntime.dll  onnxruntime.exp  onnxruntime.lib  onnxruntime.pdb +``` + +See also https://www.onnxruntime.ai/docs/how-to/build.html diff --git a/tracker-neuralnet/CMakeLists.txt b/tracker-neuralnet/CMakeLists.txt new file mode 100644 index 00000000..d06f0c83 --- /dev/null +++ b/tracker-neuralnet/CMakeLists.txt @@ -0,0 +1,23 @@ +include(opentrack-opencv) +find_package(OpenCV QUIET) +find_package(OpenMP QUIET) # Used to control number of onnx threads. +set(SDK_ONNX_LIBPATH "" CACHE FILEPATH "Full path of onnx library") + +if(OpenCV_FOUND AND SDK_ONNX_LIBPATH AND OpenMP_FOUND) +    get_filename_component(ONNX_INCLUDE_DIR "${SDK_ONNX_LIBPATH}" DIRECTORY) +    get_filename_component(ONNX_INCLUDE_DIR "${ONNX_INCLUDE_DIR}" ABSOLUTE) +    set(ONNX_INCLUDE_DIR "${ONNX_INCLUDE_DIR}/../include") + +    otr_module(tracker-neuralnet) +    target_include_directories(${self} SYSTEM PUBLIC +        ${OpenCV_INCLUDE_DIRS} "${ONNX_INCLUDE_DIR}") +    target_link_libraries(${self} +        opentrack-cv "${SDK_ONNX_LIBPATH}" opencv_imgproc opencv_core +        opencv_imgcodecs opencv_calib3d +        OpenMP::OpenMP_C) + +    install( +        FILES "models/head-localizer.onnx" "models/head-pose.onnx" +        DESTINATION "${opentrack-libexec}/models" +        PERMISSIONS ${opentrack-perms-file}) +endif()
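When configuring the opentrack build itself, the copied ONNX Runtime is wired in through the `SDK_ONNX_LIBPATH` cache variable defined in the CMakeLists.txt above; the script derives the header directory as `../include` relative to the library's folder, which matches the layout listed in BUILD.md. A minimal sketch of the configure step, assuming the files were copied into an `onnxruntime-x86-release` folder as shown (all paths are placeholders):

```
$ cmake -DSDK_ONNX_LIBPATH="C:/dev/onnxruntime-x86-release/lib/onnxruntime.lib" \
    path/to/opentrack
```

Note that the tracker module is only built when OpenCV and OpenMP are found as well, per the `if()` guard above.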
\ No newline at end of file diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp new file mode 100644 index 00000000..2fad17aa --- /dev/null +++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp @@ -0,0 +1,917 @@ +/* Copyright (c) 2021 Michael Welter <michael@welter-4d.de> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + */ + +#include "ftnoir_tracker_neuralnet.h" +#include "compat/sleep.hpp" +#include "compat/math-imports.hpp" +#include "cv/init.hpp" +#include <opencv2/core.hpp> +#include <opencv2/core/hal/interface.h> +#include <opencv2/core/types.hpp> +#include <opencv2/calib3d.hpp> +#include <opencv2/imgcodecs.hpp> +#include "compat/timer.hpp" +#include <omp.h> + +#ifdef _MSC_VER +#   pragma warning(disable : 4702) +#endif + +#include <QMutexLocker> +#include <QDebug> +#include <QFile> + +#include <cstdio> +#include <cmath> +#include <algorithm> +#include <chrono> + +// Some demo code for onnx +// https://github.com/microsoft/onnxruntime/blob/master/csharp/test/Microsoft.ML.OnnxRuntime.EndToEndTests.Capi/C_Api_Sample.cpp +// https://github.com/leimao/ONNX-Runtime-Inference/blob/main/src/inference.cpp + +namespace +{ + +using numeric_types::vec3; +using numeric_types::vec2; +using numeric_types::mat33; + +// Minimal difference if at all going from 1 to 2 threads. +static constexpr int num_threads = 1; + + +#if _MSC_VER +std::wstring convert(const QString &s) { return s.toStdWString(); } +#else +std::string convert(const QString &s) { return s.toStdString(); } +#endif + + +float sigmoid(float x) +{ +    return 1.f/(1.f + std::exp(-x)); +} + + +template<class T> +cv::Rect_<T> squarize(const cv::Rect_<T> &r) +{ +    cv::Point_<T> c{r.x + r.width/T(2), r.y + r.height/T(2)}; +    const T sz = std::max(r.height, r.width); +    return {c.x - sz/T(2), c.y - sz/T(2), sz, sz}; +} + + +int compute_padding(const cv::Rect &r, int w, int h) +{ +    using std::max; +    return max({ +        max(-r.x, 0), +        max(-r.y, 0), +        max(r.x+r.width-w, 0), +        max(r.y+r.height-h, 0) +    }); +} + + +cv::Rect2f unnormalize(const cv::Rect2f &r, int h, int w) +{ +    auto unnorm = [](float x) -> float { return 0.5*(x+1); }; +    auto tl = r.tl(); +    auto br = r.br(); +    auto x0 = unnorm(tl.x)*w; +    auto y0 = unnorm(tl.y)*h; +    auto x1 = unnorm(br.x)*w; +    auto y1 = unnorm(br.y)*h; +    return { +        x0, y0, x1-x0, y1-y0 +    }; +} + +cv::Point2f normalize(const cv::Point2f &p, int h, int w) +{ +    return { +        p.x/w*2.f-1.f, +        p.y/h*2.f-1.f +    }; +} + + +mat33 rotation_from_two_vectors(const vec3 &a, const vec3 &b) +{ +    vec3 axis = a.cross(b); +    const float len_a = cv::norm(a); +    const float len_b = cv::norm(b); +    const float len_axis = cv::norm(axis); +    const float sin_angle = std::clamp(len_axis / (len_a * len_b), -1.f, 1.f); +    const float angle = std::asin(sin_angle); +    axis *= angle/(1.e-12 + len_axis); +    mat33 out; +    cv::Rodrigues(axis, out); +    return out; +} + + +/* Computes correction due to head being off screen center. +    x, y: In screen space, i.e. 
in [-1,1] +    focal_length_x: In screen space +*/ +mat33 compute_rotation_correction(const cv::Point2f &p, float focal_length_x) +{ +    return rotation_from_two_vectors( +        {1.f,0.f,0.f},  +        {focal_length_x, p.y, p.x}); +} + + +mat33 quaternion_to_mat33(const std::array<float,4> quat) +{ +    mat33 m; +    const float w = quat[0]; +    const float i = quat[1]; +    const float j = quat[2]; +    const float k = quat[3]; +    m(0,0) = 1.f - 2.f*(j*j + k*k); +    m(1,0) =       2.f*(i*j + k*w); +    m(2,0) =       2.f*(i*k - j*w); +    m(0,1) =       2.f*(i*j - k*w); +    m(1,1) = 1.f - 2.f*(i*i + k*k); +    m(2,1) =       2.f*(j*k + i*w); +    m(0,2) =       2.f*(i*k + j*w); +    m(1,2) =       2.f*(j*k - i*w); +    m(2,2) = 1.f - 2.f*(i*i + j*j); +    return m; +} + + +template<class T> +T iou(const cv::Rect_<T> &a, const cv::Rect_<T> &b) +{ +    auto i = a & b; +    return double{i.area()} / (a.area()+b.area()-i.area()); +} + + +} // namespace + + +namespace neuralnet_tracker_ns +{ + + +int enum_to_fps(int value) +{ +    switch (value) +    { +        case fps_30:        return 30; +        case fps_60:        return 60; +        default: [[fallthrough]]; +        case fps_default:   return 0; +    } +} + + +Localizer::Localizer(Ort::MemoryInfo &allocator_info, Ort::Session &&session) : +    session{std::move(session)}, +    scaled_frame(input_img_height, input_img_width, CV_8U), +    input_mat(input_img_height, input_img_width, CV_32F) +{ +    // Only works when input_mat does not reallocated memory ...which it should not. +    // Non-owning memory reference to input_mat? +    // Note: shape = (bach x channels x h x w) +    const std::int64_t input_shape[4] = { 1, 1, input_img_height, input_img_width }; +    input_val = Ort::Value::CreateTensor<float>(allocator_info, input_mat.ptr<float>(0), input_mat.total(), input_shape, 4); + +    const std::int64_t output_shape[2] = { 1, 5 }; +    output_val = Ort::Value::CreateTensor<float>(allocator_info, results.data(), results.size(), output_shape, 2); +} + + +std::pair<float, cv::Rect2f> Localizer::run( +    const cv::Mat &frame) +{ +    auto p = input_mat.ptr(0); + +    cv::resize(frame, scaled_frame, { input_img_width, input_img_height }, 0, 0, cv::INTER_AREA); +    scaled_frame.convertTo(input_mat, CV_32F, 1./255., -0.5); + +    assert (input_mat.ptr(0) == p); +    assert (!input_mat.empty() && input_mat.isContinuous()); +    assert (input_mat.cols == input_img_width && input_mat.rows == input_img_height); + +    const char* input_names[] = {"x"}; +    const char* output_names[] = {"logit_box"}; + +    //Timer t_; t_.start(); + +    const auto nt = omp_get_num_threads(); +    omp_set_num_threads(num_threads); +    session.Run(Ort::RunOptions{nullptr}, input_names, &input_val, 1, output_names, &output_val, 1); +    omp_set_num_threads(nt); + +    //qDebug() << "localizer: " << t_.elapsed_ms() << " ms\n"; + +    const cv::Rect2f roi = unnormalize(cv::Rect2f{ +        results[1], +        results[2], +        results[3]-results[1], // Width +        results[4]-results[2] // Height +    }, frame.rows, frame.cols); +    const float score = sigmoid(results[0]); + +    return { score, roi }; +} + + +PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&session) : +    session{std::move(session)}, +    scaled_frame(input_img_height, input_img_width, CV_8U), +    input_mat(input_img_height, input_img_width, CV_32F) +{ +    { +        const std::int64_t input_shape[4] = { 1, 1, input_img_height, input_img_width }; + 
       input_val = Ort::Value::CreateTensor<float>(allocator_info, input_mat.ptr<float>(0), input_mat.total(), input_shape, 4); +    } + +    { +        const std::int64_t output_shape[2] = { 1, 3 }; +        output_val[0] = Ort::Value::CreateTensor<float>( +            allocator_info, &output_coord[0], output_coord.rows, output_shape, 2); +    } + +    { +        const std::int64_t output_shape[2] = { 1, 4 }; +        output_val[1] = Ort::Value::CreateTensor<float>( +            allocator_info, &output_quat[0], output_quat.rows, output_shape, 2); +    } + +    { +        const std::int64_t output_shape[2] = { 1, 4 }; +        output_val[2] = Ort::Value::CreateTensor<float>( +            allocator_info, &output_box[0], output_box.rows, output_shape, 2); +    } +} + + +int PoseEstimator::find_input_intensity_90_pct_quantile() const +{ +    const int channels[] = { 0 }; +    const int hist_size[] = { 255 }; +    float range[] = { 0, 256 }; +    const float* ranges[] = { range }; +    cv::Mat hist; +    cv::calcHist(&scaled_frame, 1,  channels, cv::Mat(), hist, 1, hist_size, ranges, true, false); +    int gray_level = 0; +    const int num_pixels_quantile = scaled_frame.total()*0.9f; +    int num_pixels_accum = 0; +    for (int i=0; i<hist_size[0]; ++i) +    { +        num_pixels_accum += hist.at<float>(i); +        if (num_pixels_accum > num_pixels_quantile) +        { +            gray_level = i; +            break; +        } +    } +    return gray_level; +} + + +std::optional<PoseEstimator::Face> PoseEstimator::run( +    const cv::Mat &frame, const cv::Rect &box) +{ +    cv::Mat cropped; +     +    const int patch_size = std::max(box.width, box.height)*1.05; +    const cv::Point2f patch_center = { +        std::clamp<float>(box.x + 0.5f*box.width, 0.f, frame.cols), +        std::clamp<float>(box.y + 0.5f*box.height, 0.f, frame.rows) +    }; +    cv::getRectSubPix(frame, {patch_size, patch_size}, patch_center, cropped); + +    // Will get failure if patch_center is outside image boundaries. +    // Have to catch this case. +    if (cropped.rows != patch_size || cropped.cols != patch_size) +        return {}; +     +    auto p = input_mat.ptr(0); + +    cv::resize(cropped, scaled_frame, { input_img_width, input_img_height }, 0, 0, cv::INTER_AREA); + +    // Automatic brightness amplification. +    const int brightness = find_input_intensity_90_pct_quantile(); +    const double alpha = brightness<127 ? 0.5/std::max(5,brightness) : 1./255; +    const double beta = -0.5; + +    scaled_frame.convertTo(input_mat, CV_32F, alpha, beta); + +    assert (input_mat.ptr(0) == p); +    assert (!input_mat.empty() && input_mat.isContinuous()); +    assert (input_mat.cols == input_img_width && input_mat.rows == input_img_height); + +    const char* input_names[] = {"x"}; +    const char* output_names[] = {"pos_size", "quat", "box"}; + +    //Timer t_; t_.start(); + +    const auto nt = omp_get_num_threads(); +    omp_set_num_threads(num_threads); +    session.Run(Ort::RunOptions{nullptr}, input_names, &input_val, 1, output_names, output_val, 3); +    omp_set_num_threads(nt); + +    // FIXME: Execution time fluctuates wildly. 19 to 26 ms. Why??? +    //        The instructions are always the same. Maybe a memory allocation +    //        issue. The ONNX api suggests that tensor are allocated in an +    //        arena. Does that matter? Maybe the issue is something else? + +    //qDebug() << "pose net: " << t_.elapsed_ms() << " ms\n"; + +    // Perform coordinate transformation. 
+    // From patch-local normalized in [-1,1] to +    // frame unnormalized pixel coordinates. + +    const cv::Point2f center = patch_center +  +        (0.5f*patch_size)*cv::Point2f{output_coord[0], output_coord[1]}; + +    const float size = patch_size*0.5f*output_coord[2]; + +    // Following Eigen which uses quat components in the order w, x, y, z. +    const std::array<float,4> rotation = {  +        output_quat[3],  +        output_quat[0],  +        output_quat[1],  +        output_quat[2] }; + +    const cv::Rect2f outbox = { +        patch_center.x + (0.5f*patch_size)*output_box[0], +        patch_center.y + (0.5f*patch_size)*output_box[1], +        0.5f*patch_size*(output_box[2]-output_box[0]), +        0.5f*patch_size*(output_box[3]-output_box[1]) +    }; + +    return std::optional<Face>({ +        rotation, outbox, center, size +    }); +} + + +cv::Mat PoseEstimator::last_network_input() const +{ +    cv::Mat ret; +    if (!input_mat.empty()) +    { +        input_mat.convertTo(ret, CV_8U, 255., 127.); +        cv::cvtColor(ret, ret, cv::COLOR_GRAY2RGB); +    } +    return ret; +} + + +bool neuralnet_tracker::detect() +{ +    // Note: BGR colors! +    if (!last_localizer_roi || !last_roi || +        iou(*last_localizer_roi,*last_roi)<0.25) +    { +        auto [p, rect] = localizer->run(grayscale); +        if (p > 0.5) +        { +            last_localizer_roi = rect; +            last_roi = rect; +        } +    } + +    if (!last_roi) +        return false; + +    auto face = poseestimator->run(grayscale, *last_roi); +     +    if (!face) +    { +        last_roi.reset(); +        return false; +    } + +    last_roi = face->box; + +    Affine pose = compute_pose(*face); + +    draw_gizmos(frame, *face, pose); + +    { +        QMutexLocker lck(&mtx); +        this->pose_ = pose; +    } + +    return true; +} + + +Affine neuralnet_tracker::compute_pose(const PoseEstimator::Face &face) const +{ +    const mat33 rot_correction = compute_rotation_correction( +        normalize(face.center, frame.rows, frame.cols), +        intrinsics.focal_length_w); + +    const mat33 m = rot_correction * quaternion_to_mat33(face.rotation); + +    /* +          +       hhhhhh  <- head size (meters) +      \      | ----------------------- +       \     |                         \ +        \    |                          | +         \   |                          |- tz (meters) +          ____ <- face.size / width     | +           \ |  |                       | +            \|  |- focal length        / +               ------------------------ +    */ + +    // Compute the location the network outputs in 3d space. +    const vec3 face_world_pos = image_to_world(face.center.x, face.center.y, face.size, head_size_mm); + +    // But this is in general not the location of the rotation joint in the neck. +    // So we need an extra offset. 
Which we determine by solving +    // z,y,z-pos = head_joint_loc + R_face * offset + +    const vec3 pos = face_world_pos +        + m * vec3{ +            static_cast<float>(s.offset_fwd),  +            static_cast<float>(s.offset_up), +            static_cast<float>(s.offset_right)}; + +    return { m, pos }; +} + + +void neuralnet_tracker::draw_gizmos( +    cv::Mat frame, +    const PoseEstimator::Face &face, +    const Affine& pose) const +{ +    if (last_roi)  +    { +        const int col = 255; +        cv::rectangle(frame, *last_roi, cv::Scalar(0, 255, 0), /*thickness=*/1); +    } +    if (last_localizer_roi) +    { +        const int col = 255; +        cv::rectangle(frame, *last_localizer_roi, cv::Scalar(col, 0, 255-col), /*thickness=*/1); +    } + +    if (face.size>=1.f) +        cv::circle(frame, static_cast<cv::Point>(face.center), int(face.size), cv::Scalar(255,255,255), 2); +    cv::circle(frame, static_cast<cv::Point>(face.center), 3, cv::Scalar(255,255,255), -1); + +    auto draw_coord_line = [&](int i, const cv::Scalar& color) +    { +        const float vx = -pose.R(2,i); +        const float vy = -pose.R(1,i); +        static constexpr float len = 100.f; +        cv::Point q = face.center + len*cv::Point2f{vx, vy}; +        cv::line(frame, static_cast<cv::Point>(face.center), static_cast<cv::Point>(q), color, 2); +    }; +    draw_coord_line(0, {0, 0, 255}); +    draw_coord_line(1, {0, 255, 0}); +    draw_coord_line(2, {255, 0, 0}); + +    if (s.show_network_input) +    { +        cv::Mat netinput = poseestimator->last_network_input(); +        if (!netinput.empty()) +        { +            const int w = std::min(netinput.cols, frame.cols); +            const int h = std::min(netinput.rows, frame.rows); +            cv::Rect roi(0, 0, w, h); +            netinput(roi).copyTo(frame(roi)); +        } +    } +    { +        // Draw the computed joint position +        auto xy = world_to_image(pose.t); +        cv::circle(frame, cv::Point(xy[0],xy[1]), 5, cv::Scalar(0,0,255), -1); +    } + +    char buf[128]; +    ::snprintf(buf, sizeof(buf), "%d Hz, Max: %d ms", clamp(int(fps), 0, 9999), int(max_frame_time*1000.)); +    cv::putText(frame, buf, cv::Point(10, frame.rows-10), cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(0, 255, 0), 1); +} + + +neuralnet_tracker::neuralnet_tracker() +{ +    opencv_init(); +    cv::setNumThreads(num_threads); +} + + +neuralnet_tracker::~neuralnet_tracker() +{ +    requestInterruption(); +    wait(); +    // fast start/stop causes breakage +    portable::sleep(1000); +} + + +module_status neuralnet_tracker::start_tracker(QFrame* videoframe) +{ +    videoframe->show(); +    videoWidget = std::make_unique<cv_video_widget>(videoframe); +    layout = std::make_unique<QHBoxLayout>(); +    layout->setContentsMargins(0, 0, 0, 0); +    layout->addWidget(videoWidget.get()); +    videoframe->setLayout(layout.get()); +    videoWidget->show(); +    start(); +    return status_ok(); +} + + +bool neuralnet_tracker::load_and_initialize_model() +{ +    const QString localizer_model_path_enc = +        OPENTRACK_BASE_PATH+"/" OPENTRACK_LIBRARY_PATH "/models/head-localizer.onnx"; +    const QString poseestimator_model_path_enc = +        OPENTRACK_BASE_PATH+"/" OPENTRACK_LIBRARY_PATH "/models/head-pose.onnx"; + +    try +    { +        env = Ort::Env{ +            OrtLoggingLevel::ORT_LOGGING_LEVEL_ERROR, +            "tracker-neuralnet" +        }; +        auto opts = Ort::SessionOptions{}; +        // Do thread settings here do anything? 
+        // There is a warning which says to control number of threads via +        // openmp settings. Which is what we do. omp_set_num_threads directly +        // before running the inference pass. +        opts.SetIntraOpNumThreads(num_threads); +        opts.SetInterOpNumThreads(num_threads); +        opts.SetGraphOptimizationLevel( +            GraphOptimizationLevel::ORT_ENABLE_EXTENDED); + +        opts.EnableCpuMemArena(); +        allocator_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + +        localizer.emplace( +            allocator_info,  +            Ort::Session{env, convert(localizer_model_path_enc).c_str(), opts}); +         +        poseestimator.emplace( +            allocator_info, +            Ort::Session{env, convert(poseestimator_model_path_enc).c_str(), opts}); +    } +    catch (const Ort::Exception &e) +    { +        qDebug() << "Failed to initialize the neural network models. ONNX error message: "  +            << e.what(); +        return false; +    } +    return true; +} + + +bool neuralnet_tracker::open_camera() +{ +    int fps = enum_to_fps(s.force_fps); + +    QMutexLocker l(&camera_mtx); + +    camera = video::make_camera(s.camera_name); + +    if (!camera) +        return false; + +    video::impl::camera::info args {}; + +    args.width = 320; +    args.height = 240; + +    if (fps) +        args.fps = fps; + +    if (!camera->start(args)) +    { +        qDebug() << "neuralnet tracker: can't open camera"; +        return false; +    } +    return true; +} + + +void neuralnet_tracker::set_intrinsics() +{ +    const int w = grayscale.cols, h = grayscale.rows; +    const double diag_fov = s.fov * M_PI / 180.; +    const double fov_w = 2.*atan(tan(diag_fov/2.)/sqrt(1. + h/(double)w * h/(double)w)); +    const double fov_h = 2.*atan(tan(diag_fov/2.)/sqrt(1. + w/(double)h * w/(double)h)); +    const double focal_length_w = 1. / tan(.5 * fov_w); +    const double focal_length_h = 1. / tan(.5 * fov_h); + +    intrinsics.fov_h = fov_h; +    intrinsics.fov_w = fov_w; +    intrinsics.focal_length_w = focal_length_w; +    intrinsics.focal_length_h = focal_length_h; +} + + +vec3 neuralnet_tracker::image_to_world(float x, float y, float size, float real_size) const +{ +    // Compute the location the network outputs in 3d space. 
+    const float xpos = -(intrinsics.focal_length_w * frame.cols * 0.5f) / size * real_size; +    const float zpos = (x / frame.cols * 2.f - 1.f) * xpos / intrinsics.focal_length_w; +    const float ypos = (y / frame.rows * 2.f - 1.f) * xpos / intrinsics.focal_length_h; +    return {xpos, ypos, zpos}; +} + + +vec2 neuralnet_tracker::world_to_image(const vec3& pos) const +{ +    const float xscr = pos[2] / pos[0] * intrinsics.focal_length_w; +    const float yscr = pos[1] / pos[0] * intrinsics.focal_length_h; +    const float x = (xscr+1.)*0.5f*frame.cols; +    const float y = (yscr+1.)*0.5f*frame.rows; +    return {x, y}; +} + + +void neuralnet_tracker::run() +{ +    if (!open_camera()) +        return; + +    if (!load_and_initialize_model()) +        return; + +    std::chrono::high_resolution_clock clk; + +    while (!isInterruptionRequested()) +    { +        auto t = clk.now(); +        { +            QMutexLocker l(&camera_mtx); + +            auto [ img, res ] = camera->get_frame(); + +            if (!res) +            { +                l.unlock(); +                portable::sleep(100); +                continue; +            } + +            auto color = cv::Mat(img.height, img.width, CV_8UC(img.channels), (void*)img.data, img.stride); +            color.copyTo(frame); + +            switch (img.channels) +            { +            case 1: +                grayscale.setTo(color);  +                break; +            case 3: +                cv::cvtColor(color, grayscale, cv::COLOR_BGR2GRAY); +                break; +            default: +                qDebug() << "Can't handle" << img.channels << "color channels"; +                return; +            } +        } + +        set_intrinsics(); + +        detect(); + +        if (frame.rows > 0) +            videoWidget->update_image(frame); +         +        update_fps( +            std::chrono::duration_cast<std::chrono::milliseconds>( +                clk.now() - t).count()*1.e-3); +    } +} + + +void neuralnet_tracker::update_fps(double dt) +{ +    const double alpha = dt/(dt + RC); + +    if (dt > 1e-6) +    { +        fps *= 1 - alpha; +        fps += alpha * 1./dt; +    } + +    max_frame_time = std::max(max_frame_time, dt); +} + + +void neuralnet_tracker::data(double *data) +{ +    Affine tmp = [&]() +    { +        QMutexLocker lck(&mtx); +        return pose_; +    }(); + +    const auto& mx = tmp.R.col(0); +    const auto& my = tmp.R.col(1); +    const auto& mz = -tmp.R.col(2); + +    const float yaw = std::atan2(mx(2), mx(0)); +    const float pitch = -std::atan2(-mx(1), std::sqrt(mx(2)*mx(2)+mx(0)*mx(0))); +    const float roll = std::atan2(-my(2), mz(2)); +    { +        constexpr double rad2deg = 180/M_PI; +        data[Yaw]   = rad2deg * yaw; +        data[Pitch] = rad2deg * pitch; +        data[Roll]  = rad2deg * roll; + +        // convert to cm +        data[TX] = -tmp.t[2] * 0.1; +        data[TY] = tmp.t[1] * 0.1; +        data[TZ] = -tmp.t[0] * 0.1; +    } +} + + +Affine neuralnet_tracker::pose() +{ +    QMutexLocker lck(&mtx); +    return pose_; +} + + +void neuralnet_dialog::make_fps_combobox() +{ +    for (int k = 0; k < fps_MAX; k++) +    { +        const int hz = enum_to_fps(k); +        const QString name = (hz == 0) ? 
tr("Default") : QString::number(hz); +        ui.cameraFPS->addItem(name, k); +    } +} + + +neuralnet_dialog::neuralnet_dialog() : +    trans_calib(1, 2) +{ +    ui.setupUi(this); + +    make_fps_combobox(); +    tie_setting(s.force_fps, ui.cameraFPS); + +    for (const auto& str : video::camera_names()) +        ui.cameraName->addItem(str); + +    tie_setting(s.camera_name, ui.cameraName); +    tie_setting(s.fov, ui.cameraFOV); +    tie_setting(s.offset_fwd, ui.tx_spin); +    tie_setting(s.offset_up, ui.ty_spin); +    tie_setting(s.offset_right, ui.tz_spin); +    tie_setting(s.show_network_input, ui.showNetworkInput); + +    connect(ui.buttonBox, SIGNAL(accepted()), this, SLOT(doOK())); +    connect(ui.buttonBox, SIGNAL(rejected()), this, SLOT(doCancel())); +    connect(ui.camera_settings, SIGNAL(clicked()), this, SLOT(camera_settings())); + +    connect(&s.camera_name, value_::value_changed<QString>(), this, &neuralnet_dialog::update_camera_settings_state); + +    update_camera_settings_state(s.camera_name); + +    connect(&calib_timer, &QTimer::timeout, this, &neuralnet_dialog::trans_calib_step); +    calib_timer.setInterval(35); +    connect(ui.tcalib_button,SIGNAL(toggled(bool)), this, SLOT(startstop_trans_calib(bool))); +} + + +void neuralnet_dialog::doOK() +{ +    s.b->save(); +    close(); +} + + +void neuralnet_dialog::doCancel() +{ +    close(); +} + + +void neuralnet_dialog::camera_settings() +{ +    if (tracker) +    { +        QMutexLocker l(&tracker->camera_mtx); +        (void)tracker->camera->show_dialog(); +    } +    else +        (void)video::show_dialog(s.camera_name); +} + + +void neuralnet_dialog::update_camera_settings_state(const QString& name) +{ +    (void)name; +    ui.camera_settings->setEnabled(true); +} + + +void neuralnet_dialog::register_tracker(ITracker * x) +{ +    tracker = static_cast<neuralnet_tracker*>(x); +    ui.tcalib_button->setEnabled(true); +} + + +void neuralnet_dialog::unregister_tracker() +{ +    tracker = nullptr; +    ui.tcalib_button->setEnabled(false); +} + + +void neuralnet_dialog::trans_calib_step() +{ +    if (tracker) +    { +        const Affine X_CM = [&]() {  +            QMutexLocker l(&calibrator_mutex); +            return tracker->pose(); +        }(); +        trans_calib.update(X_CM.R, X_CM.t); +        auto [_, nsamples] = trans_calib.get_estimate(); + +        constexpr int min_yaw_samples = 15; +        constexpr int min_pitch_samples = 12; +        constexpr int min_samples = min_yaw_samples+min_pitch_samples; + +        // Don't bother counting roll samples. Roll calibration is hard enough +        // that it's a hidden unsupported feature anyway. + +        QString sample_feedback; +        if (nsamples[0] < min_yaw_samples) +            sample_feedback = tr("%1 yaw samples. Yaw more to %2 samples for stable calibration.").arg(nsamples[0]).arg(min_yaw_samples); +        else if (nsamples[1] < min_pitch_samples) +            sample_feedback = tr("%1 pitch samples. Pitch more to %2 samples for stable calibration.").arg(nsamples[1]).arg(min_pitch_samples); +        else +        { +            const int nsamples_total = nsamples[0] + nsamples[1]; +            sample_feedback = tr("%1 samples. Over %2, good!").arg(nsamples_total).arg(min_samples); +        } +        ui.sample_count_display->setText(sample_feedback); +    } +    else +        startstop_trans_calib(false); +} + + +void neuralnet_dialog::startstop_trans_calib(bool start) +{ +    QMutexLocker l(&calibrator_mutex); +    // FIXME: does not work ...   
+    if (start) +    { +        qDebug() << "pt: starting translation calibration"; +        calib_timer.start(); +        trans_calib.reset(); +        ui.sample_count_display->setText(QString()); +        // Tracker must run with zero'ed offset for calibration. +        s.offset_fwd = 0; +        s.offset_up = 0; +        s.offset_right = 0; +    } +    else +    { +        calib_timer.stop(); +        qDebug() << "pt: stopping translation calibration"; +        { +            auto [tmp, nsamples] = trans_calib.get_estimate(); +            s.offset_fwd = int(tmp[0]); +            s.offset_up = int(tmp[1]); +            s.offset_right = int(tmp[2]); +        } +    } +    ui.tx_spin->setEnabled(!start); +    ui.ty_spin->setEnabled(!start); +    ui.tz_spin->setEnabled(!start); + +    if (start) +        ui.tcalib_button->setText(tr("Stop calibration")); +    else +        ui.tcalib_button->setText(tr("Start calibration")); +} + + +settings::settings() : opts("neuralnet-tracker") {} + +} // neuralnet_tracker_ns + +OPENTRACK_DECLARE_TRACKER(neuralnet_tracker, neuralnet_dialog, neuralnet_metadata) diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.h b/tracker-neuralnet/ftnoir_tracker_neuralnet.h new file mode 100644 index 00000000..e26689a4 --- /dev/null +++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.h @@ -0,0 +1,231 @@ +/* Copyright (c) 2021 Michael Welter <michael@welter-4d.de> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + */ + +#pragma once + +#include "options/options.hpp" +#include "api/plugin-api.hpp" +#include "cv/video-widget.hpp" +#include "cv/translation-calibrator.hpp" +#include "cv/numeric.hpp" +#include "compat/timer.hpp" +#include "video/camera.hpp" +#include "cv/affine.hpp" + +#include <QObject> +#include <QThread> +#include <QMutex> +#include <QHBoxLayout> +#include <QDialog> +#include <QTimer> + +#include <memory> +#include <cinttypes> + +#include <onnxruntime_cxx_api.h> + +#include <opencv2/core.hpp> +#include <opencv2/core/types.hpp> +#include <opencv2/imgproc.hpp> + +#include "ui_neuralnet-trackercontrols.h" + +namespace neuralnet_tracker_ns +{ + + +using namespace options; + + +enum fps_choices +{ +    fps_default = 0, +    fps_30      = 1, +    fps_60      = 2, +    fps_MAX     = 3 +}; + + +struct settings : opts { +    value<int> offset_fwd { b, "offset-fwd", 200 }, // Millimeters +               offset_up { b, "offset-up", 0 }, +               offset_right { b, "offset-right", 0 }; +    value<QString> camera_name { b, "camera-name", ""}; +    value<int> fov { b, "field-of-view", 56 }; +    value<fps_choices> force_fps { b, "force-fps", fps_default }; +    value<bool> show_network_input { b, "show-network-input", false }; +    settings(); +}; + + +struct CamIntrinsics +{ +    float focal_length_w; +    float focal_length_h; +    float fov_w; +    float fov_h; +}; + + +class Localizer +{ +    public: +        Localizer(Ort::MemoryInfo &allocator_info, +                    Ort::Session &&session); +         +        // Returns bounding wrt image coordinate of the input image +        // The preceeding float is the score for being a face normalized to [0,1]. 
+        std::pair<float, cv::Rect2f> run( +            const cv::Mat &frame); + +    private: +        inline static constexpr int input_img_width = 288; +        inline static constexpr int input_img_height = 224; +        Ort::Session session{nullptr}; +        // Inputs / outputs +        cv::Mat scaled_frame{}, input_mat{}; +        Ort::Value input_val{nullptr}, output_val{nullptr}; +        std::array<float, 5> results; +}; + + +class PoseEstimator +{ +    public: +        struct Face +        { +            std::array<float,4> rotation; // Quaternion, (w, x, y, z) +            // The following quantities are defined wrt the image space of the input +            cv::Rect2f box; +            cv::Point2f center; +            float size; +        }; + +        PoseEstimator(Ort::MemoryInfo &allocator_info, +                        Ort::Session &&session); +        // Inference +        std::optional<Face> run(const cv::Mat &frame, const cv::Rect &box); +        // Returns an image compatible with the 'frame' image for displaying. +        cv::Mat last_network_input() const; + +    private: +        // Operates on the private image data members +        int find_input_intensity_90_pct_quantile() const; + +        inline static constexpr int input_img_width = 129; +        inline static constexpr int input_img_height = 129; +        Ort::Session session{nullptr}; +        // Inputs +        cv::Mat scaled_frame{}, input_mat{}; +        Ort::Value input_val{nullptr}; +        // Outputs +        cv::Vec<float, 3> output_coord{}; +        cv::Vec<float, 4> output_quat{}; +        cv::Vec<float, 4> output_box{}; +        Ort::Value output_val[3] = { +            Ort::Value{nullptr},  +            Ort::Value{nullptr},  +            Ort::Value{nullptr}}; +}; + + +class neuralnet_tracker : protected virtual QThread, public ITracker +{ +    Q_OBJECT +public: +    neuralnet_tracker(); +    ~neuralnet_tracker() override; +    module_status start_tracker(QFrame* frame) override; +    void data(double *data) override; +    void run() override; +    Affine pose(); + +    QMutex camera_mtx; +    std::unique_ptr<video::impl::camera> camera; + +private: +    bool detect(); +    bool open_camera(); +    void set_intrinsics(); +    bool load_and_initialize_model(); +    void draw_gizmos( +        cv::Mat frame,   +        const PoseEstimator::Face &face, +        const Affine& pose) const; +    void update_fps(double dt); + +    Affine compute_pose(const PoseEstimator::Face &face) const; +    numeric_types::vec3 image_to_world(float x, float y, float size, float real_size) const; +    numeric_types::vec2 world_to_image(const numeric_types::vec3& p) const; + +    settings s; +    std::optional<Localizer> localizer; +    std::optional<PoseEstimator> poseestimator; +    Ort::Env env{nullptr}; +    Ort::MemoryInfo allocator_info{nullptr}; + +    CamIntrinsics intrinsics{}; +    cv::Mat frame, grayscale; +    std::optional<cv::Rect2f> last_localizer_roi; +    std::optional<cv::Rect2f> last_roi; +    static constexpr float head_size_mm = 200.f; + +    double fps = 0; +    double max_frame_time = 0; +    static constexpr double RC = .25; + +    QMutex mtx; // Protects the pose +    Affine pose_; + +    std::unique_ptr<cv_video_widget> videoWidget; +    std::unique_ptr<QHBoxLayout> layout; +}; + + +class neuralnet_dialog : public ITrackerDialog +{ +    Q_OBJECT +public: +    neuralnet_dialog(); +    void register_tracker(ITracker * x) override; +    void unregister_tracker() override; +private: +    void 
make_fps_combobox(); + +    Ui::Form ui; +    settings s; +     +    // Calibration code mostly taken from point tracker +    QTimer calib_timer; +    TranslationCalibrator trans_calib; +    QMutex calibrator_mutex; + +    neuralnet_tracker* tracker = nullptr; + +private Q_SLOTS: +    void doOK(); +    void doCancel(); +    void camera_settings(); +    void update_camera_settings_state(const QString& name); +    void startstop_trans_calib(bool start); +    void trans_calib_step(); +}; + + +class neuralnet_metadata : public Metadata +{ +    Q_OBJECT +    QString name() override { return QString("neuralnet tracker"); } +    QIcon icon() override { return QIcon(":/images/neuralnet.png"); } +}; + + +} // neuralnet_tracker_ns + +using neuralnet_tracker_ns::neuralnet_tracker; +using neuralnet_tracker_ns::neuralnet_dialog; +using neuralnet_tracker_ns::neuralnet_metadata;
\ No newline at end of file diff --git a/tracker-neuralnet/images/neuralnet.png b/tracker-neuralnet/images/neuralnet.pngBinary files differ new file mode 100644 index 00000000..1a10c53c --- /dev/null +++ b/tracker-neuralnet/images/neuralnet.png diff --git a/tracker-neuralnet/lang/nl_NL.ts b/tracker-neuralnet/lang/nl_NL.ts new file mode 100644 index 00000000..fb6c3348 --- /dev/null +++ b/tracker-neuralnet/lang/nl_NL.ts @@ -0,0 +1,91 @@ +<?xml version="1.0" encoding="utf-8"?> +<!DOCTYPE TS> +<TS version="2.1" language="nl_NL"> +<context> +    <name>Form</name> +    <message> +        <source>Tracker settings</source> +        <translation>Tracker-instellingen</translation> +    </message> +    <message> +        <source>Frames per second</source> +        <translation>Frames per seconde</translation> +    </message> +    <message> +        <source>Camera name</source> +        <translation>Cameranaam</translation> +    </message> +    <message> +        <source>Diagonal FOV</source> +        <translation>Diagonale FOV</translation> +    </message> +    <message> +        <source>Camera settings</source> +        <translation>Camera-instellingen</translation> +    </message> +    <message> +        <source>Camera Configuration</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Head Center Offset</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source> mm</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Use only yaw and pitch while calibrating. +Don't roll or change position.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Start calibration</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Right</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Forward</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Up</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Show Network Input</source> +        <translation type="unfinished"></translation> +    </message> +</context> +<context> +    <name>neuralnet_tracker_ns::neuralnet_dialog</name> +    <message> +        <source>Default</source> +        <translation type="unfinished">Standaard</translation> +    </message> +    <message> +        <source>%1 yaw samples. Yaw more to %2 samples for stable calibration.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>%1 pitch samples. Pitch more to %2 samples for stable calibration.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>%1 samples. 
Over %2, good!</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Stop calibration</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Start calibration</source> +        <translation type="unfinished"></translation> +    </message> +</context> +</TS> diff --git a/tracker-neuralnet/lang/ru_RU.ts b/tracker-neuralnet/lang/ru_RU.ts new file mode 100644 index 00000000..f1ba9a92 --- /dev/null +++ b/tracker-neuralnet/lang/ru_RU.ts @@ -0,0 +1,91 @@ +<?xml version="1.0" encoding="utf-8"?> +<!DOCTYPE TS> +<TS version="2.1" language="ru_RU"> +<context> +    <name>Form</name> +    <message> +        <source>Tracker settings</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Diagonal FOV</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Camera settings</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Frames per second</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Camera name</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Camera Configuration</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Head Center Offset</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source> mm</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Use only yaw and pitch while calibrating. +Don't roll or change position.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Start calibration</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Right</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Forward</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Up</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Show Network Input</source> +        <translation type="unfinished"></translation> +    </message> +</context> +<context> +    <name>neuralnet_tracker_ns::neuralnet_dialog</name> +    <message> +        <source>Default</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>%1 yaw samples. Yaw more to %2 samples for stable calibration.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>%1 pitch samples. Pitch more to %2 samples for stable calibration.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>%1 samples. 
Over %2, good!</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Stop calibration</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Start calibration</source> +        <translation type="unfinished"></translation> +    </message> +</context> +</TS> diff --git a/tracker-neuralnet/lang/stub.ts b/tracker-neuralnet/lang/stub.ts new file mode 100644 index 00000000..52b8aded --- /dev/null +++ b/tracker-neuralnet/lang/stub.ts @@ -0,0 +1,91 @@ +<?xml version="1.0" encoding="utf-8"?> +<!DOCTYPE TS> +<TS version="2.1"> +<context> +    <name>Form</name> +    <message> +        <source>Tracker settings</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Diagonal FOV</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Camera settings</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Frames per second</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Camera name</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Camera Configuration</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Head Center Offset</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source> mm</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Use only yaw and pitch while calibrating. +Don't roll or change position.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Start calibration</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Right</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Forward</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Up</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Show Network Input</source> +        <translation type="unfinished"></translation> +    </message> +</context> +<context> +    <name>neuralnet_tracker_ns::neuralnet_dialog</name> +    <message> +        <source>Default</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>%1 yaw samples. Yaw more to %2 samples for stable calibration.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>%1 pitch samples. Pitch more to %2 samples for stable calibration.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>%1 samples. 
Over %2, good!</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Stop calibration</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Start calibration</source> +        <translation type="unfinished"></translation> +    </message> +</context> +</TS> diff --git a/tracker-neuralnet/lang/zh_CN.ts b/tracker-neuralnet/lang/zh_CN.ts new file mode 100644 index 00000000..2d0dd8ff --- /dev/null +++ b/tracker-neuralnet/lang/zh_CN.ts @@ -0,0 +1,91 @@ +<?xml version="1.0" encoding="utf-8"?> +<!DOCTYPE TS> +<TS version="2.1"> +<context> +    <name>Form</name> +    <message> +        <source>Tracker settings</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Diagonal FOV</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Camera name</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Frames per second</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Camera settings</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Camera Configuration</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Head Center Offset</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source> mm</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Use only yaw and pitch while calibrating. +Don't roll or change position.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Start calibration</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Right</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Forward</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Up</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Show Network Input</source> +        <translation type="unfinished"></translation> +    </message> +</context> +<context> +    <name>neuralnet_tracker_ns::neuralnet_dialog</name> +    <message> +        <source>Default</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>%1 yaw samples. Yaw more to %2 samples for stable calibration.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>%1 pitch samples. Pitch more to %2 samples for stable calibration.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>%1 samples. 
Over %2, good!</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Stop calibration</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Start calibration</source> +        <translation type="unfinished"></translation> +    </message> +</context> +</TS> diff --git a/tracker-neuralnet/models/head-localizer.onnx b/tracker-neuralnet/models/head-localizer.onnxBinary files differ new file mode 100644 index 00000000..c128f89d --- /dev/null +++ b/tracker-neuralnet/models/head-localizer.onnx diff --git a/tracker-neuralnet/models/head-pose.onnx b/tracker-neuralnet/models/head-pose.onnxBinary files differ new file mode 100644 index 00000000..dcb55dcc --- /dev/null +++ b/tracker-neuralnet/models/head-pose.onnx diff --git a/tracker-neuralnet/neuralnet-tracker.qrc b/tracker-neuralnet/neuralnet-tracker.qrc new file mode 100644 index 00000000..d30ec313 --- /dev/null +++ b/tracker-neuralnet/neuralnet-tracker.qrc @@ -0,0 +1,5 @@ +<RCC> +    <qresource prefix="/"> +        <file>images/neuralnet.png</file> +    </qresource> +</RCC> diff --git a/tracker-neuralnet/neuralnet-trackercontrols.ui b/tracker-neuralnet/neuralnet-trackercontrols.ui new file mode 100644 index 00000000..f16b5807 --- /dev/null +++ b/tracker-neuralnet/neuralnet-trackercontrols.ui @@ -0,0 +1,375 @@ +<?xml version="1.0" encoding="UTF-8"?> +<ui version="4.0"> + <class>Form</class> + <widget class="QWidget" name="Form"> +  <property name="windowModality"> +   <enum>Qt::NonModal</enum> +  </property> +  <property name="geometry"> +   <rect> +    <x>0</x> +    <y>0</y> +    <width>727</width> +    <height>202</height> +   </rect> +  </property> +  <property name="windowTitle"> +   <string>Tracker settings</string> +  </property> +  <layout class="QGridLayout" name="gridLayout"> +   <item row="5" column="0"> +    <widget class="QDialogButtonBox" name="buttonBox"> +     <property name="standardButtons"> +      <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set> +     </property> +    </widget> +   </item> +   <item row="3" column="0"> +    <widget class="QFrame" name="frame_3"> +     <property name="frameShape"> +      <enum>QFrame::StyledPanel</enum> +     </property> +     <property name="frameShadow"> +      <enum>QFrame::Raised</enum> +     </property> +     <layout class="QHBoxLayout" name="horizontalLayout"> +      <property name="spacing"> +       <number>0</number> +      </property> +      <property name="leftMargin"> +       <number>0</number> +      </property> +      <property name="topMargin"> +       <number>0</number> +      </property> +      <property name="rightMargin"> +       <number>0</number> +      </property> +      <property name="bottomMargin"> +       <number>0</number> +      </property> +      <item> +       <widget class="QGroupBox" name="groupBox"> +        <property name="sizePolicy"> +         <sizepolicy hsizetype="Fixed" vsizetype="Preferred"> +          <horstretch>0</horstretch> +          <verstretch>0</verstretch> +         </sizepolicy> +        </property> +        <property name="title"> +         <string>Camera Configuration</string> +        </property> +        <layout class="QGridLayout" name="gridLayout_4"> +         <item row="2" column="1"> +          <widget class="QComboBox" name="cameraName"> +           <property name="sizePolicy"> +            <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> +             <horstretch>0</horstretch> +             
<verstretch>0</verstretch> +            </sizepolicy> +           </property> +          </widget> +         </item> +         <item row="0" column="1"> +          <widget class="QSpinBox" name="cameraFOV"> +           <property name="sizePolicy"> +            <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> +             <horstretch>0</horstretch> +             <verstretch>0</verstretch> +            </sizepolicy> +           </property> +           <property name="locale"> +            <locale language="English" country="UnitedStates"/> +           </property> +           <property name="minimum"> +            <number>35</number> +           </property> +           <property name="maximum"> +            <number>90</number> +           </property> +          </widget> +         </item> +         <item row="1" column="0"> +          <widget class="QLabel" name="label_12"> +           <property name="text"> +            <string>Frames per second</string> +           </property> +          </widget> +         </item> +         <item row="2" column="0"> +          <widget class="QLabel" name="label_10"> +           <property name="text"> +            <string>Camera name</string> +           </property> +          </widget> +         </item> +         <item row="0" column="0"> +          <widget class="QLabel" name="label_9"> +           <property name="text"> +            <string>Diagonal FOV</string> +           </property> +          </widget> +         </item> +         <item row="1" column="1"> +          <widget class="QComboBox" name="cameraFPS"> +           <property name="sizePolicy"> +            <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> +             <horstretch>0</horstretch> +             <verstretch>0</verstretch> +            </sizepolicy> +           </property> +          </widget> +         </item> +         <item row="3" column="1"> +          <widget class="QPushButton" name="camera_settings"> +           <property name="sizePolicy"> +            <sizepolicy hsizetype="Preferred" vsizetype="Maximum"> +             <horstretch>0</horstretch> +             <verstretch>0</verstretch> +            </sizepolicy> +           </property> +           <property name="text"> +            <string>Camera settings</string> +           </property> +          </widget> +         </item> +        </layout> +       </widget> +      </item> +      <item> +       <widget class="QGroupBox" name="groupBox_10"> +        <property name="sizePolicy"> +         <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> +          <horstretch>0</horstretch> +          <verstretch>0</verstretch> +         </sizepolicy> +        </property> +        <property name="title"> +         <string>Head Center Offset</string> +        </property> +        <layout class="QGridLayout" name="gridLayout_5"> +         <item row="0" column="0"> +          <widget class="QFrame" name="frame_4"> +           <property name="sizePolicy"> +            <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> +             <horstretch>0</horstretch> +             <verstretch>0</verstretch> +            </sizepolicy> +           </property> +           <property name="maximumSize"> +            <size> +             <width>16777215</width> +             <height>16777215</height> +            </size> +           </property> +           <property name="frameShape"> +            <enum>QFrame::NoFrame</enum> +           </property> +           <property name="frameShadow"> +            
<enum>QFrame::Raised</enum> +           </property> +           <layout class="QGridLayout" name="gridLayout_11"> +            <item row="1" column="1"> +             <widget class="QSpinBox" name="ty_spin"> +              <property name="maximumSize"> +               <size> +                <width>150</width> +                <height>16777215</height> +               </size> +              </property> +              <property name="suffix"> +               <string> mm</string> +              </property> +              <property name="minimum"> +               <number>-65535</number> +              </property> +              <property name="maximum"> +               <number>65536</number> +              </property> +             </widget> +            </item> +            <item row="2" column="0"> +             <widget class="QLabel" name="label_66"> +              <property name="sizePolicy"> +               <sizepolicy hsizetype="Maximum" vsizetype="Preferred"> +                <horstretch>0</horstretch> +                <verstretch>0</verstretch> +               </sizepolicy> +              </property> +              <property name="text"> +               <string>Right</string> +              </property> +             </widget> +            </item> +            <item row="2" column="1"> +             <widget class="QSpinBox" name="tz_spin"> +              <property name="maximumSize"> +               <size> +                <width>150</width> +                <height>16777215</height> +               </size> +              </property> +              <property name="suffix"> +               <string> mm</string> +              </property> +              <property name="minimum"> +               <number>-65535</number> +              </property> +              <property name="maximum"> +               <number>65536</number> +              </property> +             </widget> +            </item> +            <item row="0" column="0"> +             <widget class="QLabel" name="label_61"> +              <property name="sizePolicy"> +               <sizepolicy hsizetype="Maximum" vsizetype="Preferred"> +                <horstretch>0</horstretch> +                <verstretch>0</verstretch> +               </sizepolicy> +              </property> +              <property name="text"> +               <string>Forward</string> +              </property> +             </widget> +            </item> +            <item row="0" column="1"> +             <widget class="QSpinBox" name="tx_spin"> +              <property name="maximumSize"> +               <size> +                <width>150</width> +                <height>16777215</height> +               </size> +              </property> +              <property name="suffix"> +               <string> mm</string> +              </property> +              <property name="minimum"> +               <number>-65535</number> +              </property> +              <property name="maximum"> +               <number>65536</number> +              </property> +             </widget> +            </item> +            <item row="1" column="0"> +             <widget class="QLabel" name="label_62"> +              <property name="sizePolicy"> +               <sizepolicy hsizetype="Maximum" vsizetype="Preferred"> +                <horstretch>0</horstretch> +                <verstretch>0</verstretch> +               </sizepolicy> +              </property> +              <property name="text"> +               <string>Up</string> +              </property> +            
 </widget> +            </item> +           </layout> +          </widget> +         </item> +         <item row="0" column="1"> +          <widget class="QFrame" name="frame_5"> +           <property name="sizePolicy"> +            <sizepolicy hsizetype="Preferred" vsizetype="Expanding"> +             <horstretch>0</horstretch> +             <verstretch>0</verstretch> +            </sizepolicy> +           </property> +           <property name="minimumSize"> +            <size> +             <width>260</width> +             <height>0</height> +            </size> +           </property> +           <property name="frameShape"> +            <enum>QFrame::NoFrame</enum> +           </property> +           <property name="frameShadow"> +            <enum>QFrame::Raised</enum> +           </property> +           <layout class="QVBoxLayout" name="verticalLayout_2"> +            <item> +             <widget class="QLabel" name="label_59"> +              <property name="text"> +               <string>Use only yaw and pitch while calibrating. +Don't roll or change position.</string> +              </property> +              <property name="alignment"> +               <set>Qt::AlignCenter</set> +              </property> +              <property name="wordWrap"> +               <bool>true</bool> +              </property> +              <property name="openExternalLinks"> +               <bool>false</bool> +              </property> +             </widget> +            </item> +            <item> +             <widget class="QLabel" name="sample_count_display"> +              <property name="sizePolicy"> +               <sizepolicy hsizetype="Minimum" vsizetype="Maximum"> +                <horstretch>0</horstretch> +                <verstretch>0</verstretch> +               </sizepolicy> +              </property> +              <property name="text"> +               <string/> +              </property> +              <property name="wordWrap"> +               <bool>true</bool> +              </property> +             </widget> +            </item> +            <item> +             <widget class="QPushButton" name="tcalib_button"> +              <property name="enabled"> +               <bool>false</bool> +              </property> +              <property name="text"> +               <string>Start calibration</string> +              </property> +              <property name="checkable"> +               <bool>true</bool> +              </property> +             </widget> +            </item> +           </layout> +          </widget> +         </item> +        </layout> +       </widget> +      </item> +     </layout> +    </widget> +   </item> +   <item row="4" column="0"> +    <widget class="QCheckBox" name="showNetworkInput"> +     <property name="text"> +      <string>Show Network Input</string> +     </property> +    </widget> +   </item> +  </layout> + </widget> + <resources/> + <connections/> + <designerdata> +  <property name="gridDeltaX"> +   <number>10</number> +  </property> +  <property name="gridDeltaY"> +   <number>10</number> +  </property> +  <property name="gridSnapX"> +   <bool>false</bool> +  </property> +  <property name="gridSnapY"> +   <bool>false</bool> +  </property> +  <property name="gridVisible"> +   <bool>true</bool> +  </property> + </designerdata> +</ui> | 
