author     Michael Welter <michael@welter-4d.de>     2021-03-13 11:27:03 +0100
committer  Michael Welter <michael@welter-4d.de>     2021-05-02 18:20:41 +0200
commit     7e582247ab0942398225b3e190d9fadafcf19950 (patch)
tree       394d8ca7c1aedf058208191b19b8e3e9fe4d134c /tracker-neuralnet
parent     058942f40e17e091b91df5436d771d61203ccc73 (diff)
Add a face-alignment-based tracker
Powered by AI!
Models were generated with code from
https://github.com/DaWelter/neuralnet-tracker-traincode/releases/tag/v0.1
Diffstat (limited to 'tracker-neuralnet')
-rw-r--r--  tracker-neuralnet/BUILD.md                        34
-rw-r--r--  tracker-neuralnet/CMakeLists.txt                  23
-rw-r--r--  tracker-neuralnet/ftnoir_tracker_neuralnet.cpp   917
-rw-r--r--  tracker-neuralnet/ftnoir_tracker_neuralnet.h     231
-rw-r--r--  tracker-neuralnet/images/neuralnet.png           bin  0 -> 595 bytes
-rw-r--r--  tracker-neuralnet/lang/nl_NL.ts                   91
-rw-r--r--  tracker-neuralnet/lang/ru_RU.ts                   91
-rw-r--r--  tracker-neuralnet/lang/stub.ts                    91
-rw-r--r--  tracker-neuralnet/lang/zh_CN.ts                   91
-rw-r--r--  tracker-neuralnet/models/head-localizer.onnx     bin  0 -> 279403 bytes
-rw-r--r--  tracker-neuralnet/models/head-pose.onnx          bin  0 -> 13047683 bytes
-rw-r--r--  tracker-neuralnet/neuralnet-tracker.qrc            5
-rw-r--r--  tracker-neuralnet/neuralnet-trackercontrols.ui   375
13 files changed, 1949 insertions, 0 deletions
diff --git a/tracker-neuralnet/BUILD.md b/tracker-neuralnet/BUILD.md
new file mode 100644
index 00000000..8bb694dd
--- /dev/null
+++ b/tracker-neuralnet/BUILD.md
@@ -0,0 +1,34 @@
+ONNX Runtime
+------------
+
+Recommended approach on Windows: build a shared library from source, using
+the static MSVC runtime library. The v1.6.0 branch should work fine.
+
+Source location: https://github.com/microsoft/onnxruntime
+
+In order to build, execute `build.bat` as follows:
+
+```
+$ build.bat --config RelWithDebInfo --x86 --build_dir .\buildx86\ \
+    --enable_msvc_static_runtime --build_shared_lib --skip_tests \
+    --cmake_generator "Visual Studio 15 2017"
+```
+
+Replace the argument for `--cmake_generator` if needed.
+
+The result is a messy directory `buildx86\RelWithDebInfo\RelWithDebInfo`
+rather than a proper distribution. However, only a few files are needed;
+they can be copied manually and are listed below under their respective
+folders:
+
+```
+onnxruntime-x86-release/include:
+cpu_provider_factory.h                 onnxruntime_cxx_api.h
+experimental_onnxruntime_cxx_api.h     onnxruntime_cxx_inline.h
+experimental_onnxruntime_cxx_inline.h  onnxruntime_session_options_config_keys.h
+onnxruntime_c_api.h
+
+onnxruntime-x86-release/lib:
+onnxruntime.dll  onnxruntime.exp  onnxruntime.lib  onnxruntime.pdb
+```
+
+See also https://www.onnxruntime.ai/docs/how-to/build.html
diff --git a/tracker-neuralnet/CMakeLists.txt b/tracker-neuralnet/CMakeLists.txt
new file mode 100644
index 00000000..d06f0c83
--- /dev/null
+++ b/tracker-neuralnet/CMakeLists.txt
@@ -0,0 +1,23 @@
+include(opentrack-opencv)
+find_package(OpenCV QUIET)
+find_package(OpenMP QUIET) # Used to control the number of onnx threads.
+set(SDK_ONNX_LIBPATH "" CACHE FILEPATH "Full path of onnx library")
+
+if(OpenCV_FOUND AND SDK_ONNX_LIBPATH AND OpenMP_FOUND)
+    get_filename_component(ONNX_INCLUDE_DIR "${SDK_ONNX_LIBPATH}" DIRECTORY)
+    get_filename_component(ONNX_INCLUDE_DIR "${ONNX_INCLUDE_DIR}" ABSOLUTE)
+    set(ONNX_INCLUDE_DIR "${ONNX_INCLUDE_DIR}/../include")
+
+    otr_module(tracker-neuralnet)
+    target_include_directories(${self} SYSTEM PUBLIC
+        ${OpenCV_INCLUDE_DIRS} "${ONNX_INCLUDE_DIR}")
+    target_link_libraries(${self}
+        opentrack-cv "${SDK_ONNX_LIBPATH}" opencv_imgproc opencv_core
+        opencv_imgcodecs opencv_calib3d
+        OpenMP::OpenMP_C)
+
+    install(
+        FILES "models/head-localizer.onnx" "models/head-pose.onnx"
+        DESTINATION "${opentrack-libexec}/models"
+        PERMISSIONS ${opentrack-perms-file})
+endif()
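With the files copied into the layout shown above, the tracker is enabled by passing the full path of the import library as the `SDK_ONNX_LIBPATH` cache variable; the CMake script above derives the header directory from it as `<libdir>/../include`. A minimal sketch of the configure step, with `C:/dev/onnxruntime-x86-release` standing in for wherever the files were actually copied:

```
$ cmake -G "Visual Studio 15 2017" \
    -DSDK_ONNX_LIBPATH=C:/dev/onnxruntime-x86-release/lib/onnxruntime.lib \
    path/to/opentrack
```

OpenCV and OpenMP must also be found; otherwise the `if()` guard in CMakeLists.txt skips the tracker module silently.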
\ No newline at end of file diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp new file mode 100644 index 00000000..2fad17aa --- /dev/null +++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp @@ -0,0 +1,917 @@ +/* Copyright (c) 2021 Michael Welter <michael@welter-4d.de> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + */ + +#include "ftnoir_tracker_neuralnet.h" +#include "compat/sleep.hpp" +#include "compat/math-imports.hpp" +#include "cv/init.hpp" +#include <opencv2/core.hpp> +#include <opencv2/core/hal/interface.h> +#include <opencv2/core/types.hpp> +#include <opencv2/calib3d.hpp> +#include <opencv2/imgcodecs.hpp> +#include "compat/timer.hpp" +#include <omp.h> + +#ifdef _MSC_VER +# pragma warning(disable : 4702) +#endif + +#include <QMutexLocker> +#include <QDebug> +#include <QFile> + +#include <cstdio> +#include <cmath> +#include <algorithm> +#include <chrono> + +// Some demo code for onnx +// https://github.com/microsoft/onnxruntime/blob/master/csharp/test/Microsoft.ML.OnnxRuntime.EndToEndTests.Capi/C_Api_Sample.cpp +// https://github.com/leimao/ONNX-Runtime-Inference/blob/main/src/inference.cpp + +namespace +{ + +using numeric_types::vec3; +using numeric_types::vec2; +using numeric_types::mat33; + +// Minimal difference if at all going from 1 to 2 threads. +static constexpr int num_threads = 1; + + +#if _MSC_VER +std::wstring convert(const QString &s) { return s.toStdWString(); } +#else +std::string convert(const QString &s) { return s.toStdString(); } +#endif + + +float sigmoid(float x) +{ + return 1.f/(1.f + std::exp(-x)); +} + + +template<class T> +cv::Rect_<T> squarize(const cv::Rect_<T> &r) +{ + cv::Point_<T> c{r.x + r.width/T(2), r.y + r.height/T(2)}; + const T sz = std::max(r.height, r.width); + return {c.x - sz/T(2), c.y - sz/T(2), sz, sz}; +} + + +int compute_padding(const cv::Rect &r, int w, int h) +{ + using std::max; + return max({ + max(-r.x, 0), + max(-r.y, 0), + max(r.x+r.width-w, 0), + max(r.y+r.height-h, 0) + }); +} + + +cv::Rect2f unnormalize(const cv::Rect2f &r, int h, int w) +{ + auto unnorm = [](float x) -> float { return 0.5*(x+1); }; + auto tl = r.tl(); + auto br = r.br(); + auto x0 = unnorm(tl.x)*w; + auto y0 = unnorm(tl.y)*h; + auto x1 = unnorm(br.x)*w; + auto y1 = unnorm(br.y)*h; + return { + x0, y0, x1-x0, y1-y0 + }; +} + +cv::Point2f normalize(const cv::Point2f &p, int h, int w) +{ + return { + p.x/w*2.f-1.f, + p.y/h*2.f-1.f + }; +} + + +mat33 rotation_from_two_vectors(const vec3 &a, const vec3 &b) +{ + vec3 axis = a.cross(b); + const float len_a = cv::norm(a); + const float len_b = cv::norm(b); + const float len_axis = cv::norm(axis); + const float sin_angle = std::clamp(len_axis / (len_a * len_b), -1.f, 1.f); + const float angle = std::asin(sin_angle); + axis *= angle/(1.e-12 + len_axis); + mat33 out; + cv::Rodrigues(axis, out); + return out; +} + + +/* Computes correction due to head being off screen center. + x, y: In screen space, i.e. 
in [-1,1] + focal_length_x: In screen space +*/ +mat33 compute_rotation_correction(const cv::Point2f &p, float focal_length_x) +{ + return rotation_from_two_vectors( + {1.f,0.f,0.f}, + {focal_length_x, p.y, p.x}); +} + + +mat33 quaternion_to_mat33(const std::array<float,4> quat) +{ + mat33 m; + const float w = quat[0]; + const float i = quat[1]; + const float j = quat[2]; + const float k = quat[3]; + m(0,0) = 1.f - 2.f*(j*j + k*k); + m(1,0) = 2.f*(i*j + k*w); + m(2,0) = 2.f*(i*k - j*w); + m(0,1) = 2.f*(i*j - k*w); + m(1,1) = 1.f - 2.f*(i*i + k*k); + m(2,1) = 2.f*(j*k + i*w); + m(0,2) = 2.f*(i*k + j*w); + m(1,2) = 2.f*(j*k - i*w); + m(2,2) = 1.f - 2.f*(i*i + j*j); + return m; +} + + +template<class T> +T iou(const cv::Rect_<T> &a, const cv::Rect_<T> &b) +{ + auto i = a & b; + return double{i.area()} / (a.area()+b.area()-i.area()); +} + + +} // namespace + + +namespace neuralnet_tracker_ns +{ + + +int enum_to_fps(int value) +{ + switch (value) + { + case fps_30: return 30; + case fps_60: return 60; + default: [[fallthrough]]; + case fps_default: return 0; + } +} + + +Localizer::Localizer(Ort::MemoryInfo &allocator_info, Ort::Session &&session) : + session{std::move(session)}, + scaled_frame(input_img_height, input_img_width, CV_8U), + input_mat(input_img_height, input_img_width, CV_32F) +{ + // Only works when input_mat does not reallocated memory ...which it should not. + // Non-owning memory reference to input_mat? + // Note: shape = (bach x channels x h x w) + const std::int64_t input_shape[4] = { 1, 1, input_img_height, input_img_width }; + input_val = Ort::Value::CreateTensor<float>(allocator_info, input_mat.ptr<float>(0), input_mat.total(), input_shape, 4); + + const std::int64_t output_shape[2] = { 1, 5 }; + output_val = Ort::Value::CreateTensor<float>(allocator_info, results.data(), results.size(), output_shape, 2); +} + + +std::pair<float, cv::Rect2f> Localizer::run( + const cv::Mat &frame) +{ + auto p = input_mat.ptr(0); + + cv::resize(frame, scaled_frame, { input_img_width, input_img_height }, 0, 0, cv::INTER_AREA); + scaled_frame.convertTo(input_mat, CV_32F, 1./255., -0.5); + + assert (input_mat.ptr(0) == p); + assert (!input_mat.empty() && input_mat.isContinuous()); + assert (input_mat.cols == input_img_width && input_mat.rows == input_img_height); + + const char* input_names[] = {"x"}; + const char* output_names[] = {"logit_box"}; + + //Timer t_; t_.start(); + + const auto nt = omp_get_num_threads(); + omp_set_num_threads(num_threads); + session.Run(Ort::RunOptions{nullptr}, input_names, &input_val, 1, output_names, &output_val, 1); + omp_set_num_threads(nt); + + //qDebug() << "localizer: " << t_.elapsed_ms() << " ms\n"; + + const cv::Rect2f roi = unnormalize(cv::Rect2f{ + results[1], + results[2], + results[3]-results[1], // Width + results[4]-results[2] // Height + }, frame.rows, frame.cols); + const float score = sigmoid(results[0]); + + return { score, roi }; +} + + +PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&session) : + session{std::move(session)}, + scaled_frame(input_img_height, input_img_width, CV_8U), + input_mat(input_img_height, input_img_width, CV_32F) +{ + { + const std::int64_t input_shape[4] = { 1, 1, input_img_height, input_img_width }; + input_val = Ort::Value::CreateTensor<float>(allocator_info, input_mat.ptr<float>(0), input_mat.total(), input_shape, 4); + } + + { + const std::int64_t output_shape[2] = { 1, 3 }; + output_val[0] = Ort::Value::CreateTensor<float>( + allocator_info, &output_coord[0], output_coord.rows, 
output_shape, 2); + } + + { + const std::int64_t output_shape[2] = { 1, 4 }; + output_val[1] = Ort::Value::CreateTensor<float>( + allocator_info, &output_quat[0], output_quat.rows, output_shape, 2); + } + + { + const std::int64_t output_shape[2] = { 1, 4 }; + output_val[2] = Ort::Value::CreateTensor<float>( + allocator_info, &output_box[0], output_box.rows, output_shape, 2); + } +} + + +int PoseEstimator::find_input_intensity_90_pct_quantile() const +{ + const int channels[] = { 0 }; + const int hist_size[] = { 255 }; + float range[] = { 0, 256 }; + const float* ranges[] = { range }; + cv::Mat hist; + cv::calcHist(&scaled_frame, 1, channels, cv::Mat(), hist, 1, hist_size, ranges, true, false); + int gray_level = 0; + const int num_pixels_quantile = scaled_frame.total()*0.9f; + int num_pixels_accum = 0; + for (int i=0; i<hist_size[0]; ++i) + { + num_pixels_accum += hist.at<float>(i); + if (num_pixels_accum > num_pixels_quantile) + { + gray_level = i; + break; + } + } + return gray_level; +} + + +std::optional<PoseEstimator::Face> PoseEstimator::run( + const cv::Mat &frame, const cv::Rect &box) +{ + cv::Mat cropped; + + const int patch_size = std::max(box.width, box.height)*1.05; + const cv::Point2f patch_center = { + std::clamp<float>(box.x + 0.5f*box.width, 0.f, frame.cols), + std::clamp<float>(box.y + 0.5f*box.height, 0.f, frame.rows) + }; + cv::getRectSubPix(frame, {patch_size, patch_size}, patch_center, cropped); + + // Will get failure if patch_center is outside image boundaries. + // Have to catch this case. + if (cropped.rows != patch_size || cropped.cols != patch_size) + return {}; + + auto p = input_mat.ptr(0); + + cv::resize(cropped, scaled_frame, { input_img_width, input_img_height }, 0, 0, cv::INTER_AREA); + + // Automatic brightness amplification. + const int brightness = find_input_intensity_90_pct_quantile(); + const double alpha = brightness<127 ? 0.5/std::max(5,brightness) : 1./255; + const double beta = -0.5; + + scaled_frame.convertTo(input_mat, CV_32F, alpha, beta); + + assert (input_mat.ptr(0) == p); + assert (!input_mat.empty() && input_mat.isContinuous()); + assert (input_mat.cols == input_img_width && input_mat.rows == input_img_height); + + const char* input_names[] = {"x"}; + const char* output_names[] = {"pos_size", "quat", "box"}; + + //Timer t_; t_.start(); + + const auto nt = omp_get_num_threads(); + omp_set_num_threads(num_threads); + session.Run(Ort::RunOptions{nullptr}, input_names, &input_val, 1, output_names, output_val, 3); + omp_set_num_threads(nt); + + // FIXME: Execution time fluctuates wildly. 19 to 26 ms. Why??? + // The instructions are always the same. Maybe a memory allocation + // issue. The ONNX api suggests that tensor are allocated in an + // arena. Does that matter? Maybe the issue is something else? + + //qDebug() << "pose net: " << t_.elapsed_ms() << " ms\n"; + + // Perform coordinate transformation. + // From patch-local normalized in [-1,1] to + // frame unnormalized pixel coordinates. + + const cv::Point2f center = patch_center + + (0.5f*patch_size)*cv::Point2f{output_coord[0], output_coord[1]}; + + const float size = patch_size*0.5f*output_coord[2]; + + // Following Eigen which uses quat components in the order w, x, y, z. 
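+    // Worked example (hypothetical values): if the network emitted the
+    // identity rotation, output_quat would be {0, 0, 0, 1} in its
+    // (x, y, z, w) order, and the array built next becomes {1, 0, 0, 0},
+    // i.e. w first, which is the order quaternion_to_mat33 expects.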
+ const std::array<float,4> rotation = { + output_quat[3], + output_quat[0], + output_quat[1], + output_quat[2] }; + + const cv::Rect2f outbox = { + patch_center.x + (0.5f*patch_size)*output_box[0], + patch_center.y + (0.5f*patch_size)*output_box[1], + 0.5f*patch_size*(output_box[2]-output_box[0]), + 0.5f*patch_size*(output_box[3]-output_box[1]) + }; + + return std::optional<Face>({ + rotation, outbox, center, size + }); +} + + +cv::Mat PoseEstimator::last_network_input() const +{ + cv::Mat ret; + if (!input_mat.empty()) + { + input_mat.convertTo(ret, CV_8U, 255., 127.); + cv::cvtColor(ret, ret, cv::COLOR_GRAY2RGB); + } + return ret; +} + + +bool neuralnet_tracker::detect() +{ + // Note: BGR colors! + if (!last_localizer_roi || !last_roi || + iou(*last_localizer_roi,*last_roi)<0.25) + { + auto [p, rect] = localizer->run(grayscale); + if (p > 0.5) + { + last_localizer_roi = rect; + last_roi = rect; + } + } + + if (!last_roi) + return false; + + auto face = poseestimator->run(grayscale, *last_roi); + + if (!face) + { + last_roi.reset(); + return false; + } + + last_roi = face->box; + + Affine pose = compute_pose(*face); + + draw_gizmos(frame, *face, pose); + + { + QMutexLocker lck(&mtx); + this->pose_ = pose; + } + + return true; +} + + +Affine neuralnet_tracker::compute_pose(const PoseEstimator::Face &face) const +{ + const mat33 rot_correction = compute_rotation_correction( + normalize(face.center, frame.rows, frame.cols), + intrinsics.focal_length_w); + + const mat33 m = rot_correction * quaternion_to_mat33(face.rotation); + + /* + + hhhhhh <- head size (meters) + \ | ----------------------- + \ | \ + \ | | + \ | |- tz (meters) + ____ <- face.size / width | + \ | | | + \| |- focal length / + ------------------------ + */ + + // Compute the location the network outputs in 3d space. + const vec3 face_world_pos = image_to_world(face.center.x, face.center.y, face.size, head_size_mm); + + // But this is in general not the location of the rotation joint in the neck. + // So we need an extra offset. 
Which we determine by solving + // z,y,z-pos = head_joint_loc + R_face * offset + + const vec3 pos = face_world_pos + + m * vec3{ + static_cast<float>(s.offset_fwd), + static_cast<float>(s.offset_up), + static_cast<float>(s.offset_right)}; + + return { m, pos }; +} + + +void neuralnet_tracker::draw_gizmos( + cv::Mat frame, + const PoseEstimator::Face &face, + const Affine& pose) const +{ + if (last_roi) + { + const int col = 255; + cv::rectangle(frame, *last_roi, cv::Scalar(0, 255, 0), /*thickness=*/1); + } + if (last_localizer_roi) + { + const int col = 255; + cv::rectangle(frame, *last_localizer_roi, cv::Scalar(col, 0, 255-col), /*thickness=*/1); + } + + if (face.size>=1.f) + cv::circle(frame, static_cast<cv::Point>(face.center), int(face.size), cv::Scalar(255,255,255), 2); + cv::circle(frame, static_cast<cv::Point>(face.center), 3, cv::Scalar(255,255,255), -1); + + auto draw_coord_line = [&](int i, const cv::Scalar& color) + { + const float vx = -pose.R(2,i); + const float vy = -pose.R(1,i); + static constexpr float len = 100.f; + cv::Point q = face.center + len*cv::Point2f{vx, vy}; + cv::line(frame, static_cast<cv::Point>(face.center), static_cast<cv::Point>(q), color, 2); + }; + draw_coord_line(0, {0, 0, 255}); + draw_coord_line(1, {0, 255, 0}); + draw_coord_line(2, {255, 0, 0}); + + if (s.show_network_input) + { + cv::Mat netinput = poseestimator->last_network_input(); + if (!netinput.empty()) + { + const int w = std::min(netinput.cols, frame.cols); + const int h = std::min(netinput.rows, frame.rows); + cv::Rect roi(0, 0, w, h); + netinput(roi).copyTo(frame(roi)); + } + } + { + // Draw the computed joint position + auto xy = world_to_image(pose.t); + cv::circle(frame, cv::Point(xy[0],xy[1]), 5, cv::Scalar(0,0,255), -1); + } + + char buf[128]; + ::snprintf(buf, sizeof(buf), "%d Hz, Max: %d ms", clamp(int(fps), 0, 9999), int(max_frame_time*1000.)); + cv::putText(frame, buf, cv::Point(10, frame.rows-10), cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(0, 255, 0), 1); +} + + +neuralnet_tracker::neuralnet_tracker() +{ + opencv_init(); + cv::setNumThreads(num_threads); +} + + +neuralnet_tracker::~neuralnet_tracker() +{ + requestInterruption(); + wait(); + // fast start/stop causes breakage + portable::sleep(1000); +} + + +module_status neuralnet_tracker::start_tracker(QFrame* videoframe) +{ + videoframe->show(); + videoWidget = std::make_unique<cv_video_widget>(videoframe); + layout = std::make_unique<QHBoxLayout>(); + layout->setContentsMargins(0, 0, 0, 0); + layout->addWidget(videoWidget.get()); + videoframe->setLayout(layout.get()); + videoWidget->show(); + start(); + return status_ok(); +} + + +bool neuralnet_tracker::load_and_initialize_model() +{ + const QString localizer_model_path_enc = + OPENTRACK_BASE_PATH+"/" OPENTRACK_LIBRARY_PATH "/models/head-localizer.onnx"; + const QString poseestimator_model_path_enc = + OPENTRACK_BASE_PATH+"/" OPENTRACK_LIBRARY_PATH "/models/head-pose.onnx"; + + try + { + env = Ort::Env{ + OrtLoggingLevel::ORT_LOGGING_LEVEL_ERROR, + "tracker-neuralnet" + }; + auto opts = Ort::SessionOptions{}; + // Do thread settings here do anything? + // There is a warning which says to control number of threads via + // openmp settings. Which is what we do. omp_set_num_threads directly + // before running the inference pass. 
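+        // For reference: SetIntraOpNumThreads bounds the threads used
+        // within a single operator, SetInterOpNumThreads bounds
+        // parallelism across independent operators, and
+        // ORT_ENABLE_EXTENDED selects the extended set of graph
+        // optimizations (e.g. node fusions).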
+ opts.SetIntraOpNumThreads(num_threads); + opts.SetInterOpNumThreads(num_threads); + opts.SetGraphOptimizationLevel( + GraphOptimizationLevel::ORT_ENABLE_EXTENDED); + + opts.EnableCpuMemArena(); + allocator_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + + localizer.emplace( + allocator_info, + Ort::Session{env, convert(localizer_model_path_enc).c_str(), opts}); + + poseestimator.emplace( + allocator_info, + Ort::Session{env, convert(poseestimator_model_path_enc).c_str(), opts}); + } + catch (const Ort::Exception &e) + { + qDebug() << "Failed to initialize the neural network models. ONNX error message: " + << e.what(); + return false; + } + return true; +} + + +bool neuralnet_tracker::open_camera() +{ + int fps = enum_to_fps(s.force_fps); + + QMutexLocker l(&camera_mtx); + + camera = video::make_camera(s.camera_name); + + if (!camera) + return false; + + video::impl::camera::info args {}; + + args.width = 320; + args.height = 240; + + if (fps) + args.fps = fps; + + if (!camera->start(args)) + { + qDebug() << "neuralnet tracker: can't open camera"; + return false; + } + return true; +} + + +void neuralnet_tracker::set_intrinsics() +{ + const int w = grayscale.cols, h = grayscale.rows; + const double diag_fov = s.fov * M_PI / 180.; + const double fov_w = 2.*atan(tan(diag_fov/2.)/sqrt(1. + h/(double)w * h/(double)w)); + const double fov_h = 2.*atan(tan(diag_fov/2.)/sqrt(1. + w/(double)h * w/(double)h)); + const double focal_length_w = 1. / tan(.5 * fov_w); + const double focal_length_h = 1. / tan(.5 * fov_h); + + intrinsics.fov_h = fov_h; + intrinsics.fov_w = fov_w; + intrinsics.focal_length_w = focal_length_w; + intrinsics.focal_length_h = focal_length_h; +} + + +vec3 neuralnet_tracker::image_to_world(float x, float y, float size, float real_size) const +{ + // Compute the location the network outputs in 3d space. 
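+    // Pinhole relation: a head of real size S at depth d projects to
+    // size_px = f_px * S / d, where f_px = focal_length_w * frame.cols / 2
+    // is the focal length in pixels (focal_length_w is normalized to the
+    // half-width in set_intrinsics). Solving for depth gives
+    // d = f_px * S / size_px, which is xpos below up to the sign of the
+    // world frame's depth axis.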
+ const float xpos = -(intrinsics.focal_length_w * frame.cols * 0.5f) / size * real_size; + const float zpos = (x / frame.cols * 2.f - 1.f) * xpos / intrinsics.focal_length_w; + const float ypos = (y / frame.rows * 2.f - 1.f) * xpos / intrinsics.focal_length_h; + return {xpos, ypos, zpos}; +} + + +vec2 neuralnet_tracker::world_to_image(const vec3& pos) const +{ + const float xscr = pos[2] / pos[0] * intrinsics.focal_length_w; + const float yscr = pos[1] / pos[0] * intrinsics.focal_length_h; + const float x = (xscr+1.)*0.5f*frame.cols; + const float y = (yscr+1.)*0.5f*frame.rows; + return {x, y}; +} + + +void neuralnet_tracker::run() +{ + if (!open_camera()) + return; + + if (!load_and_initialize_model()) + return; + + std::chrono::high_resolution_clock clk; + + while (!isInterruptionRequested()) + { + auto t = clk.now(); + { + QMutexLocker l(&camera_mtx); + + auto [ img, res ] = camera->get_frame(); + + if (!res) + { + l.unlock(); + portable::sleep(100); + continue; + } + + auto color = cv::Mat(img.height, img.width, CV_8UC(img.channels), (void*)img.data, img.stride); + color.copyTo(frame); + + switch (img.channels) + { + case 1: + grayscale.setTo(color); + break; + case 3: + cv::cvtColor(color, grayscale, cv::COLOR_BGR2GRAY); + break; + default: + qDebug() << "Can't handle" << img.channels << "color channels"; + return; + } + } + + set_intrinsics(); + + detect(); + + if (frame.rows > 0) + videoWidget->update_image(frame); + + update_fps( + std::chrono::duration_cast<std::chrono::milliseconds>( + clk.now() - t).count()*1.e-3); + } +} + + +void neuralnet_tracker::update_fps(double dt) +{ + const double alpha = dt/(dt + RC); + + if (dt > 1e-6) + { + fps *= 1 - alpha; + fps += alpha * 1./dt; + } + + max_frame_time = std::max(max_frame_time, dt); +} + + +void neuralnet_tracker::data(double *data) +{ + Affine tmp = [&]() + { + QMutexLocker lck(&mtx); + return pose_; + }(); + + const auto& mx = tmp.R.col(0); + const auto& my = tmp.R.col(1); + const auto& mz = -tmp.R.col(2); + + const float yaw = std::atan2(mx(2), mx(0)); + const float pitch = -std::atan2(-mx(1), std::sqrt(mx(2)*mx(2)+mx(0)*mx(0))); + const float roll = std::atan2(-my(2), mz(2)); + { + constexpr double rad2deg = 180/M_PI; + data[Yaw] = rad2deg * yaw; + data[Pitch] = rad2deg * pitch; + data[Roll] = rad2deg * roll; + + // convert to cm + data[TX] = -tmp.t[2] * 0.1; + data[TY] = tmp.t[1] * 0.1; + data[TZ] = -tmp.t[0] * 0.1; + } +} + + +Affine neuralnet_tracker::pose() +{ + QMutexLocker lck(&mtx); + return pose_; +} + + +void neuralnet_dialog::make_fps_combobox() +{ + for (int k = 0; k < fps_MAX; k++) + { + const int hz = enum_to_fps(k); + const QString name = (hz == 0) ? 
tr("Default") : QString::number(hz); + ui.cameraFPS->addItem(name, k); + } +} + + +neuralnet_dialog::neuralnet_dialog() : + trans_calib(1, 2) +{ + ui.setupUi(this); + + make_fps_combobox(); + tie_setting(s.force_fps, ui.cameraFPS); + + for (const auto& str : video::camera_names()) + ui.cameraName->addItem(str); + + tie_setting(s.camera_name, ui.cameraName); + tie_setting(s.fov, ui.cameraFOV); + tie_setting(s.offset_fwd, ui.tx_spin); + tie_setting(s.offset_up, ui.ty_spin); + tie_setting(s.offset_right, ui.tz_spin); + tie_setting(s.show_network_input, ui.showNetworkInput); + + connect(ui.buttonBox, SIGNAL(accepted()), this, SLOT(doOK())); + connect(ui.buttonBox, SIGNAL(rejected()), this, SLOT(doCancel())); + connect(ui.camera_settings, SIGNAL(clicked()), this, SLOT(camera_settings())); + + connect(&s.camera_name, value_::value_changed<QString>(), this, &neuralnet_dialog::update_camera_settings_state); + + update_camera_settings_state(s.camera_name); + + connect(&calib_timer, &QTimer::timeout, this, &neuralnet_dialog::trans_calib_step); + calib_timer.setInterval(35); + connect(ui.tcalib_button,SIGNAL(toggled(bool)), this, SLOT(startstop_trans_calib(bool))); +} + + +void neuralnet_dialog::doOK() +{ + s.b->save(); + close(); +} + + +void neuralnet_dialog::doCancel() +{ + close(); +} + + +void neuralnet_dialog::camera_settings() +{ + if (tracker) + { + QMutexLocker l(&tracker->camera_mtx); + (void)tracker->camera->show_dialog(); + } + else + (void)video::show_dialog(s.camera_name); +} + + +void neuralnet_dialog::update_camera_settings_state(const QString& name) +{ + (void)name; + ui.camera_settings->setEnabled(true); +} + + +void neuralnet_dialog::register_tracker(ITracker * x) +{ + tracker = static_cast<neuralnet_tracker*>(x); + ui.tcalib_button->setEnabled(true); +} + + +void neuralnet_dialog::unregister_tracker() +{ + tracker = nullptr; + ui.tcalib_button->setEnabled(false); +} + + +void neuralnet_dialog::trans_calib_step() +{ + if (tracker) + { + const Affine X_CM = [&]() { + QMutexLocker l(&calibrator_mutex); + return tracker->pose(); + }(); + trans_calib.update(X_CM.R, X_CM.t); + auto [_, nsamples] = trans_calib.get_estimate(); + + constexpr int min_yaw_samples = 15; + constexpr int min_pitch_samples = 12; + constexpr int min_samples = min_yaw_samples+min_pitch_samples; + + // Don't bother counting roll samples. Roll calibration is hard enough + // that it's a hidden unsupported feature anyway. + + QString sample_feedback; + if (nsamples[0] < min_yaw_samples) + sample_feedback = tr("%1 yaw samples. Yaw more to %2 samples for stable calibration.").arg(nsamples[0]).arg(min_yaw_samples); + else if (nsamples[1] < min_pitch_samples) + sample_feedback = tr("%1 pitch samples. Pitch more to %2 samples for stable calibration.").arg(nsamples[1]).arg(min_pitch_samples); + else + { + const int nsamples_total = nsamples[0] + nsamples[1]; + sample_feedback = tr("%1 samples. Over %2, good!").arg(nsamples_total).arg(min_samples); + } + ui.sample_count_display->setText(sample_feedback); + } + else + startstop_trans_calib(false); +} + + +void neuralnet_dialog::startstop_trans_calib(bool start) +{ + QMutexLocker l(&calibrator_mutex); + // FIXME: does not work ... + if (start) + { + qDebug() << "pt: starting translation calibration"; + calib_timer.start(); + trans_calib.reset(); + ui.sample_count_display->setText(QString()); + // Tracker must run with zero'ed offset for calibration. 
+ s.offset_fwd = 0; + s.offset_up = 0; + s.offset_right = 0; + } + else + { + calib_timer.stop(); + qDebug() << "pt: stopping translation calibration"; + { + auto [tmp, nsamples] = trans_calib.get_estimate(); + s.offset_fwd = int(tmp[0]); + s.offset_up = int(tmp[1]); + s.offset_right = int(tmp[2]); + } + } + ui.tx_spin->setEnabled(!start); + ui.ty_spin->setEnabled(!start); + ui.tz_spin->setEnabled(!start); + + if (start) + ui.tcalib_button->setText(tr("Stop calibration")); + else + ui.tcalib_button->setText(tr("Start calibration")); +} + + +settings::settings() : opts("neuralnet-tracker") {} + +} // neuralnet_tracker_ns + +OPENTRACK_DECLARE_TRACKER(neuralnet_tracker, neuralnet_dialog, neuralnet_metadata) diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.h b/tracker-neuralnet/ftnoir_tracker_neuralnet.h new file mode 100644 index 00000000..e26689a4 --- /dev/null +++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.h @@ -0,0 +1,231 @@ +/* Copyright (c) 2021 Michael Welter <michael@welter-4d.de> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + */ + +#pragma once + +#include "options/options.hpp" +#include "api/plugin-api.hpp" +#include "cv/video-widget.hpp" +#include "cv/translation-calibrator.hpp" +#include "cv/numeric.hpp" +#include "compat/timer.hpp" +#include "video/camera.hpp" +#include "cv/affine.hpp" + +#include <QObject> +#include <QThread> +#include <QMutex> +#include <QHBoxLayout> +#include <QDialog> +#include <QTimer> + +#include <memory> +#include <cinttypes> + +#include <onnxruntime_cxx_api.h> + +#include <opencv2/core.hpp> +#include <opencv2/core/types.hpp> +#include <opencv2/imgproc.hpp> + +#include "ui_neuralnet-trackercontrols.h" + +namespace neuralnet_tracker_ns +{ + + +using namespace options; + + +enum fps_choices +{ + fps_default = 0, + fps_30 = 1, + fps_60 = 2, + fps_MAX = 3 +}; + + +struct settings : opts { + value<int> offset_fwd { b, "offset-fwd", 200 }, // Millimeters + offset_up { b, "offset-up", 0 }, + offset_right { b, "offset-right", 0 }; + value<QString> camera_name { b, "camera-name", ""}; + value<int> fov { b, "field-of-view", 56 }; + value<fps_choices> force_fps { b, "force-fps", fps_default }; + value<bool> show_network_input { b, "show-network-input", false }; + settings(); +}; + + +struct CamIntrinsics +{ + float focal_length_w; + float focal_length_h; + float fov_w; + float fov_h; +}; + + +class Localizer +{ + public: + Localizer(Ort::MemoryInfo &allocator_info, + Ort::Session &&session); + + // Returns bounding wrt image coordinate of the input image + // The preceeding float is the score for being a face normalized to [0,1]. 
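+    // Typical use (cf. neuralnet_tracker::detect): call once per frame on
+    // the grayscale image and accept the returned ROI only when the score
+    // exceeds a threshold such as 0.5.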
+ std::pair<float, cv::Rect2f> run( + const cv::Mat &frame); + + private: + inline static constexpr int input_img_width = 288; + inline static constexpr int input_img_height = 224; + Ort::Session session{nullptr}; + // Inputs / outputs + cv::Mat scaled_frame{}, input_mat{}; + Ort::Value input_val{nullptr}, output_val{nullptr}; + std::array<float, 5> results; +}; + + +class PoseEstimator +{ + public: + struct Face + { + std::array<float,4> rotation; // Quaternion, (w, x, y, z) + // The following quantities are defined wrt the image space of the input + cv::Rect2f box; + cv::Point2f center; + float size; + }; + + PoseEstimator(Ort::MemoryInfo &allocator_info, + Ort::Session &&session); + // Inference + std::optional<Face> run(const cv::Mat &frame, const cv::Rect &box); + // Returns an image compatible with the 'frame' image for displaying. + cv::Mat last_network_input() const; + + private: + // Operates on the private image data members + int find_input_intensity_90_pct_quantile() const; + + inline static constexpr int input_img_width = 129; + inline static constexpr int input_img_height = 129; + Ort::Session session{nullptr}; + // Inputs + cv::Mat scaled_frame{}, input_mat{}; + Ort::Value input_val{nullptr}; + // Outputs + cv::Vec<float, 3> output_coord{}; + cv::Vec<float, 4> output_quat{}; + cv::Vec<float, 4> output_box{}; + Ort::Value output_val[3] = { + Ort::Value{nullptr}, + Ort::Value{nullptr}, + Ort::Value{nullptr}}; +}; + + +class neuralnet_tracker : protected virtual QThread, public ITracker +{ + Q_OBJECT +public: + neuralnet_tracker(); + ~neuralnet_tracker() override; + module_status start_tracker(QFrame* frame) override; + void data(double *data) override; + void run() override; + Affine pose(); + + QMutex camera_mtx; + std::unique_ptr<video::impl::camera> camera; + +private: + bool detect(); + bool open_camera(); + void set_intrinsics(); + bool load_and_initialize_model(); + void draw_gizmos( + cv::Mat frame, + const PoseEstimator::Face &face, + const Affine& pose) const; + void update_fps(double dt); + + Affine compute_pose(const PoseEstimator::Face &face) const; + numeric_types::vec3 image_to_world(float x, float y, float size, float real_size) const; + numeric_types::vec2 world_to_image(const numeric_types::vec3& p) const; + + settings s; + std::optional<Localizer> localizer; + std::optional<PoseEstimator> poseestimator; + Ort::Env env{nullptr}; + Ort::MemoryInfo allocator_info{nullptr}; + + CamIntrinsics intrinsics{}; + cv::Mat frame, grayscale; + std::optional<cv::Rect2f> last_localizer_roi; + std::optional<cv::Rect2f> last_roi; + static constexpr float head_size_mm = 200.f; + + double fps = 0; + double max_frame_time = 0; + static constexpr double RC = .25; + + QMutex mtx; // Protects the pose + Affine pose_; + + std::unique_ptr<cv_video_widget> videoWidget; + std::unique_ptr<QHBoxLayout> layout; +}; + + +class neuralnet_dialog : public ITrackerDialog +{ + Q_OBJECT +public: + neuralnet_dialog(); + void register_tracker(ITracker * x) override; + void unregister_tracker() override; +private: + void make_fps_combobox(); + + Ui::Form ui; + settings s; + + // Calibration code mostly taken from point tracker + QTimer calib_timer; + TranslationCalibrator trans_calib; + QMutex calibrator_mutex; + + neuralnet_tracker* tracker = nullptr; + +private Q_SLOTS: + void doOK(); + void doCancel(); + void camera_settings(); + void update_camera_settings_state(const QString& name); + void startstop_trans_calib(bool start); + void trans_calib_step(); +}; + + +class neuralnet_metadata : public 
Metadata +{ + Q_OBJECT + QString name() override { return QString("neuralnet tracker"); } + QIcon icon() override { return QIcon(":/images/neuralnet.png"); } +}; + + +} // neuralnet_tracker_ns + +using neuralnet_tracker_ns::neuralnet_tracker; +using neuralnet_tracker_ns::neuralnet_dialog; +using neuralnet_tracker_ns::neuralnet_metadata;
\ No newline at end of file diff --git a/tracker-neuralnet/images/neuralnet.png b/tracker-neuralnet/images/neuralnet.png Binary files differnew file mode 100644 index 00000000..1a10c53c --- /dev/null +++ b/tracker-neuralnet/images/neuralnet.png diff --git a/tracker-neuralnet/lang/nl_NL.ts b/tracker-neuralnet/lang/nl_NL.ts new file mode 100644 index 00000000..fb6c3348 --- /dev/null +++ b/tracker-neuralnet/lang/nl_NL.ts @@ -0,0 +1,91 @@ +<?xml version="1.0" encoding="utf-8"?> +<!DOCTYPE TS> +<TS version="2.1" language="nl_NL"> +<context> + <name>Form</name> + <message> + <source>Tracker settings</source> + <translation>Tracker-instellingen</translation> + </message> + <message> + <source>Frames per second</source> + <translation>Frames per seconde</translation> + </message> + <message> + <source>Camera name</source> + <translation>Cameranaam</translation> + </message> + <message> + <source>Diagonal FOV</source> + <translation>Diagonale FOV</translation> + </message> + <message> + <source>Camera settings</source> + <translation>Camera-instellingen</translation> + </message> + <message> + <source>Camera Configuration</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Head Center Offset</source> + <translation type="unfinished"></translation> + </message> + <message> + <source> mm</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Use only yaw and pitch while calibrating. +Don't roll or change position.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Start calibration</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Right</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Forward</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Up</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Show Network Input</source> + <translation type="unfinished"></translation> + </message> +</context> +<context> + <name>neuralnet_tracker_ns::neuralnet_dialog</name> + <message> + <source>Default</source> + <translation type="unfinished">Standaard</translation> + </message> + <message> + <source>%1 yaw samples. Yaw more to %2 samples for stable calibration.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>%1 pitch samples. Pitch more to %2 samples for stable calibration.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>%1 samples. 
Over %2, good!</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Stop calibration</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Start calibration</source> + <translation type="unfinished"></translation> + </message> +</context> +</TS> diff --git a/tracker-neuralnet/lang/ru_RU.ts b/tracker-neuralnet/lang/ru_RU.ts new file mode 100644 index 00000000..f1ba9a92 --- /dev/null +++ b/tracker-neuralnet/lang/ru_RU.ts @@ -0,0 +1,91 @@ +<?xml version="1.0" encoding="utf-8"?> +<!DOCTYPE TS> +<TS version="2.1" language="ru_RU"> +<context> + <name>Form</name> + <message> + <source>Tracker settings</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Diagonal FOV</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Camera settings</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Frames per second</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Camera name</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Camera Configuration</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Head Center Offset</source> + <translation type="unfinished"></translation> + </message> + <message> + <source> mm</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Use only yaw and pitch while calibrating. +Don't roll or change position.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Start calibration</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Right</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Forward</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Up</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Show Network Input</source> + <translation type="unfinished"></translation> + </message> +</context> +<context> + <name>neuralnet_tracker_ns::neuralnet_dialog</name> + <message> + <source>Default</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>%1 yaw samples. Yaw more to %2 samples for stable calibration.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>%1 pitch samples. Pitch more to %2 samples for stable calibration.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>%1 samples. 
Over %2, good!</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Stop calibration</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Start calibration</source> + <translation type="unfinished"></translation> + </message> +</context> +</TS> diff --git a/tracker-neuralnet/lang/stub.ts b/tracker-neuralnet/lang/stub.ts new file mode 100644 index 00000000..52b8aded --- /dev/null +++ b/tracker-neuralnet/lang/stub.ts @@ -0,0 +1,91 @@ +<?xml version="1.0" encoding="utf-8"?> +<!DOCTYPE TS> +<TS version="2.1"> +<context> + <name>Form</name> + <message> + <source>Tracker settings</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Diagonal FOV</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Camera settings</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Frames per second</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Camera name</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Camera Configuration</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Head Center Offset</source> + <translation type="unfinished"></translation> + </message> + <message> + <source> mm</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Use only yaw and pitch while calibrating. +Don't roll or change position.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Start calibration</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Right</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Forward</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Up</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Show Network Input</source> + <translation type="unfinished"></translation> + </message> +</context> +<context> + <name>neuralnet_tracker_ns::neuralnet_dialog</name> + <message> + <source>Default</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>%1 yaw samples. Yaw more to %2 samples for stable calibration.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>%1 pitch samples. Pitch more to %2 samples for stable calibration.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>%1 samples. 
Over %2, good!</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Stop calibration</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Start calibration</source> + <translation type="unfinished"></translation> + </message> +</context> +</TS> diff --git a/tracker-neuralnet/lang/zh_CN.ts b/tracker-neuralnet/lang/zh_CN.ts new file mode 100644 index 00000000..2d0dd8ff --- /dev/null +++ b/tracker-neuralnet/lang/zh_CN.ts @@ -0,0 +1,91 @@ +<?xml version="1.0" encoding="utf-8"?> +<!DOCTYPE TS> +<TS version="2.1"> +<context> + <name>Form</name> + <message> + <source>Tracker settings</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Diagonal FOV</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Camera name</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Frames per second</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Camera settings</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Camera Configuration</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Head Center Offset</source> + <translation type="unfinished"></translation> + </message> + <message> + <source> mm</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Use only yaw and pitch while calibrating. +Don't roll or change position.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Start calibration</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Right</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Forward</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Up</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Show Network Input</source> + <translation type="unfinished"></translation> + </message> +</context> +<context> + <name>neuralnet_tracker_ns::neuralnet_dialog</name> + <message> + <source>Default</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>%1 yaw samples. Yaw more to %2 samples for stable calibration.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>%1 pitch samples. Pitch more to %2 samples for stable calibration.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>%1 samples. 
Over %2, good!</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Stop calibration</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Start calibration</source> + <translation type="unfinished"></translation> + </message> +</context> +</TS> diff --git a/tracker-neuralnet/models/head-localizer.onnx b/tracker-neuralnet/models/head-localizer.onnx Binary files differnew file mode 100644 index 00000000..c128f89d --- /dev/null +++ b/tracker-neuralnet/models/head-localizer.onnx diff --git a/tracker-neuralnet/models/head-pose.onnx b/tracker-neuralnet/models/head-pose.onnx Binary files differnew file mode 100644 index 00000000..dcb55dcc --- /dev/null +++ b/tracker-neuralnet/models/head-pose.onnx diff --git a/tracker-neuralnet/neuralnet-tracker.qrc b/tracker-neuralnet/neuralnet-tracker.qrc new file mode 100644 index 00000000..d30ec313 --- /dev/null +++ b/tracker-neuralnet/neuralnet-tracker.qrc @@ -0,0 +1,5 @@ +<RCC> + <qresource prefix="/"> + <file>images/neuralnet.png</file> + </qresource> +</RCC> diff --git a/tracker-neuralnet/neuralnet-trackercontrols.ui b/tracker-neuralnet/neuralnet-trackercontrols.ui new file mode 100644 index 00000000..f16b5807 --- /dev/null +++ b/tracker-neuralnet/neuralnet-trackercontrols.ui @@ -0,0 +1,375 @@ +<?xml version="1.0" encoding="UTF-8"?> +<ui version="4.0"> + <class>Form</class> + <widget class="QWidget" name="Form"> + <property name="windowModality"> + <enum>Qt::NonModal</enum> + </property> + <property name="geometry"> + <rect> + <x>0</x> + <y>0</y> + <width>727</width> + <height>202</height> + </rect> + </property> + <property name="windowTitle"> + <string>Tracker settings</string> + </property> + <layout class="QGridLayout" name="gridLayout"> + <item row="5" column="0"> + <widget class="QDialogButtonBox" name="buttonBox"> + <property name="standardButtons"> + <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set> + </property> + </widget> + </item> + <item row="3" column="0"> + <widget class="QFrame" name="frame_3"> + <property name="frameShape"> + <enum>QFrame::StyledPanel</enum> + </property> + <property name="frameShadow"> + <enum>QFrame::Raised</enum> + </property> + <layout class="QHBoxLayout" name="horizontalLayout"> + <property name="spacing"> + <number>0</number> + </property> + <property name="leftMargin"> + <number>0</number> + </property> + <property name="topMargin"> + <number>0</number> + </property> + <property name="rightMargin"> + <number>0</number> + </property> + <property name="bottomMargin"> + <number>0</number> + </property> + <item> + <widget class="QGroupBox" name="groupBox"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Fixed" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="title"> + <string>Camera Configuration</string> + </property> + <layout class="QGridLayout" name="gridLayout_4"> + <item row="2" column="1"> + <widget class="QComboBox" name="cameraName"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + </widget> + </item> + <item row="0" column="1"> + <widget class="QSpinBox" name="cameraFOV"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="locale"> + <locale language="English" 
country="UnitedStates"/> + </property> + <property name="minimum"> + <number>35</number> + </property> + <property name="maximum"> + <number>90</number> + </property> + </widget> + </item> + <item row="1" column="0"> + <widget class="QLabel" name="label_12"> + <property name="text"> + <string>Frames per second</string> + </property> + </widget> + </item> + <item row="2" column="0"> + <widget class="QLabel" name="label_10"> + <property name="text"> + <string>Camera name</string> + </property> + </widget> + </item> + <item row="0" column="0"> + <widget class="QLabel" name="label_9"> + <property name="text"> + <string>Diagonal FOV</string> + </property> + </widget> + </item> + <item row="1" column="1"> + <widget class="QComboBox" name="cameraFPS"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + </widget> + </item> + <item row="3" column="1"> + <widget class="QPushButton" name="camera_settings"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Preferred" vsizetype="Maximum"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="text"> + <string>Camera settings</string> + </property> + </widget> + </item> + </layout> + </widget> + </item> + <item> + <widget class="QGroupBox" name="groupBox_10"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="title"> + <string>Head Center Offset</string> + </property> + <layout class="QGridLayout" name="gridLayout_5"> + <item row="0" column="0"> + <widget class="QFrame" name="frame_4"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="maximumSize"> + <size> + <width>16777215</width> + <height>16777215</height> + </size> + </property> + <property name="frameShape"> + <enum>QFrame::NoFrame</enum> + </property> + <property name="frameShadow"> + <enum>QFrame::Raised</enum> + </property> + <layout class="QGridLayout" name="gridLayout_11"> + <item row="1" column="1"> + <widget class="QSpinBox" name="ty_spin"> + <property name="maximumSize"> + <size> + <width>150</width> + <height>16777215</height> + </size> + </property> + <property name="suffix"> + <string> mm</string> + </property> + <property name="minimum"> + <number>-65535</number> + </property> + <property name="maximum"> + <number>65536</number> + </property> + </widget> + </item> + <item row="2" column="0"> + <widget class="QLabel" name="label_66"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Maximum" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="text"> + <string>Right</string> + </property> + </widget> + </item> + <item row="2" column="1"> + <widget class="QSpinBox" name="tz_spin"> + <property name="maximumSize"> + <size> + <width>150</width> + <height>16777215</height> + </size> + </property> + <property name="suffix"> + <string> mm</string> + </property> + <property name="minimum"> + <number>-65535</number> + </property> + <property name="maximum"> + <number>65536</number> + </property> + </widget> + </item> + <item row="0" column="0"> + <widget class="QLabel" name="label_61"> + <property name="sizePolicy"> + <sizepolicy 
hsizetype="Maximum" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="text"> + <string>Forward</string> + </property> + </widget> + </item> + <item row="0" column="1"> + <widget class="QSpinBox" name="tx_spin"> + <property name="maximumSize"> + <size> + <width>150</width> + <height>16777215</height> + </size> + </property> + <property name="suffix"> + <string> mm</string> + </property> + <property name="minimum"> + <number>-65535</number> + </property> + <property name="maximum"> + <number>65536</number> + </property> + </widget> + </item> + <item row="1" column="0"> + <widget class="QLabel" name="label_62"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Maximum" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="text"> + <string>Up</string> + </property> + </widget> + </item> + </layout> + </widget> + </item> + <item row="0" column="1"> + <widget class="QFrame" name="frame_5"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Preferred" vsizetype="Expanding"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="minimumSize"> + <size> + <width>260</width> + <height>0</height> + </size> + </property> + <property name="frameShape"> + <enum>QFrame::NoFrame</enum> + </property> + <property name="frameShadow"> + <enum>QFrame::Raised</enum> + </property> + <layout class="QVBoxLayout" name="verticalLayout_2"> + <item> + <widget class="QLabel" name="label_59"> + <property name="text"> + <string>Use only yaw and pitch while calibrating. +Don't roll or change position.</string> + </property> + <property name="alignment"> + <set>Qt::AlignCenter</set> + </property> + <property name="wordWrap"> + <bool>true</bool> + </property> + <property name="openExternalLinks"> + <bool>false</bool> + </property> + </widget> + </item> + <item> + <widget class="QLabel" name="sample_count_display"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Minimum" vsizetype="Maximum"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="text"> + <string/> + </property> + <property name="wordWrap"> + <bool>true</bool> + </property> + </widget> + </item> + <item> + <widget class="QPushButton" name="tcalib_button"> + <property name="enabled"> + <bool>false</bool> + </property> + <property name="text"> + <string>Start calibration</string> + </property> + <property name="checkable"> + <bool>true</bool> + </property> + </widget> + </item> + </layout> + </widget> + </item> + </layout> + </widget> + </item> + </layout> + </widget> + </item> + <item row="4" column="0"> + <widget class="QCheckBox" name="showNetworkInput"> + <property name="text"> + <string>Show Network Input</string> + </property> + </widget> + </item> + </layout> + </widget> + <resources/> + <connections/> + <designerdata> + <property name="gridDeltaX"> + <number>10</number> + </property> + <property name="gridDeltaY"> + <number>10</number> + </property> + <property name="gridSnapX"> + <bool>false</bool> + </property> + <property name="gridSnapY"> + <bool>false</bool> + </property> + <property name="gridVisible"> + <bool>true</bool> + </property> + </designerdata> +</ui> |