Diffstat (limited to 'tracker-neuralnet')
26 files changed, 4073 insertions, 0 deletions
diff --git a/tracker-neuralnet/BUILD.md b/tracker-neuralnet/BUILD.md
new file mode 100644
index 00000000..b8994b00
--- /dev/null
+++ b/tracker-neuralnet/BUILD.md
@@ -0,0 +1,20 @@
+ONNX Runtime
+------------
+
+Recommended approach on Windows: Build a shared library from sources. Use the static MSVC
+runtime library. The v1.6.0 branch should work fine.
+
+Source location: https://github.com/microsoft/onnxruntime
+
+In order to build, execute `build.bat` as follows:
+
+```
+$ build.bat --config Release --x86 --cmake_extra_defines CMAKE_INSTALL_PREFIX="D:\Dev\onnxruntime-x86-release" --build_dir .\buildx86\ --enable_msvc_static_runtime --build_shared_lib --skip_tests --cmake_generator "Visual Studio 15 2017"
+$ cmake --install .\buildx86\Release
+```
+
+Replace the argument for `--cmake_generator` if needed. Also adjust the build and install directories.
+
+This should place all required files in the directory specified by CMAKE_INSTALL_PREFIX.
+
+See also https://www.onnxruntime.ai/docs/how-to/build.html.
\ No newline at end of file diff --git a/tracker-neuralnet/CMakeLists.txt b/tracker-neuralnet/CMakeLists.txt new file mode 100644 index 00000000..3729c789 --- /dev/null +++ b/tracker-neuralnet/CMakeLists.txt @@ -0,0 +1,49 @@ +include(opentrack-opencv) +set(host-spec "${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_SIZEOF_VOID_P}") +if(host-spec MATCHES "^Linux i[3-6]86 4$") + return() +endif() + +find_package(OpenCV QUIET) +find_package(OpenMP QUIET) # Used to control number of onnx threads. +find_package(ONNXRuntime QUIET) + +if(OpenCV_FOUND AND ONNXRuntime_FOUND AND OpenMP_FOUND) + if(MSVC) + add_compile_options(-EHsc) + add_definitions(-D_HAS_EXCEPTIONS=1) + endif() + + otr_module(tracker-neuralnet) + + target_link_libraries(${self} + opentrack-cv + onnxruntime::onnxruntime + opencv_calib3d + opencv_imgproc + opencv_imgcodecs + opencv_core + OpenMP::OpenMP_CXX + ) + + # OpenMP::OpenMP_CXX doesn't set up the -fopenmp linking option, so set it up ourselves. + if(NOT MSVC) + target_link_options(${self} PUBLIC ${OpenMP_CXX_FLAGS}) + endif() + + install( + FILES "models/head-localizer.onnx" + "models/head-pose-0.2-big.onnx" + "models/head-pose-0.2-small.onnx" + "models/head-pose-0.3-big-quantized.onnx" + DESTINATION "${opentrack-libexec}/models" + PERMISSIONS ${opentrack-perms-file} + ) + + if(WIN32) + otr_install_lib("${ONNXRuntime_RUNTIME}" ".") + endif() + if(MSVC) + otr_install_lib("redist/vcomp140.dll" "${opentrack-bin}") + endif() +endif() diff --git a/tracker-neuralnet/deadzone_filter.cpp b/tracker-neuralnet/deadzone_filter.cpp new file mode 100644 index 00000000..fa96eeb3 --- /dev/null +++ b/tracker-neuralnet/deadzone_filter.cpp @@ -0,0 +1,173 @@ +#include "deadzone_filter.h" +#include "model_adapters.h" +#include "opencv_contrib.h" +#include "unscented_trafo.h" + +#include <tuple> +#include <opencv2/core/base.hpp> +#include <opencv2/core/matx.hpp> +#include <opencv2/core/quaternion.hpp> + +namespace neuralnet_tracker_ns +{ + +using namespace cvcontrib; + +// Number of degrees of freedom of position and rotation +static constexpr int dofs = 6; + +using StateVec = cv::Vec<float,dofs>; +using StateCov = cv::Matx<float,dofs,dofs>; + +static constexpr int num_sigmas = ukf_cv::MerweScaledSigmaPoints<dofs>::num_sigmas; +// Rescaling factor for position/size living in the space of the face crop. +// Applied prior to application of UKF to prevent numerical problems. +static constexpr float img_scale = 200.f; +// Similar rescaling factor for position/size that live in world space. +static constexpr float world_scale = 1000.f; // mm + +// Fills the 6 DoF covariance factor, as in L L^T factorization. 
+// Covariance is given wrt the tangent space of current predictions +StateCov make_tangent_space_uncertainty_tril(const PoseEstimator::Face &face) +{ + StateCov tril = StateCov::eye(); + set_minor<3,3>(tril, 0, 0, face.center_size_cov_tril / img_scale); + set_minor<3,3>(tril, 3, 3, face.rotaxis_cov_tril); + return tril; +} + + +QuatPose apply_offset(const QuatPose& pose, const StateVec& offset) +{ + // Unpack + const cv::Vec3f dp = { offset[0], offset[1], offset[2] }; + const cv::Quatf dr = cv::Quatf::createFromRvec(cv::Vec3f{ offset[3], offset[4], offset[5] }); + const auto p = pose.pos + dp; + const auto r = pose.rot * dr; + return { r, p }; +} + + +std::tuple<cv::Quatf, cv::Point2f, float> apply_offset(const PoseEstimator::Face& face, const StateVec& offset) +{ + const cv::Quatf dr = cv::Quatf::createFromRvec(cv::Vec3f{ offset[3], offset[4], offset[5] }); + const auto r = face.rotation * dr; + + const cv::Point2f p = { + face.center.x + offset[0]*img_scale, + face.center.y + offset[1]*img_scale + }; + + // Intercept the case where the head size stddev became so large that the sigma points + // were created with negative head size (mean - constant*stddev ...). Negative head size + // is bad. But this is fine. The unscented transform where this function comes into play + // is designed to handle non-linearities like this. + const float sz = std::max(0.1f*face.size, face.size + offset[2]*img_scale); + + return { + r, + p, + sz, + }; +} + + +StateVec relative_to(const QuatPose& reference, const QuatPose& pose) +{ + const auto p = pose.pos - reference.pos; + const auto r = toRotVec(reference.rot.conjugate()*pose.rot); + return StateVec{ p[0], p[1], p[2], r[0], r[1], r[2] }; +} + + +ukf_cv::SigmaPoints<dofs> relative_to(const QuatPose& pose, const std::array<QuatPose,num_sigmas>& sigmas) +{ + ukf_cv::SigmaPoints<dofs> out; // Beware, the number of points is != the number of DoFs. + std::transform(sigmas.begin(), sigmas.end(), out.begin(), [&pose](const QuatPose& s) { + return relative_to(pose, s); + }); + return out; +} + + +std::array<QuatPose,num_sigmas> compute_world_pose_from_sigma_point(const PoseEstimator::Face& face, const ukf_cv::SigmaPoints<dofs>& sigmas, Face2WorldFunction face2world) +{ + std::array<QuatPose,num_sigmas> out; + std::transform(sigmas.begin(), sigmas.end(), out.begin(), [face2world=std::move(face2world), &face](const StateVec& sigma_point) { + // First unpack the state vector and generate quaternion rotation w.r.t image space. + const auto [rotation, center, size] = apply_offset(face, sigma_point); + // Then transform ... + QuatPose pose = face2world(rotation, center, size); + pose.pos /= world_scale; + return pose; + }); + return out; +} + + +StateVec apply_filter_to_offset(const StateVec& offset, const StateCov& offset_cov, float, const FiltParams& params) +{ + // Offset and Cov represent a multivariate normal distribution, which is the probability of the new pose measured w.r.t the previous one. + // Prob(x) ~exp(-(x-mu)t Cov^-1 (x-mu)) + // We want to attenuate this offset, or zero it out completely, to obtain a deadzone-filter behaviour. The size of the deadzone shall be + // determined by the covariance projected to the offset direction like so: + // Take x = mu - mu / |mu| * alpha + // p(alpha) ~exp(-alpha^2 / |mu|^2 * mut Cov^-1 mu) = ~exp(-alpha^2 / sigma^2) with sigma^2 = mut Cov^-1 mu / |mu|^2. + // So this projection is like a 1d normal distribution with some standard deviation, which we take to scale the deadzone. 
+ + bool ok = true; + + const float len_div_sigma_sqr = offset.dot(offset_cov.inv(cv::DECOMP_CHOLESKY, &ok) * offset); + + const float attenuation = (ok) ? sigmoid((std::sqrt(len_div_sigma_sqr) - params.deadzone_size)*params.deadzone_hardness) : 1.f; + + // { + // std::cout << "cov diag: " << offset_cov.diag() << std::endl; + // std::cout << "offset: " << cv::norm(offset) << std::endl; + // std::cout << "len_div_sigma_sqr: " << cv::norm(len_div_sigma_sqr) << std::endl; + // std::cout << "attenuation (" << ok << "): " << attenuation << std::endl; + // } + + return offset*attenuation; +} + + +QuatPose apply_filter(const PoseEstimator::Face &face, const QuatPose& previous_pose_, float dt, Face2WorldFunction face2world, const FiltParams& params) +{ + ukf_cv::MerweScaledSigmaPoints<dofs> unscentedtrafo; + auto previous_pose = previous_pose_; + previous_pose.pos /= world_scale; + + // Get 6 DoF covariance factor for the predictions in the face crop space. + const auto cov_tril = make_tangent_space_uncertainty_tril(face); + + // Compute so called sigma points. These represent the distribution from the covariance matrix in terms of + // sampling points. + const ukf_cv::SigmaPoints<dofs> sigmas = unscentedtrafo.compute_sigmas(to_vec(StateVec::zeros()), cov_tril, true); + + // The filter uses an unscented transform to translate that into a distribution for the offset from the previous pose. + // The trick is to transform the sampling points and compute a covariance from them in the output space. + // We have many of these sigma points. This is why that callback comes into play here. + // The transform to 3d world space is more than Face2WorldFunction because we also need to apply the sigma point (as + // a relative offset) to the pose in face crop space. + const std::array<QuatPose,num_sigmas> pose_sigmas = compute_world_pose_from_sigma_point(face, sigmas, std::move(face2world)); + + // Compute sigma points relative to the previous pose + const ukf_cv::SigmaPoints<dofs> deltas_sigmas = relative_to(previous_pose, pose_sigmas); + + // Compute the mean offset from the last pose and the spread due to the networks uncertainty output. + const auto [offset, offset_cov] = unscentedtrafo.compute_statistics(deltas_sigmas); + + // Then the deadzone is applied to the offset and finally the previous pose is transformed by the offset to arrive + // at the final output. + const StateVec scaled_offset = apply_filter_to_offset(offset, offset_cov, dt, params); + + QuatPose new_pose = apply_offset(previous_pose, scaled_offset); + + new_pose.pos *= world_scale; + + return new_pose; +} + + +} // namespace neuralnet_tracker_ns
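For reference, the deadzone attenuation implemented in `apply_filter_to_offset` above can be stated compactly; this is only a cleaned-up restatement of the reasoning in the comment. With the offset mean $\mu$ and covariance $\Sigma$ returned by the unscented transform, $s$ = `deadzone_size` and $h$ = `deadzone_hardness`:

$$
d = \sqrt{\mu^\top \Sigma^{-1} \mu}, \qquad
a = \operatorname{sigmoid}\!\big(h\,(d - s)\big), \qquad
\mu_{\text{filtered}} = a\,\mu .
$$

Here $d$ is the Mahalanobis distance of the new measurement from the previous pose, so the deadzone size is effectively measured in standard deviations of the network's uncertainty. If the Cholesky inversion of $\Sigma$ fails, the attenuation falls back to $1$ and the offset passes through unfiltered.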
\ No newline at end of file diff --git a/tracker-neuralnet/deadzone_filter.h b/tracker-neuralnet/deadzone_filter.h new file mode 100644 index 00000000..a9b6aada --- /dev/null +++ b/tracker-neuralnet/deadzone_filter.h @@ -0,0 +1,37 @@ +#pragma once + +#include "unscented_trafo.h" +#include "opencv_contrib.h" +#include "model_adapters.h" + +namespace neuralnet_tracker_ns +{ + +/// Represents a 6d pose by quaternion rotation and position vector. +struct QuatPose { + cv::Quatf rot; + cv::Vec3f pos; +}; + +struct FiltParams +{ + float deadzone_hardness = 1.f; + float deadzone_size = 1.f; +}; + +/** Callback type for converting data from the `Face` struct to a 6d pose. +* +* This callback is needed because it depends on things that the filter doesn't have to know about and it is called multiple times +* due to the way how uncertainty estimates are handled +*/ +using Face2WorldFunction = std::function<QuatPose (const cv::Quatf&, const cv::Point2f&, float)>; + +/** Applies a deadzone filter similar to the one used in the Hamilton filter. +* +* What sets this apart is that the deadzone size scales with the uncertainty estimate of the network. +* The rotation uncertainty is represented by a covariance matrix for the distribution of a rotation vector which +* describes the offset from the mean rotation (the quaternion in the `Face` struct). +*/ +QuatPose apply_filter(const PoseEstimator::Face &face, const QuatPose& previous_pose, float dt, Face2WorldFunction face2world, const FiltParams& params); + +} // namespace neuralnet_tracker_ns
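A minimal usage sketch of this interface, mirroring what `NeuralNetTracker::compute_filtered_pose` does further down in this diff. The `face2world` lambda and the hard-coded parameter values are placeholders for illustration only; the real tracker builds the callback from the camera intrinsics and reads the parameters from its settings.

```
// Hypothetical caller of apply_filter(); placeholder values throughout.
#include "deadzone_filter.h"

#include <utility>

namespace neuralnet_tracker_ns
{

QuatPose filter_step_example(const PoseEstimator::Face& face,
                             const QuatPose& previous_pose,
                             float dt)
{
    // Placeholder conversion: pretend the crop-space rotation already is the
    // world rotation and put the head at the origin. The real tracker converts
    // face-crop coordinates to world coordinates here (see transform_to_world_pose).
    Face2WorldFunction face2world =
        [](const cv::Quatf& rot, const cv::Point2f&, float) -> QuatPose
        {
            return QuatPose{ rot, cv::Vec3f{ 0.f, 0.f, 0.f } };
        };

    // Default values of the tracker's deadzone-hardness / deadzone-size settings.
    const FiltParams params{ /*deadzone_hardness=*/ 1.5f, /*deadzone_size=*/ 1.f };

    // Moves the previous pose by the (attenuated) offset towards the new measurement.
    return apply_filter(face, previous_pose, dt, std::move(face2world), params);
}

} // namespace neuralnet_tracker_ns
```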
\ No newline at end of file diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp new file mode 100644 index 00000000..c55ddf0c --- /dev/null +++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp @@ -0,0 +1,976 @@ +/* Copyright (c) 2021 Michael Welter <michael@welter-4d.de> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + */ + +#include "ftnoir_tracker_neuralnet.h" +#include "deadzone_filter.h" +#include "opencv_contrib.h" + +#include "compat/sleep.hpp" +#include "compat/math-imports.hpp" +#include "compat/timer.hpp" +#include "compat/check-visible.hpp" +#include "cv/init.hpp" + +#include <omp.h> +#include <onnxruntime_cxx_api.h> +#include <opencv2/core.hpp> +#include <opencv2/core/quaternion.hpp> + +#ifdef _MSC_VER +# pragma warning(disable : 4702) +#endif + +#include <QMutexLocker> +#include <QDebug> +#include <QFile> +#include <QFileDialog> +#include <QFileInfo> + +#include <cstdio> +#include <cmath> +#include <algorithm> +#include <chrono> +#include <string> +#include <stdexcept> +#include <unordered_map> + +// Some demo code for onnx +// https://github.com/microsoft/onnxruntime/blob/master/csharp/test/Microsoft.ML.OnnxRuntime.EndToEndTests.Capi/C_Api_Sample.cpp +// https://github.com/leimao/ONNX-Runtime-Inference/blob/main/src/inference.cpp + +namespace neuralnet_tracker_ns +{ + +using namespace cvcontrib; + +using numeric_types::vec3; +using numeric_types::vec2; +using numeric_types::mat33; + +#if _MSC_VER +std::wstring convert(const QString &s) { return s.toStdWString(); } +#else +std::string convert(const QString &s) { return s.toStdString(); } +#endif + + +QDir get_default_model_directory() +{ + return QDir(OPENTRACK_BASE_PATH+ "/" OPENTRACK_LIBRARY_PATH "models"); +} + + +int enum_to_fps(int value) +{ + int fps = 0; + + switch (value) + { + default: eval_once(qDebug() << "neuralnet tracker: invalid fps enum value"); + [[fallthrough]]; + case fps_default: fps = 0; break; + case fps_30: fps = 30; break; + case fps_60: fps = 60; break; + case fps_75: fps = 75; break; + case fps_125: fps = 125; break; + case fps_200: fps = 200; break; + case fps_50: fps = 50; break; + case fps_100: fps = 100; break; + case fps_120: fps = 120; break; + case fps_300: fps = 300; break; + case fps_250: fps = 250; break; + } + + return fps; +} + + +template<class F> +struct OnScopeExit +{ + explicit OnScopeExit(F&& f) : f_{ f } {} + ~OnScopeExit() noexcept + { + f_(); + } + F f_; +}; + + +CamIntrinsics make_intrinsics(const cv::Mat& img, const Settings& settings) +{ + const int w = img.cols, h = img.rows; + const double diag_fov = settings.fov * M_PI / 180.; + const double fov_w = 2.*atan(tan(diag_fov/2.)/sqrt(1. + h/(double)w * h/(double)w)); + const double fov_h = 2.*atan(tan(diag_fov/2.)/sqrt(1. + w/(double)h * w/(double)h)); + const double focal_length_w = 1. / tan(.5 * fov_w); + const double focal_length_h = 1. / tan(.5 * fov_h); + /* a + ______ <--- here is sensor area + | / + | / + f | / + | / 2 x angle is the fov + |/ + <--- here is the hole of the pinhole camera + + So, a / f = tan(fov / 2) + => f = a/tan(fov/2) + What is a? + 1 if we define f in terms of clip space where the image plane goes from -1 to 1. Because a is the half-width. 
+ */ + + return { + (float)focal_length_w, + (float)focal_length_h, + (float)fov_w, + (float)fov_h + }; +} + + +cv::Rect make_crop_rect_multiple_of(const cv::Size &size, int multiple) +{ + const int new_w = (size.width / multiple) * multiple; + const int new_h = (size.height / multiple) * multiple; + return cv::Rect( + (size.width-new_w)/2, + (size.height-new_h)/2, + new_w, + new_h + ); +} + +template<class T> +cv::Rect_<T> squarize(const cv::Rect_<T> &r) +{ + cv::Point_<T> c{r.x + r.width/T(2), r.y + r.height/T(2)}; + const T sz = std::max(r.height, r.width); + return {c.x - sz/T(2), c.y - sz/T(2), sz, sz}; +} + + +template<class T> +cv::Rect_<T> expand(const cv::Rect_<T>& r, T factor) +{ + // xnew = l+.5*w - w*f*0.5 = l + .5*(w - new_w) + const cv::Size_<T> new_size = { r.width * factor, r.height * factor }; + const cv::Point_<T> new_tl = r.tl() + (as_point(r.size()) - as_point(new_size)) / T(2); + return cv::Rect_<T>(new_tl, new_size); +} + + +template<class T> +cv::Rect_<T> ewa_filter(const cv::Rect_<T>& last, const cv::Rect_<T>& current, T alpha) +{ + const auto last_center = T(0.5) * (last.tl() + last.br()); + const auto cur_center = T(0.5) * (current.tl() + current.br()); + const cv::Point_<T> new_size = as_point(last.size()) + alpha * (as_point(current.size()) - as_point(last.size())); + const cv::Point_<T> new_center = last_center + alpha * (cur_center - last_center); + return cv::Rect_<T>(new_center - T(0.5) * new_size, as_size(new_size)); +} + + +cv::Vec3f image_to_world(float x, float y, float size, float reference_size_in_mm, const cv::Size2i& image_size, const CamIntrinsics& intrinsics) +{ + /* + Compute the location the network outputs in 3d space. + + hhhhhh <- head size (meters) + \ | ----------------------- + \ | \ + \ | | + \ | |- x (meters) + ____ <- face.size / width | + \ | | | + \| |- focal length / + ------------------------ + ------------------------------------------------>> z direction + z/x = zi / f + zi = image position + z = world position + f = focal length + + We can also do deltas: + dz / x = dzi / f + => x = dz / dzi * f + which means we can compute x from the head size (dzi) if we assume some reference size (dz). + */ + const float head_size_vertical = 2.f*size; // Size from the model is more like half the real vertical size of a human head. 
+ const float xpos = -(intrinsics.focal_length_w * image_size.width * 0.5f) / head_size_vertical * reference_size_in_mm; + const float zpos = (x / image_size.width * 2.f - 1.f) * xpos / intrinsics.focal_length_w; + const float ypos = (y / image_size.height * 2.f - 1.f) * xpos / intrinsics.focal_length_h; + return {xpos, ypos, zpos}; +} + + +vec2 world_to_image(const cv::Vec3f& pos, const cv::Size2i& image_size, const CamIntrinsics& intrinsics) +{ + const float xscr = pos[2] / pos[0] * intrinsics.focal_length_w; + const float yscr = pos[1] / pos[0] * intrinsics.focal_length_h; + const float x = (xscr+1.)*0.5f*image_size.width; + const float y = (yscr+1.)*0.5f*image_size.height; + return {x, y}; +} + + +cv::Quatf image_to_world(cv::Quatf q) +{ + std::swap(q[1], q[3]); + q[1] = -q[1]; + q[2] = -q[2]; + q[3] = -q[3]; + return q; +} + + +cv::Point2f normalize(const cv::Point2f &p, int h, int w) +{ + return { + p.x/w*2.f-1.f, + p.y/h*2.f-1.f + }; +} + + +cv::Quatf rotation_from_two_vectors(const vec3 &a, const vec3 &b) +{ + // |axis| = |a| * |b| * sin(alpha) + const vec3 axis = a.cross(b); + // dot = |a|*|b|*cos(alpha) + const float dot = a.dot(b); + const float len = cv::norm(axis); + vec3 normed_axis = axis / len; + float angle = std::atan2(len, dot); + if (!(std::isfinite(normed_axis[0]) && std::isfinite(normed_axis[1]) && std::isfinite(normed_axis[2]))) + { + angle = 0.f; + normed_axis = vec3{1.,0.,0.}; + } + return cv::Quatf::createFromAngleAxis(angle, normed_axis); +} + + +// Computes correction due to head being off screen center. +cv::Quatf compute_rotation_correction(const cv::Point3f& p) +{ + return rotation_from_two_vectors( + {-1.f,0.f,0.f}, p); +} + + +// Intersection over union. A value between 0 and 1 which measures the match between the bounding boxes. +template<class T> +T iou(const cv::Rect_<T> &a, const cv::Rect_<T> &b) +{ + auto i = a & b; + return double{i.area()} / (a.area()+b.area()-i.area()); +} + + +class GuardedThreadCountSwitch +{ + int old_num_threads_cv_ = 1; + int old_num_threads_omp_ = 1; + public: + GuardedThreadCountSwitch(int num_threads) + { + old_num_threads_cv_ = cv::getNumThreads(); + old_num_threads_omp_ = omp_get_num_threads(); + omp_set_num_threads(num_threads); + cv::setNumThreads(num_threads); + } + + ~GuardedThreadCountSwitch() + { + omp_set_num_threads(old_num_threads_omp_); + cv::setNumThreads(old_num_threads_cv_); + } + + GuardedThreadCountSwitch(const GuardedThreadCountSwitch&) = delete; + GuardedThreadCountSwitch& operator=(const GuardedThreadCountSwitch&) = delete; +}; + + +bool NeuralNetTracker::detect() +{ + double inference_time = 0.; + + OnScopeExit update_inference_time{ [&]() { + + QMutexLocker lck{ &stats_mtx_ }; + inference_time_ = inference_time; + } }; + + // If there is no past ROI from the localizer or if the match of its output + // with the current ROI is too poor we have to run it again. This causes a + // latency spike of maybe an additional 50%. But it only occurs when the user + // moves his head far enough - or when the tracking ist lost ... + if (!last_localizer_roi_ || !last_roi_ || + iou(*last_localizer_roi_,*last_roi_)<0.25) + { + auto [p, rect] = localizer_->run(grayscale_); + inference_time += localizer_->last_inference_time_millis(); + + if (last_roi_ && iou(rect,*last_roi_)>=0.25 && p > 0.5) + { + // The new ROI matches the result from tracking, so the user is + // still there and to not disturb recurrent models, we only update + // ... 
+ last_localizer_roi_ = rect; + } + else if (p > 0.5 && rect.height > 32 && rect.width > 32) + { + // Tracking probably got lost since the ROI's don't match, but the + // localizer still finds a face, so we use the ROI from the localizer + last_localizer_roi_ = rect; + last_roi_ = rect; + } + else + { + // Tracking lost and no localization result. The user probably can't be seen. + last_roi_.reset(); + last_localizer_roi_.reset(); + } + } + + if (!last_roi_) + { + // Last iteration the tracker failed to generate a trustworthy + // roi and the localizer also cannot find a face. + draw_gizmos({}, {}); + return false; + } + + auto face = poseestimator_->run(grayscale_, *last_roi_); + inference_time += poseestimator_->last_inference_time_millis(); + + if (!face) + { + last_roi_.reset(); + draw_gizmos({}, {}); + return false; + } + + cv::Rect2f roi = expand(face->box, (float)settings_.roi_zoom); + + last_roi_ = ewa_filter(*last_roi_, roi, float(settings_.roi_filter_alpha)); + + QuatPose pose = compute_filtered_pose(*face); + last_pose_ = pose; + + Affine pose_affine = { + pose.rot.toRotMat3x3(cv::QUAT_ASSUME_UNIT), + pose.pos }; + + { + QMutexLocker lck(&mtx_); + last_pose_affine_ = pose_affine; + } + + draw_gizmos(*face, last_pose_affine_); + + return true; +} + + +void NeuralNetTracker::draw_gizmos( + const std::optional<PoseEstimator::Face> &face, + const Affine& pose) +{ + if (!is_visible_) + return; + + preview_.draw_gizmos( + face, + last_roi_, + last_localizer_roi_, + world_to_image(pose.t, grayscale_.size(), intrinsics_)); + + if (settings_.show_network_input) + { + cv::Mat netinput = poseestimator_->last_network_input(); + preview_.overlay_netinput(netinput); + } +} + + +QuatPose NeuralNetTracker::transform_to_world_pose(const cv::Quatf &face_rotation, const cv::Point2f& face_xy, const float face_size) const +{ + const vec3 face_world_pos = image_to_world( + face_xy.x, face_xy.y, face_size, HEAD_SIZE_MM, + grayscale_.size(), + intrinsics_); + + const cv::Quatf rot_correction = compute_rotation_correction( + face_world_pos); + + cv::Quatf rot = rot_correction * image_to_world(face_rotation); + + // But this is in general not the location of the rotation joint in the neck. + // So we need an extra offset. 
Which we determine by computing + // z,y,z-pos = head_joint_loc + R_face * offset + const vec3 local_offset = vec3{ + static_cast<float>(settings_.offset_fwd), + static_cast<float>(settings_.offset_up), + static_cast<float>(settings_.offset_right)}; + const vec3 offset = rotate(rot, local_offset); + const vec3 pos = face_world_pos + offset; + + return { rot, pos }; +} + + +QuatPose NeuralNetTracker::compute_filtered_pose(const PoseEstimator::Face &face) +{ + if (fps_ > 0.001 && last_pose_ && poseestimator_->has_uncertainty()) + { + auto image2world = [this](const cv::Quatf &face_rotation, const cv::Point2f& face_xy, const float face_size) { + return this->transform_to_world_pose(face_rotation, face_xy, face_size); }; + + return apply_filter( + face, + *last_pose_, + 1./fps_, + std::move(image2world), + FiltParams{ + float(settings_.deadzone_hardness), + float(settings_.deadzone_size) + }); + } + else + { + return transform_to_world_pose(face.rotation, face.center, face.size); + } +} + + +NeuralNetTracker::NeuralNetTracker() +{ + opencv_init(); + neuralnet_tracker_tests::run(); +} + + +NeuralNetTracker::~NeuralNetTracker() +{ + requestInterruption(); + wait(); + // fast start/stop causes breakage + portable::sleep(1000); +} + + +module_status NeuralNetTracker::start_tracker(QFrame* videoframe) +{ + videoframe->show(); + video_widget_ = std::make_unique<cv_video_widget>(videoframe); + layout_ = std::make_unique<QHBoxLayout>(); + layout_->setContentsMargins(0, 0, 0, 0); + layout_->addWidget(&*video_widget_); + videoframe->setLayout(&*layout_); + video_widget_->show(); + num_threads_ = settings_.num_threads; + start(); + return status_ok(); +} + + +bool NeuralNetTracker::load_and_initialize_model() +{ + const QString localizer_model_path_enc = + OPENTRACK_BASE_PATH+"/" OPENTRACK_LIBRARY_PATH "/models/head-localizer.onnx"; + const QString poseestimator_model_path_enc = get_posenet_filename(); + + try + { + env_ = Ort::Env{ + OrtLoggingLevel::ORT_LOGGING_LEVEL_ERROR, + "tracker-neuralnet" + }; + auto opts = Ort::SessionOptions{}; + // Do thread settings here do anything? + // There is a warning which says to control number of threads via + // openmp settings. Which is what we do. + opts.SetIntraOpNumThreads(num_threads_); + opts.SetInterOpNumThreads(1); + allocator_info_ = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + + localizer_.emplace( + allocator_info_, + Ort::Session{env_, convert(localizer_model_path_enc).c_str(), opts}); + + qDebug() << "Loading pose net " << poseestimator_model_path_enc; + poseestimator_.emplace( + allocator_info_, + Ort::Session{env_, convert(poseestimator_model_path_enc).c_str(), opts}); + } + catch (const Ort::Exception &e) + { + qDebug() << "Failed to initialize the neural network models. ONNX error message: " + << e.what(); + return false; + } + catch (const std::exception &e) + { + qDebug() << "Failed to initialize the neural network models. 
Error message: " << e.what(); + return false; + } + + return true; +} + + +bool NeuralNetTracker::open_camera() +{ + int rint = std::clamp(*settings_.resolution, 0, (int)std::size(resolution_choices)-1); + resolution_tuple res = resolution_choices[rint]; + int fps = enum_to_fps(settings_.force_fps); + + QMutexLocker l(&camera_mtx_); + + camera_ = video::make_camera(settings_.camera_name); + + if (!camera_) + return false; + + video::impl::camera::info args {}; + + if (res.width) + { + args.width = res.width; + args.height = res.height; + } + if (fps) + args.fps = fps; + + args.use_mjpeg = settings_.use_mjpeg; + + if (!camera_->start(args)) + { + qDebug() << "neuralnet tracker: can't open camera"; + return false; + } + + return true; +} + + +void NeuralNetTracker::run() +{ + preview_.init(*video_widget_); + + GuardedThreadCountSwitch switch_num_threads_to(num_threads_); + + if (!open_camera()) + return; + + if (!load_and_initialize_model()) + return; + + std::chrono::high_resolution_clock clk; + + while (!isInterruptionRequested()) + { + is_visible_ = check_is_visible(); + auto t = clk.now(); + { + QMutexLocker l(&camera_mtx_); + + auto [ img, res ] = camera_->get_frame(); + + if (!res) + { + l.unlock(); + portable::sleep(100); + continue; + } + + { + QMutexLocker lck{&stats_mtx_}; + resolution_ = { img.width, img.height }; + } + + auto color = prepare_input_image(img); + + if (is_visible_) + preview_.copy_video_frame(color); + + switch (img.channels) + { + case 1: + grayscale_.create(img.height, img.width, CV_8UC1); + color.copyTo(grayscale_); + break; + case 3: + cv::cvtColor(color, grayscale_, cv::COLOR_BGR2GRAY); + break; + default: + qDebug() << "Can't handle" << img.channels << "color channels"; + return; + } + } + + intrinsics_ = make_intrinsics(grayscale_, settings_); + + detect(); + + if (is_visible_) + preview_.copy_to_widget(*video_widget_); + + update_fps( + std::chrono::duration_cast<std::chrono::milliseconds>( + clk.now() - t).count()*1.e-3); + } +} + + +cv::Mat NeuralNetTracker::prepare_input_image(const video::frame& frame) +{ + auto img = cv::Mat(frame.height, frame.width, CV_8UC(frame.channels), (void*)frame.data, frame.stride); + + // Crop if aspect ratio is not 4:3 + if (img.rows*4 != img.cols*3) + { + img = img(make_crop_rect_for_aspect(img.size(), 4, 3)); + } + + img = img(make_crop_rect_multiple_of(img.size(), 4)); + + if (img.cols > 640) + { + cv::pyrDown(img, downsized_original_images_[0]); + img = downsized_original_images_[0]; + } + if (img.cols > 640) + { + cv::pyrDown(img, downsized_original_images_[1]); + img = downsized_original_images_[1]; + } + + return img; +} + + +void NeuralNetTracker::update_fps(double dt) +{ + const double alpha = dt/(dt + RC); + if (dt > 1e-6) + { + QMutexLocker lck{&stats_mtx_}; + fps_ *= 1 - alpha; + fps_ += alpha * 1./dt; + } +} + + +void NeuralNetTracker::data(double *data) +{ + Affine tmp = [&]() + { + QMutexLocker lck(&mtx_); + return last_pose_affine_; + }(); + + const auto& mx = tmp.R.col(0); + const auto& my = tmp.R.col(1); + const auto& mz = tmp.R.col(2); + + // For reference: https://en.wikipedia.org/wiki/Euler_angles. Section "Rotation matrix". The relevant matrix is + // under "Tait-Bryan angles", row with "Y_alpha Z_beta X_gamma = ...". + // Because for the NN tracker x is forward, and y is up. We can see that the x axis is independent of roll. Thus it + // is relatively easy to figure out the yaw and pitch angles (alpha and beta). 
+ const float yaw = std::atan2(mx(2), mx(0)); + const float pitch = -std::atan2(-mx(1), std::sqrt(mx(2)*mx(2)+mx(0)*mx(0))); + // For the roll angle we recognize that the matrix entries in the second row contain cos(pitch)*cos(roll), and + // cos(pitch)*sin(roll). Using atan2 eliminates the common pitch factor and we obtain the roll angle. + const float roll = std::atan2(-mz(1), my(1)); + { + constexpr double rad2deg = 180/M_PI; + data[Yaw] = rad2deg * yaw; + data[Pitch] = rad2deg * pitch; + data[Roll] = -rad2deg * roll; + + // convert to cm + data[TX] = -tmp.t[2] * 0.1; + data[TY] = tmp.t[1] * 0.1; + data[TZ] = -tmp.t[0] * 0.1; + } +} + + +Affine NeuralNetTracker::pose() +{ + QMutexLocker lck(&mtx_); + return last_pose_affine_; +} + + +std::tuple<cv::Size,double, double> NeuralNetTracker::stats() const +{ + QMutexLocker lck(&stats_mtx_); + return { resolution_, fps_, inference_time_ }; +} + + +QString NeuralNetTracker::get_posenet_filename() const +{ + QString filename = settings_.posenet_file; + if (QFileInfo(filename).isRelative()) + filename = get_default_model_directory().absoluteFilePath(filename); + return filename; +} + + +void NeuralNetDialog::make_fps_combobox() +{ + for (int k = 0; k < fps_MAX; k++) + { + const int hz = enum_to_fps(k); + const QString name = (hz == 0) ? tr("Default") : QString::number(hz); + ui_.cameraFPS->addItem(name, k); + } +} + +void NeuralNetDialog::make_resolution_combobox() +{ + int k=0; + for (const auto [w, h] : resolution_choices) + { + const QString s = (w == 0) + ? tr("Default") + : QString::number(w) + " x " + QString::number(h); + ui_.resolution->addItem(s, k++); + } +} + + +NeuralNetDialog::NeuralNetDialog() : + trans_calib_(1, 2) +{ + ui_.setupUi(this); + + make_fps_combobox(); + make_resolution_combobox(); + + for (const auto& str : video::camera_names()) + ui_.cameraName->addItem(str); + + tie_setting(settings_.camera_name, ui_.cameraName); + tie_setting(settings_.fov, ui_.cameraFOV); + tie_setting(settings_.offset_fwd, ui_.tx_spin); + tie_setting(settings_.offset_up, ui_.ty_spin); + tie_setting(settings_.offset_right, ui_.tz_spin); + tie_setting(settings_.show_network_input, ui_.showNetworkInput); + tie_setting(settings_.roi_filter_alpha, ui_.roiFilterAlpha); + tie_setting(settings_.use_mjpeg, ui_.use_mjpeg); + tie_setting(settings_.roi_zoom, ui_.roiZoom); + tie_setting(settings_.num_threads, ui_.threadCount); + tie_setting(settings_.resolution, ui_.resolution); + tie_setting(settings_.force_fps, ui_.cameraFPS); + tie_setting(settings_.posenet_file, ui_.posenetFileDisplay); + + connect(ui_.buttonBox, SIGNAL(accepted()), this, SLOT(doOK())); + connect(ui_.buttonBox, SIGNAL(rejected()), this, SLOT(doCancel())); + connect(ui_.camera_settings, SIGNAL(clicked()), this, SLOT(camera_settings())); + connect(ui_.posenetSelectButton, SIGNAL(clicked()), this, SLOT(onSelectPoseNetFile())); + connect(&settings_.camera_name, value_::value_changed<QString>(), this, &NeuralNetDialog::update_camera_settings_state); + + update_camera_settings_state(settings_.camera_name); + + connect(&calib_timer_, &QTimer::timeout, this, &NeuralNetDialog::trans_calib_step); + calib_timer_.setInterval(35); + connect(ui_.tcalib_button,SIGNAL(toggled(bool)), this, SLOT(startstop_trans_calib(bool))); + + connect(&tracker_status_poll_timer_, &QTimer::timeout, this, &NeuralNetDialog::status_poll); + tracker_status_poll_timer_.setInterval(250); + tracker_status_poll_timer_.start(); +} + +void NeuralNetDialog::save() +{ + settings_.b->save(); +} + +void NeuralNetDialog::reload() +{ + 
settings_.b->reload(); +} + +void NeuralNetDialog::doOK() +{ + save(); + close(); +} + + +void NeuralNetDialog::doCancel() +{ + close(); +} + + +void NeuralNetDialog::camera_settings() +{ + if (tracker_) + { + QMutexLocker l(&tracker_->camera_mtx_); + (void)tracker_->camera_->show_dialog(); + } + else + (void)video::show_dialog(settings_.camera_name); +} + + +void NeuralNetDialog::update_camera_settings_state(const QString& name) +{ + (void)name; + ui_.camera_settings->setEnabled(true); +} + + +void NeuralNetDialog::register_tracker(ITracker * x) +{ + tracker_ = static_cast<NeuralNetTracker*>(x); + ui_.tcalib_button->setEnabled(true); +} + + +void NeuralNetDialog::unregister_tracker() +{ + tracker_ = nullptr; + ui_.tcalib_button->setEnabled(false); +} + +bool NeuralNetDialog::embeddable() noexcept +{ + return true; +} + +void NeuralNetDialog::set_buttons_visible(bool x) +{ + ui_.buttonBox->setVisible(x); +} + +void NeuralNetDialog::status_poll() +{ + QString status; + if (!tracker_) + { + status = tr("Tracker Offline"); + } + else + { + auto [ res, fps, inference_time ] = tracker_->stats(); + status = tr("%1x%2 @ %3 FPS / Inference: %4 ms").arg(res.width).arg(res.height).arg(int(fps)).arg(inference_time, 0, 'f', 1); + } + ui_.resolution_display->setText(status); +} + + +void NeuralNetDialog::trans_calib_step() +{ + if (tracker_) + { + const Affine X_CM = [&]() { + QMutexLocker l(&calibrator_mutex_); + return tracker_->pose(); + }(); + trans_calib_.update(X_CM.R, X_CM.t); + auto [_, nsamples] = trans_calib_.get_estimate(); + + constexpr int min_yaw_samples = 15; + constexpr int min_pitch_samples = 12; + constexpr int min_samples = min_yaw_samples+min_pitch_samples; + + // Don't bother counting roll samples. Roll calibration is hard enough + // that it's a hidden unsupported feature anyway. + + QString sample_feedback; + if (nsamples[0] < min_yaw_samples) + sample_feedback = tr("%1 yaw samples. Yaw more to %2 samples for stable calibration.").arg(nsamples[0]).arg(min_yaw_samples); + else if (nsamples[1] < min_pitch_samples) + sample_feedback = tr("%1 pitch samples. Pitch more to %2 samples for stable calibration.").arg(nsamples[1]).arg(min_pitch_samples); + else + { + const int nsamples_total = nsamples[0] + nsamples[1]; + sample_feedback = tr("%1 samples. Over %2, good!").arg(nsamples_total).arg(min_samples); + } + ui_.sample_count_display->setText(sample_feedback); + } + else + startstop_trans_calib(false); +} + + +void NeuralNetDialog::startstop_trans_calib(bool start) +{ + QMutexLocker l(&calibrator_mutex_); + // FIXME: does not work ... + if (start) + { + qDebug() << "pt: starting translation calibration"; + calib_timer_.start(); + trans_calib_.reset(); + ui_.sample_count_display->setText(QString()); + // Tracker must run with zero'ed offset for calibration. 
+ settings_.offset_fwd = 0; + settings_.offset_up = 0; + settings_.offset_right = 0; + } + else + { + calib_timer_.stop(); + qDebug() << "pt: stopping translation calibration"; + { + auto [tmp, nsamples] = trans_calib_.get_estimate(); + settings_.offset_fwd = int(tmp[0]); + settings_.offset_up = int(tmp[1]); + settings_.offset_right = int(tmp[2]); + } + } + ui_.tx_spin->setEnabled(!start); + ui_.ty_spin->setEnabled(!start); + ui_.tz_spin->setEnabled(!start); + + if (start) + ui_.tcalib_button->setText(tr("Stop calibration")); + else + ui_.tcalib_button->setText(tr("Start calibration")); +} + + +void NeuralNetDialog::onSelectPoseNetFile() +{ + const auto root = get_default_model_directory(); + // Start with the current setting + QString filename = settings_.posenet_file; + // If the filename is relative then assume that the file is located under the + // model directory. Under regular use this should always be the case. + if (QFileInfo(filename).isRelative()) + filename = root.absoluteFilePath(filename); + filename = QFileDialog::getOpenFileName(this, + tr("Select Pose Net ONNX"), filename, tr("ONNX Files (*.onnx)")); + // In case the user aborted. + if (filename.isEmpty()) + return; + // When a file under the model directory was selected we can get rid of the + // directory prefix. This is more robust than storing absolute paths, e.g. + // in case the user moves the opentrack install folder / reuses old settings. + // When the file is not in the model directory, we have to use the absolute path, + // which is also fine as developer feature. + if (filename.startsWith(root.absolutePath())) + filename = root.relativeFilePath(filename); + settings_.posenet_file = filename; +} + + +Settings::Settings() : opts("neuralnet-tracker") {} + +} // neuralnet_tracker_ns + +OPENTRACK_DECLARE_TRACKER(NeuralNetTracker, NeuralNetDialog, NeuralNetMetadata) diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.h b/tracker-neuralnet/ftnoir_tracker_neuralnet.h new file mode 100644 index 00000000..fe755f51 --- /dev/null +++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.h @@ -0,0 +1,230 @@ +/* Copyright (c) 2021 Michael Welter <michael@welter-4d.de> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ */ + +#pragma once + +#include "ui_neuralnet-trackercontrols.h" +#include "model_adapters.h" +#include "deadzone_filter.h" +#include "preview.h" + +#include "options/options.hpp" +#include "api/plugin-api.hpp" +#include "cv/video-widget.hpp" +#include "cv/translation-calibrator.hpp" +#include "cv/numeric.hpp" +#include "compat/timer.hpp" +#include "video/camera.hpp" +#include "cv/affine.hpp" + +#include <QObject> +#include <QThread> +#include <QMutex> +#include <QHBoxLayout> +#include <QDialog> +#include <QTimer> + +#include <memory> +#include <cinttypes> +#include <array> + +#include <opencv2/core.hpp> +#include <opencv2/imgproc.hpp> + + +namespace neuralnet_tracker_ns +{ + + +using namespace options; + + +enum fps_choices +{ + fps_default = 0, + fps_30 = 1, + fps_60 = 2, + fps_75 = 3, + fps_125 = 4, + fps_200 = 5, + fps_50 = 6, + fps_100 = 7, + fps_120 = 8, + fps_300 = 9, + fps_250 = 10, + fps_MAX = 11, +}; + +struct resolution_tuple +{ + int width; + int height; +}; + +static const std::array<resolution_tuple, 7> resolution_choices = +{{ + { 320, 240 }, + { 640, 480 }, + { 800, 600 }, + { 1024, 768 }, + { 1280, 720 }, + { 1920, 1080}, + { 0, 0 } +}}; + + +struct Settings : opts { + value<int> offset_fwd { b, "offset-fwd", 200 }, // Millimeters + offset_up { b, "offset-up", 0 }, + offset_right { b, "offset-right", 0 }; + value<QString> camera_name { b, "camera-name", ""}; + value<int> fov { b, "field-of-view", 56 }; + value<fps_choices> force_fps { b, "force-fps", fps_default }; + value<bool> show_network_input { b, "show-network-input", false }; + value<double> roi_filter_alpha{ b, "roi-filter-alpha", 1. }; + value<double> roi_zoom{ b, "roi-zoom", 1. }; + value<bool> use_mjpeg { b, "use-mjpeg", false }; + value<int> num_threads { b, "num-threads", 1 }; + value<int> resolution { b, "force-resolution", 0 }; + value<double> deadzone_size { b, "deadzone-size", 1. }; + value<double> deadzone_hardness { b, "deadzone-hardness", 1.5 }; + value<QString> posenet_file { b, "posenet-file", "head-pose-0.3-big-quantized.onnx" }; + Settings(); +}; + + +struct CamIntrinsics +{ + float focal_length_w; + float focal_length_h; + float fov_w; + float fov_h; +}; + + +class NeuralNetTracker : protected virtual QThread, public ITracker +{ + Q_OBJECT +public: + NeuralNetTracker(); + ~NeuralNetTracker() override; + module_status start_tracker(QFrame* frame) override; + void data(double *data) override; + void run() override; + Affine pose(); + std::tuple<cv::Size, double, double> stats() const; + + QMutex camera_mtx_; + std::unique_ptr<video::impl::camera> camera_; + +private: + bool detect(); + bool open_camera(); + void set_intrinsics(); + cv::Mat prepare_input_image(const video::frame& frame); + bool load_and_initialize_model(); + void draw_gizmos( + const std::optional<PoseEstimator::Face> &face, + const Affine& pose); + void update_fps(double dt); + // Secretly applies filtering while computing the pose in 3d space. 
+ QuatPose compute_filtered_pose(const PoseEstimator::Face &face); + // Compute the pose in 3d space taking the network outputs + QuatPose transform_to_world_pose(const cv::Quatf &face_rotation, const cv::Point2f& face_xy, const float face_size) const; + QString get_posenet_filename() const; + + Settings settings_; + std::optional<Localizer> localizer_; + std::optional<PoseEstimator> poseestimator_; + Ort::Env env_{nullptr}; + Ort::MemoryInfo allocator_info_{nullptr}; + + CamIntrinsics intrinsics_{}; + cv::Mat grayscale_; + std::array<cv::Mat,2> downsized_original_images_ = {}; // Image pyramid + std::optional<cv::Rect2f> last_localizer_roi_; + std::optional<cv::Rect2f> last_roi_; + static constexpr float HEAD_SIZE_MM = 200.f; // In the vertical. Approximately. + + mutable QMutex stats_mtx_; + double fps_ = 0; + double inference_time_ = 0; + cv::Size resolution_ = {}; + + static constexpr double RC = .25; + int num_threads_ = 1; + bool is_visible_ = true; + + QMutex mtx_ = {}; // Protects the pose + std::optional<QuatPose> last_pose_ = {}; + Affine last_pose_affine_ = {}; + + Preview preview_; + std::unique_ptr<cv_video_widget> video_widget_; + std::unique_ptr<QHBoxLayout> layout_; +}; + + +class NeuralNetDialog : public ITrackerDialog +{ + Q_OBJECT +public: + NeuralNetDialog(); + void register_tracker(ITracker * x) override; + void unregister_tracker() override; + + bool embeddable() noexcept override; + void set_buttons_visible(bool x) override; +private: + void make_fps_combobox(); + void make_resolution_combobox(); + + Ui::Form ui_; + Settings settings_; + // Calibration code mostly taken from point tracker + QTimer calib_timer_; + TranslationCalibrator trans_calib_; + QMutex calibrator_mutex_; + QTimer tracker_status_poll_timer_; + NeuralNetTracker* tracker_ = nullptr; + +private Q_SLOTS: + void save() override; + void reload() override; + void doOK(); + void doCancel(); + void camera_settings(); + void update_camera_settings_state(const QString& name); + void startstop_trans_calib(bool start); + void trans_calib_step(); + void status_poll(); + void onSelectPoseNetFile(); +}; + + +class NeuralNetMetadata : public Metadata +{ + Q_OBJECT + QString name() override { return QString("neuralnet tracker"); } + QIcon icon() override { return QIcon(":/images/neuralnet.png"); } +}; + + +} // neuralnet_tracker_ns + + +namespace neuralnet_tracker_tests +{ + +void run(); + +} + + +using neuralnet_tracker_ns::NeuralNetTracker; +using neuralnet_tracker_ns::NeuralNetDialog; +using neuralnet_tracker_ns::NeuralNetMetadata; diff --git a/tracker-neuralnet/images/neuralnet.png b/tracker-neuralnet/images/neuralnet.png Binary files differnew file mode 100644 index 00000000..1a10c53c --- /dev/null +++ b/tracker-neuralnet/images/neuralnet.png diff --git a/tracker-neuralnet/lang/de_DE.ts b/tracker-neuralnet/lang/de_DE.ts new file mode 100644 index 00000000..6261eec0 --- /dev/null +++ b/tracker-neuralnet/lang/de_DE.ts @@ -0,0 +1,172 @@ +<?xml version="1.0" encoding="utf-8"?> +<!DOCTYPE TS> +<TS version="2.1" language="de_DE"> +<context> + <name>Form</name> + <message> + <source>Tracker settings</source> + <translation>Tracker-Einstellungen</translation> + </message> + <message> + <source>Head Center Offset</source> + <translation>Versatz zur Kopfmitte</translation> + </message> + <message> + <source>Right</source> + <translation>Rechts</translation> + </message> + <message> + <source>Forward</source> + <translation>Vorwärts</translation> + </message> + <message> + <source>Up</source> + 
<translation>Hoch</translation> + </message> + <message> + <source> mm</source> + <translation> mm</translation> + </message> + <message> + <source>Use only yaw and pitch while calibrating. +Don't roll or change position.</source> + <translation>Während der Kalibrierung nur gieren und nicken. +Bitte nicht rollen oder die Position ändern.</translation> + </message> + <message> + <source>Start calibration</source> + <translation>Kalibrierung starten</translation> + </message> + <message> + <source>Camera Configuration</source> + <translation>Kamera-Konfiguration</translation> + </message> + <message> + <source>Diagonal FOV</source> + <translation>Diagonales Sichtfeld</translation> + </message> + <message> + <source>Camera name</source> + <translation>Kamera-Name</translation> + </message> + <message> + <source>Field of view. Needed to transform the pose to world coordinates.</source> + <translation>Sichtfeld. Benötigt, um die Pose in Welt-Koordinaten zu übersetzen.</translation> + </message> + <message> + <source>The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution.</source> + <translation>Die angeforderte Auflösung für Fälle, in denen die Kamera die maximale Bildrate nur bei bestimmten Auflösungen ausgibt. Das Bild wird möglicherweise weiterhin herunterskaliert auf die interne Auflösung.</translation> + </message> + <message> + <source>Resolution</source> + <translation>Auflösung</translation> + </message> + <message> + <source>Requested video frame rate. Actual setting may not be supported by the camera.</source> + <translation>Angeforderte Bildrate. Die tatsächliche Einstellungen wird von der Kamera möglicherweise nicht unterstützt.</translation> + </message> + <message> + <source>Frames per second</source> + <translation>Bilder pro Sekunde</translation> + </message> + <message> + <source>MJPEG</source> + <translation>MJPEG</translation> + </message> + <message> + <source>Camera settings</source> + <translation>Kamera-Einstellungen</translation> + </message> + <message> + <source>Tuning / Debug</source> + <translation>Tuning / Fehlersuche</translation> + </message> + <message> + <source>Thread Count</source> + <translation>Anzahl der Threads</translation> + </message> + <message> + <source>Number of threads. Can be used to balance the CPU load between the game and the tracker.</source> + <translation>Anzahl der Threads. Kann verwendet werden, um die CPU-Last zwischen Spiel und Tracker zu balancieren.</translation> + </message> + <message> + <source>Show the image patch that the pose estimation model sees.</source> + <translation>Zeigt den Bildausschnitt, den das Modell zur Posenabschätzung sieht.</translation> + </message> + <message> + <source>Show Network Input</source> + <translation>Zeige Netzwerk-Eingabe</translation> + </message> + <message> + <source>ROI Smoothing Alpha</source> + <translation>ROI-Glättungsalpha</translation> + </message> + <message> + <source>Amount of smoothing of the face region coordinates. Can help stabilize the pose.</source> + <translation>Umfang der Glättung der Gesichtkoordinaten. Kann helfen, die Pose zu stabilisieren.</translation> + </message> + <message> + <source>ROI Zoom</source> + <translation>ROI-Zoom</translation> + </message> + <message> + <source>Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1.</source> + <translation>Zoom-Faktor der Gesichtsregion. 
Wird angewendet, bevor der Bildausschnitt zum Posen-Abschätzungsmodell gesendet wird. Der Sweet-Spot liegt nahe bei 1.</translation> + </message> + <message> + <source>Select the pose network. Changes take affect on the next tracker start</source> + <translation>Wählt das Pose-Netzwerk. Die Änderungen treten beim nächsten Start des Trackers inkraft</translation> + </message> + <message> + <source>Select Pose Net ONNX</source> + <translation>Wähle Pose-Netzwerk ONNX</translation> + </message> + <message> + <source><the pose net file></source> + <translation><die pose netzwerk datei></translation> + </message> +</context> +<context> + <name>neuralnet_tracker_ns::NeuralNetDialog</name> + <message> + <source>Default</source> + <translation>Standard</translation> + </message> + <message> + <source>Tracker Offline</source> + <translation>Tracker offline</translation> + </message> + <message> + <source>%1x%2 @ %3 FPS / Inference: %4 ms</source> + <translation>%1x%2 @ %3 FPS / Inferenz: %4 ms</translation> + </message> + <message> + <source>%1 yaw samples. Yaw more to %2 samples for stable calibration.</source> + <translation>%1 Gieren-Proben. Weiterhin gieren bis %2 Proben für eine stabile Kalibrierung.</translation> + </message> + <message> + <source>%1 pitch samples. Pitch more to %2 samples for stable calibration.</source> + <translation>%1 Nicken-Proben. Weiterhin nicken bis %2 Proben für eine stabile Kalibrierung.</translation> + </message> + <message> + <source>%1 samples. Over %2, good!</source> + <translation>%1 Proben. Mehr als %2, gut!</translation> + </message> + <message> + <source>Stop calibration</source> + <translation>Kalibrierung stoppen</translation> + </message> + <message> + <source>Start calibration</source> + <translation>Kalibrierung starten</translation> + </message> + <message> + <source>Select Pose Net ONNX</source> + <translation>Wähle Pose-Netzwerk ONNX</translation> + </message> + <message> + <source>ONNX Files (*.onnx)</source> + <translation>ONNX-Dateien (*.onnx)</translation> + </message> +</context> +</TS> diff --git a/tracker-neuralnet/lang/nl_NL.ts b/tracker-neuralnet/lang/nl_NL.ts new file mode 100644 index 00000000..27da4f5a --- /dev/null +++ b/tracker-neuralnet/lang/nl_NL.ts @@ -0,0 +1,171 @@ +<?xml version="1.0" encoding="utf-8"?> +<!DOCTYPE TS> +<TS version="2.1" language="nl_NL"> +<context> + <name>Form</name> + <message> + <source>Tracker settings</source> + <translation>Tracker-instellingen</translation> + </message> + <message> + <source>Frames per second</source> + <translation>Frames per seconde</translation> + </message> + <message> + <source>Camera name</source> + <translation>Cameranaam</translation> + </message> + <message> + <source>Diagonal FOV</source> + <translation>Diagonale FOV</translation> + </message> + <message> + <source>Camera settings</source> + <translation>Camera-instellingen</translation> + </message> + <message> + <source>Camera Configuration</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Head Center Offset</source> + <translation type="unfinished"></translation> + </message> + <message> + <source> mm</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Use only yaw and pitch while calibrating. 
+Don't roll or change position.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Start calibration</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Right</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Forward</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Up</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Show Network Input</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>MJPEG</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Tuning / Debug</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>ROI Smoothing Alpha</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>ROI Zoom</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Thread Count</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Resolution</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Field of view. Needed to transform the pose to world coordinates.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Requested video frame rate. Actual setting may not be supported by the camera.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Number of threads. Can be used to balance the CPU load between the game and the tracker.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Show the image patch that the pose estimation model sees.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Amount of smoothing of the face region coordinates. Can help stabilize the pose.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Select Pose Net ONNX</source> + <translation type="unfinished"></translation> + </message> + <message> + <source><the pose net file></source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Select the pose network. Changes take affect on the next tracker start</source> + <translation type="unfinished"></translation> + </message> +</context> +<context> + <name>neuralnet_tracker_ns::NeuralNetDialog</name> + <message> + <source>Default</source> + <translation type="unfinished">Standaard</translation> + </message> + <message> + <source>Tracker Offline</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>%1x%2 @ %3 FPS / Inference: %4 ms</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>%1 yaw samples. 
Yaw more to %2 samples for stable calibration.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>%1 pitch samples. Pitch more to %2 samples for stable calibration.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>%1 samples. Over %2, good!</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Stop calibration</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Start calibration</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Select Pose Net ONNX</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>ONNX Files (*.onnx)</source> + <translation type="unfinished"></translation> + </message> +</context> +</TS> diff --git a/tracker-neuralnet/lang/ru_RU.ts b/tracker-neuralnet/lang/ru_RU.ts new file mode 100644 index 00000000..c32d4fa7 --- /dev/null +++ b/tracker-neuralnet/lang/ru_RU.ts @@ -0,0 +1,174 @@ +<?xml version="1.0" encoding="utf-8"?> +<!DOCTYPE TS> +<TS version="2.1" language="ru_RU"> +<context> + <name>Form</name> + <message> + <source>Tracker settings</source> + <translation>Настройки трекера</translation> + </message> + <message> + <source>Diagonal FOV</source> + <translation>Угол обзора</translation> + </message> + <message> + <source>Camera settings</source> + <translation>Настройки камеры</translation> + </message> + <message> + <source>Frames per second</source> + <translation>Кадры в секунду</translation> + </message> + <message> + <source>Camera name</source> + <translation>Камера</translation> + </message> + <message> + <source>Camera Configuration</source> + <translation>Конфигурация камеры</translation> + </message> + <message> + <source>Head Center Offset</source> + <translation>Смещение центра головы</translation> + </message> + <message> + <source> mm</source> + <translation> мм</translation> + </message> + <message> + <source>Use only yaw and pitch while calibrating. +Don't roll or change position.</source> + <translation>Поворачивайте голову влево-вправо и наклоняйте вверх-вниз. +Не наклоняйте набок и не смещайте голову в сторону.</translation> + </message> + <message> + <source>Start calibration</source> + <translation>Начать калибровку</translation> + </message> + <message> + <source>Right</source> + <translation>Вправо</translation> + </message> + <message> + <source>Forward</source> + <translation>Вперед</translation> + </message> + <message> + <source>Up</source> + <translation>Вверх</translation> + </message> + <message> + <source>Show Network Input</source> + <translation>Показать входные данные</translation> + </message> + <message> + <source>MJPEG</source> + <translation>Использовать MJPEG</translation> + </message> + <message> + <source>Tuning / Debug</source> + <translation>Тонкая настройка</translation> + </message> + <message> + <source>ROI Smoothing Alpha</source> + <translation>Сглаживание ROI</translation> + </message> + <message> + <source>ROI Zoom</source> + <translation>Масштабирование ROI</translation> + </message> + <message> + <source>Thread Count</source> + <translation>Количество потоков</translation> + </message> + <message> + <source>Resolution</source> + <translation>Разрешение</translation> + </message> + <message> + <source>Field of view. Needed to transform the pose to world coordinates.</source> + <translation>Угол обзора камеры. 
Требуется для преобразования положения головы в глобальные координаты</translation> + </message> + <message> + <source>Requested video frame rate. Actual setting may not be supported by the camera.</source> + <translation>Частота кадров. Реальные значения могут не поддерживаться камерой.</translation> + </message> + <message> + <source>The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution.</source> + <translation>Разрешение камеры, для тех случаев, когда быстродействие камеры максимально в определенном разрешении. Может быть масштабировано до внутреннего разрешения.</translation> + </message> + <message> + <source>Number of threads. Can be used to balance the CPU load between the game and the tracker.</source> + <translation>Количество потоков. Используется для балансировки нагрузки на процессор между игрой и трекером.</translation> + </message> + <message> + <source>Show the image patch that the pose estimation model sees.</source> + <translation>Показать изображение, используемое моделью определения позиции</translation> + </message> + <message> + <source>Amount of smoothing of the face region coordinates. Can help stabilize the pose.</source> + <translation>Сглаживание координат области лица. Может помочь стабилизировать позицию.</translation> + </message> + <message> + <source>Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1.</source> + <translation>Фактор масштабирования области лица. Применяется перед передачей кадра в модель определения позиции. Наилучшие результаты близки к 1</translation> + </message> + <message> + <source>Select Pose Net ONNX</source> + <translation type="unfinished"></translation> + </message> + <message> + <source><the pose net file></source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Select the pose network. Changes take affect on the next tracker start</source> + <translation type="unfinished"></translation> + </message> +</context> +<context> + <name>neuralnet_tracker_ns::NeuralNetDialog</name> + <message> + <source>Default</source> + <translation>По умолчанию</translation> + </message> + <message> + <source>Tracker Offline</source> + <translation>Трекер выключен</translation> + </message> + <message> + <source>%1x%2 @ %3 FPS / Inference: %4 ms</source> + <translation>%1x%2 @ %3 FPS; Время оценки: %4 мс</translation> + </message> + <message> + <source>%1 yaw samples. Yaw more to %2 samples for stable calibration.</source> + <translation>Сэмплов поворота: %1. +Поворачивайте голову в стороны до %2 сэмплов для стабильной калибрации.</translation> + </message> + <message> + <source>%1 pitch samples. Pitch more to %2 samples for stable calibration.</source> + <translation>Сэмплов наклона: %1. +Наклоняйте голову вниз/вверх до %2 сэмплов для стабильной калибрации.</translation> + </message> + <message> + <source>%1 samples. Over %2, good!</source> + <translation>%1 сэмплов. 
Более %2, достаточно.</translation> + </message> + <message> + <source>Stop calibration</source> + <translation>Остановить калибровку</translation> + </message> + <message> + <source>Start calibration</source> + <translation>Начать калибровку</translation> + </message> + <message> + <source>Select Pose Net ONNX</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>ONNX Files (*.onnx)</source> + <translation type="unfinished"></translation> + </message> +</context> +</TS> diff --git a/tracker-neuralnet/lang/stub.ts b/tracker-neuralnet/lang/stub.ts new file mode 100644 index 00000000..9609f05e --- /dev/null +++ b/tracker-neuralnet/lang/stub.ts @@ -0,0 +1,171 @@ +<?xml version="1.0" encoding="utf-8"?> +<!DOCTYPE TS> +<TS version="2.1"> +<context> + <name>Form</name> + <message> + <source>Tracker settings</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Diagonal FOV</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Camera settings</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Frames per second</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Camera name</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Camera Configuration</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Head Center Offset</source> + <translation type="unfinished"></translation> + </message> + <message> + <source> mm</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Use only yaw and pitch while calibrating. +Don't roll or change position.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Start calibration</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Right</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Forward</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Up</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Show Network Input</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>MJPEG</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Tuning / Debug</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>ROI Smoothing Alpha</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>ROI Zoom</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Thread Count</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Resolution</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Field of view. Needed to transform the pose to world coordinates.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Requested video frame rate. Actual setting may not be supported by the camera.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. 
The image may still be downscaled to the internal resolution.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Number of threads. Can be used to balance the CPU load between the game and the tracker.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Show the image patch that the pose estimation model sees.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Amount of smoothing of the face region coordinates. Can help stabilize the pose.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Select Pose Net ONNX</source> + <translation type="unfinished"></translation> + </message> + <message> + <source><the pose net file></source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Select the pose network. Changes take affect on the next tracker start</source> + <translation type="unfinished"></translation> + </message> +</context> +<context> + <name>neuralnet_tracker_ns::NeuralNetDialog</name> + <message> + <source>Default</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Tracker Offline</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>%1x%2 @ %3 FPS / Inference: %4 ms</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>%1 yaw samples. Yaw more to %2 samples for stable calibration.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>%1 pitch samples. Pitch more to %2 samples for stable calibration.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>%1 samples. 
Over %2, good!</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Stop calibration</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Start calibration</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Select Pose Net ONNX</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>ONNX Files (*.onnx)</source> + <translation type="unfinished"></translation> + </message> +</context> +</TS> diff --git a/tracker-neuralnet/lang/zh_CN.ts b/tracker-neuralnet/lang/zh_CN.ts new file mode 100644 index 00000000..53da04ae --- /dev/null +++ b/tracker-neuralnet/lang/zh_CN.ts @@ -0,0 +1,172 @@ +<?xml version="1.0" encoding="utf-8"?> +<!DOCTYPE TS> +<TS version="2.1" language="zh_CN"> +<context> + <name>Form</name> + <message> + <source>Tracker settings</source> + <translation>追踪器设置</translation> + </message> + <message> + <source>Diagonal FOV</source> + <translation>对角FOV</translation> + </message> + <message> + <source>Camera name</source> + <translation>相机名</translation> + </message> + <message> + <source>Frames per second</source> + <translation>FPS</translation> + </message> + <message> + <source>Camera settings</source> + <translation>相机设置</translation> + </message> + <message> + <source>Camera Configuration</source> + <translation>相机配置</translation> + </message> + <message> + <source>Head Center Offset</source> + <translation>头部归中补偿</translation> + </message> + <message> + <source> mm</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Use only yaw and pitch while calibrating. +Don't roll or change position.</source> + <translation>在校准时只使用偏航和俯仰, +不要滚转或是改变位置. </translation> + </message> + <message> + <source>Start calibration</source> + <translation>开始校准</translation> + </message> + <message> + <source>Right</source> + <translation>向右</translation> + </message> + <message> + <source>Forward</source> + <translation>向前</translation> + </message> + <message> + <source>Up</source> + <translation>向上</translation> + </message> + <message> + <source>Show Network Input</source> + <translation>展示神经网络输入</translation> + </message> + <message> + <source>MJPEG</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Tuning / Debug</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>ROI Smoothing Alpha</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>ROI Zoom</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Thread Count</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Resolution</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Field of view. Needed to transform the pose to world coordinates.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Requested video frame rate. Actual setting may not be supported by the camera.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Number of threads. 
Can be used to balance the CPU load between the game and the tracker.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Show the image patch that the pose estimation model sees.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Amount of smoothing of the face region coordinates. Can help stabilize the pose.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Select the pose network. Changes take affect on the next tracker start</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Select Pose Net ONNX</source> + <translation type="unfinished"></translation> + </message> + <message> + <source><the pose net file></source> + <translation type="unfinished"></translation> + </message> +</context> +<context> + <name>neuralnet_tracker_ns::NeuralNetDialog</name> + <message> + <source>Default</source> + <translation>默认</translation> + </message> + <message> + <source>Tracker Offline</source> + <translation>追踪器离线</translation> + </message> + <message> + <source>%1x%2 @ %3 FPS / Inference: %4 ms</source> + <translation>%1x%2 @ %3 FPS / 推理: %4 ms</translation> + </message> + <message> + <source>%1 yaw samples. Yaw more to %2 samples for stable calibration.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>%1 pitch samples. Pitch more to %2 samples for stable calibration.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>%1 samples. Over %2, good!</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Stop calibration</source> + <translation>结束校准</translation> + </message> + <message> + <source>Start calibration</source> + <translation>开始校准</translation> + </message> + <message> + <source>Select Pose Net ONNX</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>ONNX Files (*.onnx)</source> + <translation type="unfinished"></translation> + </message> +</context> +</TS> diff --git a/tracker-neuralnet/model_adapters.cpp b/tracker-neuralnet/model_adapters.cpp new file mode 100644 index 00000000..f53478af --- /dev/null +++ b/tracker-neuralnet/model_adapters.cpp @@ -0,0 +1,416 @@ +#include "model_adapters.h" + +#include "compat/timer.hpp" + +#include <opencv2/core.hpp> +#include <opencv2/core/quaternion.hpp> +#include <opencv2/imgproc.hpp> + +#include <QDebug> + +namespace neuralnet_tracker_ns +{ + + +float sigmoid(float x) +{ + return 1.f/(1.f + std::exp(-x)); +} + + +// Defined in ftnoir_tracker_neuralnet.cpp +// Normally we wouldn't need it here. However ... see below. +cv::Quatf image_to_world(cv::Quatf q); + + +cv::Quatf world_to_image(cv::Quatf q) +{ + // It's its own inverse. + return image_to_world(q); +} + + +cv::Rect2f unnormalize(const cv::Rect2f &r, int h, int w) +{ + auto unnorm = [](float x) -> float { return 0.5*(x+1); }; + auto tl = r.tl(); + auto br = r.br(); + auto x0 = unnorm(tl.x)*w; + auto y0 = unnorm(tl.y)*h; + auto x1 = unnorm(br.x)*w; + auto y1 = unnorm(br.y)*h; + return { + x0, y0, x1-x0, y1-y0 + }; +} + + +// Returns width and height of the input tensor, or throws. 
+// Expects the model to take one tensor as input that must +// have the shape B x C x H x W, where B=C=1. +cv::Size get_input_image_shape(const Ort::Session &session) +{ + if (session.GetInputCount() < 1) + throw std::invalid_argument("Model must take at least one input tensor"); + const std::vector<std::int64_t> shape = + session.GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); + if (shape.size() != 4) + throw std::invalid_argument("Model takes the input tensor in the wrong shape"); + return { static_cast<int>(shape[3]), static_cast<int>(shape[2]) }; +} + + +Ort::Value create_tensor(const Ort::TypeInfo& info, Ort::Allocator& alloc) +{ + const auto shape = info.GetTensorTypeAndShapeInfo().GetShape(); + auto t = Ort::Value::CreateTensor<float>( + alloc, shape.data(), shape.size()); + memset(t.GetTensorMutableData<float>(), 0, sizeof(float)*info.GetTensorTypeAndShapeInfo().GetElementCount()); + return t; +} + + +int find_input_intensity_quantile(const cv::Mat& frame, float percentage) +{ + const int channels[] = { 0 }; + const int hist_size[] = { 256 }; + float range[] = { 0, 256 }; + const float* ranges[] = { range }; + cv::Mat hist; + cv::calcHist(&frame, 1, channels, cv::Mat(), hist, 1, hist_size, ranges, true, false); + int gray_level = 0; + const int num_pixels_quantile = frame.total()*percentage*0.01f; + int num_pixels_accum = 0; + for (int i=0; i<hist_size[0]; ++i) + { + num_pixels_accum += hist.at<float>(i); + if (num_pixels_accum > num_pixels_quantile) + { + gray_level = i; + break; + } + } + return gray_level; +} + + +// Automatic brightness adjustment. Scales brightness to lie between -.5 and 0.5, roughly. +void normalize_brightness(const cv::Mat& frame, cv::Mat& out) +{ + const float pct = 90; + + const int brightness = find_input_intensity_quantile(frame, pct); + + const double alpha = brightness<127 ? (pct/100.f*0.5f/std::max(5,brightness)) : 1./255; + const double beta = -0.5; + + frame.convertTo(out, CV_32F, alpha, beta); +} + + + +Localizer::Localizer(Ort::MemoryInfo &allocator_info, Ort::Session &&session) : + session_{std::move(session)}, + scaled_frame_(INPUT_IMG_HEIGHT, INPUT_IMG_WIDTH, CV_8U), + input_mat_(INPUT_IMG_HEIGHT, INPUT_IMG_WIDTH, CV_32F) +{ + // Only works when input_mat does not reallocated memory ...which it should not. + // Non-owning memory reference to input_mat? 
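+ // (Answering the question above: the CreateTensor overload used here wraps a caller-supplied
+ // buffer without copying it, so the Ort::Value aliases input_mat_'s storage and input_mat_
+ // must keep the same allocation for as long as the tensor is used.)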
+ // Note: shape = (bach x channels x h x w) + const std::int64_t input_shape[4] = { 1, 1, INPUT_IMG_HEIGHT, INPUT_IMG_WIDTH }; + input_val_ = Ort::Value::CreateTensor<float>(allocator_info, input_mat_.ptr<float>(0), input_mat_.total(), input_shape, 4); + + const std::int64_t output_shape[2] = { 1, 5 }; + output_val_ = Ort::Value::CreateTensor<float>(allocator_info, results_.data(), results_.size(), output_shape, 2); +} + + +std::pair<float, cv::Rect2f> Localizer::run( + const cv::Mat &frame) +{ + auto p = input_mat_.ptr(0); + + cv::resize(frame, scaled_frame_, { INPUT_IMG_WIDTH, INPUT_IMG_HEIGHT }, 0, 0, cv::INTER_AREA); + scaled_frame_.convertTo(input_mat_, CV_32F, 1./255., -0.5); + + assert (input_mat_.ptr(0) == p); + assert (!input_mat_.empty() && input_mat_.isContinuous()); + assert (input_mat_.cols == INPUT_IMG_WIDTH && input_mat_.rows == INPUT_IMG_HEIGHT); + + const char* input_names[] = {"x"}; + const char* output_names[] = {"logit_box"}; + + Timer t; t.start(); + + session_.Run(Ort::RunOptions{nullptr}, input_names, &input_val_, 1, output_names, &output_val_, 1); + + last_inference_time_ = t.elapsed_ms(); + + const cv::Rect2f roi = unnormalize(cv::Rect2f{ + results_[1], + results_[2], + results_[3]-results_[1], // Width + results_[4]-results_[2] // Height + }, frame.rows, frame.cols); + const float score = sigmoid(results_[0]); + + return { score, roi }; +} + + +double Localizer::last_inference_time_millis() const +{ + return last_inference_time_; +} + + +std::string PoseEstimator::get_network_input_name(size_t i) const +{ +#if ORT_API_VERSION >= 12 + return std::string(&*session_.GetInputNameAllocated(i, allocator_)); +#else + return std::string(session_.GetInputName(i, allocator_)); +#endif +} + +std::string PoseEstimator::get_network_output_name(size_t i) const +{ +#if ORT_API_VERSION >= 12 + return std::string(&*session_.GetOutputNameAllocated(i, allocator_)); +#else + return std::string(session_.GetOutputName(i, allocator_)); +#endif +} + +PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&session) + : model_version_{session.GetModelMetadata().GetVersion()} + , session_{std::move(session)} + , allocator_{session_, allocator_info} +{ + using namespace std::literals::string_literals; + + if (session_.GetOutputCount() < 2) + throw std::runtime_error("Invalid Model: must have at least two outputs"); + + // WARNING: Messy model compatibility issues! + // When reading the initial model release, it did not have the version field set. + // Reading it here will result in some unspecified value. It's probably UB due to + // reading uninitialized memory. But there is little choice. + // Now, detection of this old version is messy ... we have to guess based on the + // number we get. Getting an uninitialized value matching a valid version is unlikely. + // But the real problem is that this line must be updated whenever we want to bump the + // version number!! 
+ if (model_version_ <= 0 || model_version_ > 4) + model_version_ = 1; + + const cv::Size input_image_shape = get_input_image_shape(session_); + + scaled_frame_ = cv::Mat(input_image_shape, CV_8U, cv::Scalar(0)); + input_mat_ = cv::Mat(input_image_shape, CV_32F, cv::Scalar(0.f)); + + { + const std::int64_t input_shape[4] = { 1, 1, input_image_shape.height, input_image_shape.width }; + input_val_.push_back( + Ort::Value::CreateTensor<float>(allocator_info, input_mat_.ptr<float>(0), input_mat_.total(), input_shape, 4)); + } + + struct TensorSpec + { + std::vector<int64_t> shape; + float* buffer = nullptr; + size_t element_count = 0; + bool available = false; + }; + + std::unordered_map<std::string, TensorSpec> understood_outputs = { + { "pos_size", TensorSpec{ { 1, 3 }, &output_coord_[0], output_coord_.rows } }, + { "quat", TensorSpec{ { 1, 4}, &output_quat_[0], output_quat_.rows } }, + { "box", TensorSpec{ { 1, 4}, &output_box_[0], output_box_.rows } }, + { "rotaxis_scales_tril", TensorSpec{ {1, 3, 3}, output_rotaxis_scales_tril_.val, 9 }}, + { "rotaxis_std", TensorSpec{ {1, 3, 3}, output_rotaxis_scales_tril_.val, 9 }}, // TODO: Delete when old models aren't used any more + { "pos_size_std", TensorSpec{ {1, 3}, output_coord_scales_std_.val, output_coord_scales_std_.rows}}, + { "pos_size_scales", TensorSpec{ {1, 3}, output_coord_scales_std_.val, output_coord_scales_std_.rows}}, + { "pos_size_scales_tril", TensorSpec{ {1, 3, 3}, output_coord_scales_tril_.val, 9}} + }; + + qDebug() << "Pose model inputs (" << session_.GetInputCount() << ")"; + qDebug() << "Pose model outputs (" << session_.GetOutputCount() << "):"; + output_names_.resize(session_.GetOutputCount()); + output_c_names_.resize(session_.GetOutputCount()); + for (size_t i=0; i<session_.GetOutputCount(); ++i) + { + const std::string name = get_network_output_name(i); + const auto& output_info = session_.GetOutputTypeInfo(i); + const auto& onnx_tensor_spec = output_info.GetTensorTypeAndShapeInfo(); + auto my_tensor_spec_it = understood_outputs.find(name); + + qDebug() << "\t" << name.c_str() << " (" << onnx_tensor_spec.GetShape() << ") dtype: " << onnx_tensor_spec.GetElementType() << " " << + (my_tensor_spec_it != understood_outputs.end() ? 
"ok" : "unknown"); + + if (my_tensor_spec_it != understood_outputs.end()) + { + TensorSpec& t = my_tensor_spec_it->second; + if (onnx_tensor_spec.GetShape() != t.shape || + onnx_tensor_spec.GetElementType() != Ort::TypeToTensorType<float>::type) + throw std::runtime_error("Invalid output tensor spec for "s + name); + output_val_.push_back(Ort::Value::CreateTensor<float>( + allocator_info, t.buffer, t.element_count, t.shape.data(), t.shape.size())); + t.available = true; + } + else + { + // Create tensor regardless and ignore output + output_val_.push_back(create_tensor(output_info, allocator_)); + } + output_names_[i] = name; + output_c_names_[i] = output_names_[i].c_str(); + } + + has_uncertainty_ = understood_outputs.at("rotaxis_scales_tril").available || + understood_outputs.at("rotaxis_std").available; + has_uncertainty_ &= understood_outputs.at("pos_size_std").available || + understood_outputs.at("pos_size_scales").available || + understood_outputs.at("pos_size_scales_tril").available; + pos_scale_uncertainty_is_matrix_ = understood_outputs.at("pos_size_scales_tril").available; + + input_names_.resize(session_.GetInputCount()); + input_c_names_.resize(session_.GetInputCount()); + for (size_t i = 0; i < session_.GetInputCount(); ++i) + { + input_names_[i] = get_network_input_name(i); + input_c_names_[i] = input_names_[i].c_str(); + } + + assert (input_names_.size() == input_val_.size()); + assert (output_names_.size() == output_val_.size()); +} + + +std::optional<PoseEstimator::Face> PoseEstimator::run( + const cv::Mat &frame, const cv::Rect &box) +{ + cv::Mat cropped; + + const int patch_size = std::max(box.width, box.height)*1.05; + const cv::Point2f patch_center = { + std::clamp<float>(box.x + 0.5f*box.width, 0.f, frame.cols), + std::clamp<float>(box.y + 0.5f*box.height, 0.f, frame.rows) + }; + cv::getRectSubPix(frame, {patch_size, patch_size}, patch_center, cropped); + + // Will get failure if patch_center is outside image boundaries settings. + // Have to catch this case. + if (cropped.rows != patch_size || cropped.cols != patch_size) + return {}; + + [[maybe_unused]] auto* p = input_mat_.ptr(0); + + cv::resize(cropped, scaled_frame_, scaled_frame_.size(), 0, 0, cv::INTER_AREA); + + normalize_brightness(scaled_frame_, input_mat_); + + assert (input_mat_.ptr(0) == p); + assert (!input_mat_.empty() && input_mat_.isContinuous()); + + Timer t; t.start(); + + try + { + session_.Run( + Ort::RunOptions{ nullptr }, + input_c_names_.data(), + input_val_.data(), + input_val_.size(), + output_c_names_.data(), + output_val_.data(), + output_val_.size()); + } + catch (const Ort::Exception &e) + { + qDebug() << "Failed to run the model: " << e.what(); + return {}; + } + + last_inference_time_ = t.elapsed_ms(); + + // Perform coordinate transformation. + // From patch-local normalized in [-1,1] to + // frame unnormalized pixel. + + cv::Matx33f center_size_cov_tril = {}; + if (has_uncertainty_) + { + if (pos_scale_uncertainty_is_matrix_) + { + center_size_cov_tril = output_coord_scales_tril_; + } + else + { + center_size_cov_tril(0,0) = output_coord_scales_std_[0]; + center_size_cov_tril(1,1) = output_coord_scales_std_[1]; + center_size_cov_tril(2,2) = output_coord_scales_std_[2]; + } + center_size_cov_tril *= patch_size*0.5f; + } + + const cv::Point2f center = patch_center + + (0.5f*patch_size)*cv::Point2f{output_coord_[0], output_coord_[1]}; + const float size = patch_size*0.5f*output_coord_[2]; + + // Following Eigen which uses quat components in the order w, x, y, z. 
+ // As does OpenCV + cv::Quatf rotation = { + output_quat_[3], + output_quat_[0], + output_quat_[1], + output_quat_[2] }; + + // Should be lower triangular. If not maybe something is wrong with memory layout ... or the model. + assert(output_rotaxis_scales_tril_(0, 1) == 0); + assert(output_rotaxis_scales_tril_(0, 2) == 0); + assert(output_rotaxis_scales_tril_(1, 2) == 0); + assert(center_size_cov_tril(0, 1) == 0); + assert(center_size_cov_tril(0, 2) == 0); + assert(center_size_cov_tril(1, 2) == 0); + + cv::Matx33f rotaxis_scales_tril = output_rotaxis_scales_tril_; + + if (model_version_ < 2) + { + // Due to a change in coordinate conventions + rotation = world_to_image(rotation); + } + + const cv::Rect2f outbox = { + patch_center.x + (0.5f*patch_size)*output_box_[0], + patch_center.y + (0.5f*patch_size)*output_box_[1], + 0.5f*patch_size*(output_box_[2]-output_box_[0]), + 0.5f*patch_size*(output_box_[3]-output_box_[1]) + }; + + return std::optional<Face>({ + rotation, rotaxis_scales_tril, outbox, center, size, center_size_cov_tril + }); +} + + +cv::Mat PoseEstimator::last_network_input() const +{ + assert(!input_mat_.empty()); + cv::Mat ret; + input_mat_.convertTo(ret, CV_8U, 255., 127.); + cv::cvtColor(ret, ret, cv::COLOR_GRAY2RGB); + return ret; +} + + +double PoseEstimator::last_inference_time_millis() const +{ + return last_inference_time_; +} + + + + + +} // namespace neuralnet_tracker_ns diff --git a/tracker-neuralnet/model_adapters.h b/tracker-neuralnet/model_adapters.h new file mode 100644 index 00000000..c1aaa6de --- /dev/null +++ b/tracker-neuralnet/model_adapters.h @@ -0,0 +1,105 @@ +#pragma once + +#include <optional> +#include <array> +#include <vector> +#include <string> + +#include <onnxruntime_cxx_api.h> +#include <opencv2/core.hpp> +#include "opencv_contrib.h" + + +namespace neuralnet_tracker_ns +{ + +// Generally useful sigmoid function +float sigmoid(float x); + + +class Localizer +{ + public: + Localizer(Ort::MemoryInfo &allocator_info, + Ort::Session &&session); + + // Returns bounding wrt image coordinate of the input image + // The preceeding float is the score for being a face normalized to [0,1]. + std::pair<float, cv::Rect2f> run( + const cv::Mat &frame); + + double last_inference_time_millis() const; + private: + inline static constexpr int INPUT_IMG_WIDTH = 288; + inline static constexpr int INPUT_IMG_HEIGHT = 224; + Ort::Session session_{nullptr}; + // Inputs / outputs + cv::Mat scaled_frame_{}, input_mat_{}; + Ort::Value input_val_{nullptr}, output_val_{nullptr}; + std::array<float, 5> results_; + double last_inference_time_ = 0; +}; + + +class PoseEstimator +{ + public: + struct Face + { + cv::Quatf rotation; + cv::Matx33f rotaxis_cov_tril; // Lower triangular factor of Cholesky decomposition + cv::Rect2f box; + cv::Point2f center; + float size; + cv::Matx33f center_size_cov_tril; // Lower triangular factor of Cholesky decomposition + }; + + PoseEstimator(Ort::MemoryInfo &allocator_info, + Ort::Session &&session); + /** Inference + * + * Coordinates are defined wrt. the image space of the input `frame`. + * X goes right, Z (depth) into the image, Y points down (like pixel coordinates values increase from top to bottom) + */ + std::optional<Face> run(const cv::Mat &frame, const cv::Rect &box); + // Returns an image compatible with the 'frame' image for displaying. 
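+ // (The stored network input is roughly in [-0.5, 0.5]; it is converted back to 8-bit
+ // grayscale as x*255 + 127 and expanded to RGB for display.)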
+ cv::Mat last_network_input() const; + double last_inference_time_millis() const; + bool has_uncertainty() const { return has_uncertainty_; } + + private: + std::string get_network_input_name(size_t i) const; + std::string get_network_output_name(size_t i) const; + int64_t model_version_ = 0; // Queried meta data from the ONNX file + Ort::Session session_{nullptr}; // ONNX's runtime context for running the model + mutable Ort::Allocator allocator_; // Memory allocator for tensors + // Inputs + cv::Mat scaled_frame_{}, input_mat_{}; // Input. One is the original crop, the other is rescaled (?) + std::vector<Ort::Value> input_val_; // Tensors to put into the model + std::vector<std::string> input_names_; // Refers to the names in the onnx model. + std::vector<const char *> input_c_names_; // Refers to the C names in the onnx model. + // Outputs + cv::Vec<float, 3> output_coord_{}; // 2d Coordinate and head size output. + cv::Vec<float, 4> output_quat_{}; // Quaternion output + cv::Vec<float, 4> output_box_{}; // Bounding box output + cv::Matx33f output_rotaxis_scales_tril_{}; // Lower triangular matrix of LLT factorization of covariance of rotation vector as offset from output quaternion + cv::Matx33f output_coord_scales_tril_{}; // Lower triangular factor + cv::Vec3f output_coord_scales_std_{}; // Depending on the model, alternatively a 3d vector with standard deviations. + std::vector<Ort::Value> output_val_; // Tensors to put the model outputs in. + std::vector<std::string> output_names_; // Refers to the names in the onnx model. + std::vector<const char *> output_c_names_; // Refers to the C names in the onnx model. + // More bookkeeping + double last_inference_time_ = 0; + bool has_uncertainty_ = false; + bool pos_scale_uncertainty_is_matrix_ = false; +}; + + +// Finds the intensity where x percent of pixels have less intensity than that. 
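+// Usage sketch (illustrative): find_input_intensity_quantile(gray_frame, 90.f) returns the
+// 8-bit gray level at or below which roughly 90% of the pixels of `gray_frame` fall.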
+int find_input_intensity_quantile(const cv::Mat& frame, float percentage); + +// Adjust brightness levels to full range and scales the value range to [-0.5, 0.5] +void normalize_brightness(const cv::Mat& frame, cv::Mat& out); + + +} // namespace neuralnet_tracker_ns diff --git a/tracker-neuralnet/models/head-localizer.onnx b/tracker-neuralnet/models/head-localizer.onnx Binary files differnew file mode 100644 index 00000000..c128f89d --- /dev/null +++ b/tracker-neuralnet/models/head-localizer.onnx diff --git a/tracker-neuralnet/models/head-pose-0.2-big.onnx b/tracker-neuralnet/models/head-pose-0.2-big.onnx Binary files differnew file mode 100644 index 00000000..e53fd831 --- /dev/null +++ b/tracker-neuralnet/models/head-pose-0.2-big.onnx diff --git a/tracker-neuralnet/models/head-pose-0.2-small.onnx b/tracker-neuralnet/models/head-pose-0.2-small.onnx Binary files differnew file mode 100644 index 00000000..f2b64219 --- /dev/null +++ b/tracker-neuralnet/models/head-pose-0.2-small.onnx diff --git a/tracker-neuralnet/models/head-pose-0.3-big-quantized.onnx b/tracker-neuralnet/models/head-pose-0.3-big-quantized.onnx Binary files differnew file mode 100644 index 00000000..7f875c63 --- /dev/null +++ b/tracker-neuralnet/models/head-pose-0.3-big-quantized.onnx diff --git a/tracker-neuralnet/neuralnet-tracker.qrc b/tracker-neuralnet/neuralnet-tracker.qrc new file mode 100644 index 00000000..d30ec313 --- /dev/null +++ b/tracker-neuralnet/neuralnet-tracker.qrc @@ -0,0 +1,5 @@ +<RCC> + <qresource prefix="/"> + <file>images/neuralnet.png</file> + </qresource> +</RCC> diff --git a/tracker-neuralnet/neuralnet-trackercontrols.ui b/tracker-neuralnet/neuralnet-trackercontrols.ui new file mode 100644 index 00000000..ae2450b4 --- /dev/null +++ b/tracker-neuralnet/neuralnet-trackercontrols.ui @@ -0,0 +1,697 @@ +<?xml version="1.0" encoding="UTF-8"?> +<ui version="4.0"> + <class>Form</class> + <widget class="QWidget" name="Form"> + <property name="windowModality"> + <enum>Qt::NonModal</enum> + </property> + <property name="geometry"> + <rect> + <x>0</x> + <y>0</y> + <width>651</width> + <height>432</height> + </rect> + </property> + <property name="windowTitle"> + <string>Tracker settings</string> + </property> + <layout class="QGridLayout" name="gridLayout"> + <item row="4" column="0"> + <widget class="QGroupBox" name="groupBox_10"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="autoFillBackground"> + <bool>true</bool> + </property> + <property name="title"> + <string>Head Center Offset</string> + </property> + <layout class="QGridLayout" name="gridLayout_5"> + <item row="0" column="0"> + <widget class="QFrame" name="frame_4"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="maximumSize"> + <size> + <width>16777215</width> + <height>16777215</height> + </size> + </property> + <property name="frameShape"> + <enum>QFrame::NoFrame</enum> + </property> + <property name="frameShadow"> + <enum>QFrame::Raised</enum> + </property> + <layout class="QGridLayout" name="gridLayout_11"> + <property name="sizeConstraint"> + <enum>QLayout::SetDefaultConstraint</enum> + </property> + <property name="verticalSpacing"> + <number>0</number> + </property> + <item row="2" column="0"> + <widget class="QLabel" name="label_66"> + <property 
name="sizePolicy"> + <sizepolicy hsizetype="Maximum" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="text"> + <string>Right</string> + </property> + </widget> + </item> + <item row="0" column="0"> + <widget class="QLabel" name="label_61"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Maximum" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="text"> + <string>Forward</string> + </property> + </widget> + </item> + <item row="1" column="0"> + <widget class="QLabel" name="label_62"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Maximum" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="text"> + <string>Up</string> + </property> + </widget> + </item> + <item row="0" column="1"> + <widget class="QSpinBox" name="tx_spin"> + <property name="maximumSize"> + <size> + <width>150</width> + <height>16777215</height> + </size> + </property> + <property name="suffix"> + <string> mm</string> + </property> + <property name="minimum"> + <number>-65535</number> + </property> + <property name="maximum"> + <number>65536</number> + </property> + </widget> + </item> + <item row="1" column="1"> + <widget class="QSpinBox" name="ty_spin"> + <property name="maximumSize"> + <size> + <width>150</width> + <height>16777215</height> + </size> + </property> + <property name="suffix"> + <string> mm</string> + </property> + <property name="minimum"> + <number>-65535</number> + </property> + <property name="maximum"> + <number>65536</number> + </property> + </widget> + </item> + <item row="2" column="1"> + <widget class="QSpinBox" name="tz_spin"> + <property name="maximumSize"> + <size> + <width>150</width> + <height>16777215</height> + </size> + </property> + <property name="suffix"> + <string> mm</string> + </property> + <property name="minimum"> + <number>-65535</number> + </property> + <property name="maximum"> + <number>65536</number> + </property> + </widget> + </item> + </layout> + </widget> + </item> + <item row="0" column="1"> + <widget class="QFrame" name="frame_5"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Preferred" vsizetype="Expanding"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="minimumSize"> + <size> + <width>260</width> + <height>0</height> + </size> + </property> + <property name="frameShape"> + <enum>QFrame::NoFrame</enum> + </property> + <property name="frameShadow"> + <enum>QFrame::Raised</enum> + </property> + <layout class="QVBoxLayout" name="verticalLayout_2"> + <item> + <widget class="QLabel" name="label_59"> + <property name="text"> + <string>Use only yaw and pitch while calibrating. 
+Don't roll or change position.</string> + </property> + <property name="alignment"> + <set>Qt::AlignCenter</set> + </property> + <property name="wordWrap"> + <bool>true</bool> + </property> + <property name="openExternalLinks"> + <bool>false</bool> + </property> + </widget> + </item> + <item> + <widget class="QLabel" name="sample_count_display"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Minimum" vsizetype="Maximum"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="frameShape"> + <enum>QFrame::Panel</enum> + </property> + <property name="frameShadow"> + <enum>QFrame::Sunken</enum> + </property> + <property name="text"> + <string/> + </property> + <property name="wordWrap"> + <bool>true</bool> + </property> + </widget> + </item> + <item> + <widget class="QPushButton" name="tcalib_button"> + <property name="enabled"> + <bool>false</bool> + </property> + <property name="text"> + <string>Start calibration</string> + </property> + <property name="checkable"> + <bool>true</bool> + </property> + </widget> + </item> + </layout> + </widget> + </item> + </layout> + </widget> + </item> + <item row="8" column="0"> + <widget class="QDialogButtonBox" name="buttonBox"> + <property name="standardButtons"> + <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set> + </property> + </widget> + </item> + <item row="2" column="0"> + <widget class="QGroupBox" name="groupBox"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="autoFillBackground"> + <bool>true</bool> + </property> + <property name="title"> + <string>Camera Configuration</string> + </property> + <property name="flat"> + <bool>false</bool> + </property> + <property name="checkable"> + <bool>false</bool> + </property> + <layout class="QHBoxLayout" name="horizontalLayout"> + <property name="spacing"> + <number>10</number> + </property> + <property name="bottomMargin"> + <number>8</number> + </property> + <item> + <layout class="QGridLayout" name="gridLayout_3"> + <property name="sizeConstraint"> + <enum>QLayout::SetDefaultConstraint</enum> + </property> + <property name="leftMargin"> + <number>0</number> + </property> + <property name="topMargin"> + <number>0</number> + </property> + <property name="rightMargin"> + <number>0</number> + </property> + <property name="bottomMargin"> + <number>0</number> + </property> + <property name="horizontalSpacing"> + <number>0</number> + </property> + <property name="verticalSpacing"> + <number>2</number> + </property> + <item row="0" column="1"> + <widget class="QComboBox" name="cameraName"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + </widget> + </item> + <item row="1" column="0"> + <widget class="QLabel" name="label_9"> + <property name="text"> + <string>Diagonal FOV</string> + </property> + </widget> + </item> + <item row="0" column="0"> + <widget class="QLabel" name="label_10"> + <property name="text"> + <string>Camera name</string> + </property> + </widget> + </item> + <item row="1" column="1"> + <widget class="QSpinBox" name="cameraFOV"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="toolTip"> + <string>Field of 
view. Needed to transform the pose to world coordinates.</string> + </property> + <property name="locale"> + <locale language="English" country="UnitedStates"/> + </property> + <property name="minimum"> + <number>35</number> + </property> + <property name="maximum"> + <number>90</number> + </property> + </widget> + </item> + <item row="2" column="1"> + <widget class="QComboBox" name="resolution"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="toolTip"> + <string>The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution.</string> + </property> + </widget> + </item> + <item row="2" column="0"> + <widget class="QLabel" name="resolution_label"> + <property name="text"> + <string>Resolution</string> + </property> + </widget> + </item> + </layout> + </item> + <item> + <layout class="QGridLayout" name="gridLayout_6"> + <property name="leftMargin"> + <number>0</number> + </property> + <property name="topMargin"> + <number>0</number> + </property> + <property name="rightMargin"> + <number>0</number> + </property> + <property name="bottomMargin"> + <number>0</number> + </property> + <property name="horizontalSpacing"> + <number>0</number> + </property> + <property name="verticalSpacing"> + <number>2</number> + </property> + <item row="4" column="1"> + <widget class="QComboBox" name="cameraFPS"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="toolTip"> + <string>Requested video frame rate. 
Actual setting may not be supported by the camera.</string> + </property> + </widget> + </item> + <item row="4" column="0"> + <widget class="QLabel" name="label_12"> + <property name="text"> + <string>Frames per second</string> + </property> + </widget> + </item> + <item row="2" column="1"> + <widget class="QCheckBox" name="use_mjpeg"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Minimum" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="minimumSize"> + <size> + <width>0</width> + <height>0</height> + </size> + </property> + <property name="text"> + <string/> + </property> + </widget> + </item> + <item row="2" column="0"> + <widget class="QLabel" name="label_11"> + <property name="text"> + <string>MJPEG</string> + </property> + </widget> + </item> + <item row="0" column="1"> + <widget class="QPushButton" name="camera_settings"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="text"> + <string>Camera settings</string> + </property> + </widget> + </item> + </layout> + </item> + </layout> + </widget> + </item> + <item row="7" column="0"> + <widget class="QLabel" name="resolution_display"> + <property name="autoFillBackground"> + <bool>true</bool> + </property> + <property name="frameShape"> + <enum>QFrame::Panel</enum> + </property> + <property name="frameShadow"> + <enum>QFrame::Sunken</enum> + </property> + <property name="text"> + <string notr="true"/> + </property> + </widget> + </item> + <item row="5" column="0"> + <widget class="QGroupBox" name="tuningOptionsBox"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="minimumSize"> + <size> + <width>0</width> + <height>0</height> + </size> + </property> + <property name="autoFillBackground"> + <bool>true</bool> + </property> + <property name="title"> + <string>Tuning / Debug</string> + </property> + <layout class="QVBoxLayout" name="verticalLayout"> + <item> + <layout class="QHBoxLayout" name="horizontalLayout_3"> + <item> + <widget class="QLabel" name="threadCountLabel"> + <property name="text"> + <string>Thread Count</string> + </property> + </widget> + </item> + <item> + <widget class="QSpinBox" name="threadCount"> + <property name="toolTip"> + <string>Number of threads. 
Can be used to balance the CPU load between the game and the tracker.</string> + </property> + <property name="minimum"> + <number>1</number> + </property> + <property name="maximum"> + <number>32</number> + </property> + </widget> + </item> + <item> + <widget class="Line" name="line"> + <property name="orientation"> + <enum>Qt::Vertical</enum> + </property> + </widget> + </item> + <item> + <widget class="QCheckBox" name="showNetworkInput"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Minimum" vsizetype="Fixed"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="toolTip"> + <string>Show the image patch that the pose estimation model sees.</string> + </property> + <property name="text"> + <string>Show Network Input</string> + </property> + </widget> + </item> + <item> + <widget class="Line" name="line_2"> + <property name="orientation"> + <enum>Qt::Vertical</enum> + </property> + </widget> + </item> + <item> + <widget class="QLabel" name="roiFilterAlphaLabel"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Minimum" vsizetype="Minimum"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="text"> + <string>ROI Smoothing Alpha</string> + </property> + </widget> + </item> + <item> + <widget class="QDoubleSpinBox" name="roiFilterAlpha"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Minimum" vsizetype="Fixed"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="maximumSize"> + <size> + <width>150</width> + <height>16777215</height> + </size> + </property> + <property name="toolTip"> + <string>Amount of smoothing of the face region coordinates. Can help stabilize the pose.</string> + </property> + <property name="wrapping"> + <bool>false</bool> + </property> + <property name="decimals"> + <number>2</number> + </property> + <property name="maximum"> + <double>1.000000000000000</double> + </property> + <property name="singleStep"> + <double>0.010000000000000</double> + </property> + <property name="value"> + <double>1.000000000000000</double> + </property> + </widget> + </item> + <item> + <widget class="QLabel" name="roiZoomLabel"> + <property name="text"> + <string>ROI Zoom</string> + </property> + </widget> + </item> + <item> + <widget class="QDoubleSpinBox" name="roiZoom"> + <property name="toolTip"> + <string>Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. 
There is a sweet spot near 1.</string> + </property> + <property name="minimum"> + <double>0.100000000000000</double> + </property> + <property name="maximum"> + <double>2.000000000000000</double> + </property> + <property name="singleStep"> + <double>0.010000000000000</double> + </property> + <property name="value"> + <double>1.000000000000000</double> + </property> + </widget> + </item> + <item> + <spacer name="horizontalSpacer"> + <property name="orientation"> + <enum>Qt::Horizontal</enum> + </property> + <property name="sizeHint" stdset="0"> + <size> + <width>40</width> + <height>20</height> + </size> + </property> + </spacer> + </item> + </layout> + </item> + <item> + <widget class="QFrame" name="network_select_frame"> + <property name="frameShape"> + <enum>QFrame::NoFrame</enum> + </property> + <property name="frameShadow"> + <enum>QFrame::Raised</enum> + </property> + <layout class="QHBoxLayout" name="horizontalLayout_2"> + <property name="leftMargin"> + <number>0</number> + </property> + <property name="topMargin"> + <number>0</number> + </property> + <property name="rightMargin"> + <number>0</number> + </property> + <property name="bottomMargin"> + <number>0</number> + </property> + <item> + <widget class="QPushButton" name="posenetSelectButton"> + <property name="toolTip"> + <string>Select the pose network. Changes take affect on the next tracker start</string> + </property> + <property name="text"> + <string>Select Pose Net ONNX</string> + </property> + </widget> + </item> + <item> + <widget class="QLabel" name="posenetFileDisplay"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Expanding" vsizetype="Preferred"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="text"> + <string><the pose net file></string> + </property> + </widget> + </item> + </layout> + </widget> + </item> + </layout> + </widget> + </item> + </layout> + </widget> + <resources/> + <connections/> + <designerdata> + <property name="gridDeltaX"> + <number>10</number> + </property> + <property name="gridDeltaY"> + <number>10</number> + </property> + <property name="gridSnapX"> + <bool>false</bool> + </property> + <property name="gridSnapY"> + <bool>false</bool> + </property> + <property name="gridVisible"> + <bool>true</bool> + </property> + </designerdata> +</ui> diff --git a/tracker-neuralnet/opencv_contrib.h b/tracker-neuralnet/opencv_contrib.h new file mode 100644 index 00000000..1c199025 --- /dev/null +++ b/tracker-neuralnet/opencv_contrib.h @@ -0,0 +1,120 @@ +#pragma once + +#include <opencv2/core.hpp> +#include <opencv2/core/base.hpp> +#include <opencv2/core/quaternion.hpp> + +// Well eventually it might be a contribution + +namespace cvcontrib +{ + + +template<class T> +cv::Point_<T> as_point(const cv::Size_<T>& s) +{ + return { s.width, s.height }; +} + + +template<class T> +cv::Size_<T> as_size(const cv::Point_<T>& p) +{ + return { p.x, p.y }; +} + + +template<int n, int m> +inline bool allfinite(const cv::Matx<float, n, m> &mat) +{ + const size_t sz = mat.rows*mat.cols; + for (size_t i=0; i<sz; ++i) + if (!std::isfinite(mat.val[i])) + return false; + return true; +} + + +// Because compiler refuses to convert it automatically +template<int n> +inline cv::Vec<float, n> to_vec(const cv::Matx<float, n, 1>& m) +{ + return cv::Vec<float,n>{m.val}; +} + + +template<int n, int m, int o> +inline void set_minor(cv::Vec<float, m> &dst, const int startrow, const cv::Matx<float, o, 1> &src) +{ + assert (startrow>=0 && startrow+n <= dst.rows); + for 
(int row=startrow, i=0; row<startrow+n; ++row,++i) + { + dst[row] = src(i,0); + } +} + + +template<int nrows, int ncols, int m, int n> +inline void set_minor(cv::Matx<float, m, n>& dst, const int startrow, int startcol, const cv::Matx<float, nrows, ncols> &src) +{ + assert (startrow>=0 && startrow+nrows <= dst.rows); + assert (startcol>=0 && startcol+ncols <= dst.cols); + for (int row=startrow, i=0; row<startrow+nrows; ++row,++i) + { + for (int col=startcol, j=0; col<startcol+ncols; ++col,++j) + { + dst(row, col) = src(i,j); + } + } +} + + +inline cv::Quatf identity_quat() +{ + return cv::Quatf(1,0,0,0); +} + + +inline cv::Vec3f toRotVec(const cv::Quatf& q) +{ + // This is an improved implementation +#if 1 + // w = cos(alpha/2) + // xyz = sin(alpha/2)*axis + static constexpr float eps = 1.e-12f; + const cv::Vec3f xyz{q.x, q.y, q.z}; + const float len = cv::norm(xyz); + const float angle = std::atan2(len, q.w)*2.f; + return xyz*(angle/(len+eps)); +#else + // The opencv implementation fails even the simplest test: + // out = toRVec(cv::Quatf{1., 0., 0., 0. }); + // ASSERT_TRUE(std::isfinite(out[0]) && std::isfinite(out[1]) && std::isfinite(out[2])); + return q.toRotVec(); +#endif +} + + +inline cv::Vec3f rotate(const cv::Quatf& q, const cv::Vec3f &v) +{ + const auto r = q * cv::Quatf{0., v[0], v[1], v[2]} * q.conjugate(); + return { r.x, r.y, r.z }; +} + + +template<int n> +inline cv::Matx<float, n, n> cholesky(const cv::Matx<float, n, n>& mat) +{ + cv::Matx<float, n, n> l = mat; + // Der Code ist die Doku! + // https://github.com/opencv/opencv/blob/4.5.4/modules/core/src/matrix_decomp.cpp#L95 + cv::Cholesky(l.val, l.cols * sizeof(float), n, nullptr, 0, 0); + // It doesn't clear the upper triangle so we do it for it. + for (int row=0; row<n; ++row) + for (int col=row+1; col<n; ++col) + l(row, col) = 0.f; + return l; +} + + +} // namespace cvcontrib
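+// Usage sketch (illustrative, not part of the original header): for a unit quaternion that
+// rotates by 90 degrees about the Y axis,
+//   const cv::Quatf q{0.7071068f, 0.f, 0.7071068f, 0.f};       // components ordered (w, x, y, z)
+//   const cv::Vec3f r = cvcontrib::toRotVec(q);                 // ~ {0, CV_PI/2, 0}, i.e. angle*axis
+//   const cv::Vec3f v = cvcontrib::rotate(q, {1.f, 0.f, 0.f});  // ~ {0, 0, -1}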
\ No newline at end of file diff --git a/tracker-neuralnet/preview.cpp b/tracker-neuralnet/preview.cpp new file mode 100644 index 00000000..76a6bbc0 --- /dev/null +++ b/tracker-neuralnet/preview.cpp @@ -0,0 +1,135 @@ +#include "preview.h" + + +namespace neuralnet_tracker_ns +{ + + +cv::Rect make_crop_rect_for_aspect(const cv::Size &size, int aspect_w, int aspect_h) +{ + auto [w, h] = size; + if ( w*aspect_h > aspect_w*h ) + { + // Image is too wide + const int new_w = (aspect_w*h)/aspect_h; + return cv::Rect((w - new_w)/2, 0, new_w, h); + } + else + { + const int new_h = (aspect_h*w)/aspect_w; + return cv::Rect(0, (h - new_h)/2, w, new_h); + } +} + + + + +void Preview::init(const cv_video_widget& widget) +{ + auto [w,h] = widget.preview_size(); + preview_size_ = { w, h }; +} + + +void Preview::copy_video_frame(const cv::Mat& frame) +{ + cv::Rect roi = make_crop_rect_for_aspect(frame.size(), preview_size_.width, preview_size_.height); + + cv::resize(frame(roi), preview_image_, preview_size_, 0, 0, cv::INTER_NEAREST); + + offset_ = { (float)-roi.x, (float)-roi.y }; + scale_ = float(preview_image_.cols) / float(roi.width); +} + + +void Preview::draw_gizmos( + const std::optional<PoseEstimator::Face> &face, + const std::optional<cv::Rect2f>& last_roi, + const std::optional<cv::Rect2f>& last_localizer_roi, + const cv::Point2f& neckjoint_position) +{ + if (preview_image_.empty()) + return; + + if (last_roi) + { + const int col = 255; + cv::rectangle(preview_image_, transform(*last_roi), cv::Scalar(0, col, 0), /*thickness=*/1); + } + if (last_localizer_roi) + { + const int col = 255; + cv::rectangle(preview_image_, transform(*last_localizer_roi), cv::Scalar(col, 0, 255-col), /*thickness=*/1); + } + + if (face) + { + if (face->size>=1.f) + cv::circle(preview_image_, static_cast<cv::Point>(transform(face->center)), int(transform(face->size)), cv::Scalar(255,255,255), 2); + cv::circle(preview_image_, static_cast<cv::Point>(transform(face->center)), 3, cv::Scalar(255,255,255), -1); + + const cv::Matx33f R = face->rotation.toRotMat3x3(cv::QUAT_ASSUME_UNIT); + + auto draw_coord_line = [&](int i, const cv::Scalar& color) + { + const float vx = R(0,i); + const float vy = R(1,i); + static constexpr float len = 100.f; + cv::Point q = face->center + len*cv::Point2f{vx, vy}; + cv::line(preview_image_, static_cast<cv::Point>(transform(face->center)), static_cast<cv::Point>(transform(q)), color, 2); + }; + draw_coord_line(0, {0, 0, 255}); + draw_coord_line(1, {0, 255, 0}); + draw_coord_line(2, {255, 0, 0}); + + // Draw the computed joint position + auto xy = transform(neckjoint_position); + cv::circle(preview_image_, cv::Point(xy.x,xy.y), 5, cv::Scalar(0,0,255), -1); + } + + +} + +void Preview::overlay_netinput(const cv::Mat& netinput) +{ + if (netinput.empty()) + return; + + const int w = std::min(netinput.cols, preview_image_.cols); + const int h = std::min(netinput.rows, preview_image_.rows); + cv::Rect roi(0, 0, w, h); + netinput(roi).copyTo(preview_image_(roi)); +} + +void Preview::draw_fps(double fps, double last_inference_time) +{ + char buf[128]; + ::snprintf(buf, sizeof(buf), "%d Hz, pose inference: %d ms", std::clamp(int(fps), 0, 9999), int(last_inference_time)); + cv::putText(preview_image_, buf, cv::Point(10, preview_image_.rows-10), cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(0, 255, 0), 1); +} + + +void Preview::copy_to_widget(cv_video_widget& widget) +{ + if (preview_image_.rows > 0) + widget.update_image(preview_image_); +} + + +cv::Rect2f Preview::transform(const cv::Rect2f& r) const +{ + return { 
(r.x - offset_.x)*scale_, (r.y - offset_.y)*scale_, r.width*scale_, r.height*scale_ }; +} + +cv::Point2f Preview::transform(const cv::Point2f& p) const +{ + return { (p.x - offset_.x)*scale_ , (p.y - offset_.y)*scale_ }; +} + +float Preview::transform(float s) const +{ + return s * scale_; +} + + +}
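The cropping arithmetic in make_crop_rect_for_aspect can be sanity-checked by hand. A hypothetical check, not part of the patch; the frame sizes are chosen only for illustration:

```
#include <cassert>
#include <opencv2/core.hpp>
#include "preview.h"

void crop_rect_sketch()
{
    using neuralnet_tracker_ns::make_crop_rect_for_aspect;

    // 1280x720 frame cropped to 4:3 -> frame is too wide:
    // new width = 4*720/3 = 960, centered at x = (1280-960)/2 = 160.
    assert(make_crop_rect_for_aspect({1280, 720}, 4, 3) == cv::Rect(160, 0, 960, 720));

    // 640x640 frame cropped to 4:3 -> frame is too tall:
    // new height = 3*640/4 = 480, centered at y = (640-480)/2 = 80.
    assert(make_crop_rect_for_aspect({640, 640}, 4, 3) == cv::Rect(0, 80, 640, 480));
}
```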
\ No newline at end of file diff --git a/tracker-neuralnet/preview.h b/tracker-neuralnet/preview.h new file mode 100644 index 00000000..adc12993 --- /dev/null +++ b/tracker-neuralnet/preview.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2021 Michael Welter <michael@welter-4d.de>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ */
+
+#pragma once
+
+#include "model_adapters.h"
+
+#include "cv/video-widget.hpp"
+
+#include <optional>
+
+#include <opencv2/core.hpp>
+#include <opencv2/imgproc.hpp>
+
+
+namespace neuralnet_tracker_ns
+{
+
+/** Makes a maximum-size cropping rect with the given aspect ratio.
+* @param aspect_w: numerator of the aspect ratio
+* @param aspect_h: denominator of the aspect ratio
+*/
+cv::Rect make_crop_rect_for_aspect(const cv::Size &size, int aspect_w, int aspect_h);
+
+
+/** This class is responsible for drawing the debug/info gizmos.
+*
+* In addition, it provides functions to transform inputs to the size of
+* the preview image, which can differ from the camera frame.
+*/
+class Preview
+{
+public:
+ void init(const cv_video_widget& widget);
+ void copy_video_frame(const cv::Mat& frame);
+ void draw_gizmos(
+ const std::optional<PoseEstimator::Face> &face,
+ const std::optional<cv::Rect2f>& last_roi,
+ const std::optional<cv::Rect2f>& last_localizer_roi,
+ const cv::Point2f& neckjoint_position);
+ void overlay_netinput(const cv::Mat& netinput);
+ void draw_fps(double fps, double last_inference_time);
+ void copy_to_widget(cv_video_widget& widget);
+private:
+ // Transform from camera image to preview
+ cv::Rect2f transform(const cv::Rect2f& r) const;
+ cv::Point2f transform(const cv::Point2f& p) const;
+ float transform(float s) const;
+
+ cv::Mat preview_image_;
+ cv::Size preview_size_ = { 0, 0 };
+ float scale_ = 1.f;
+ cv::Point2f offset_ = { 0.f, 0.f};
+};
+
+} // neuralnet_tracker_ns
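Typical per-frame use of the Preview class, as suggested by its interface. The caller below is hypothetical and not part of the patch; the real tracker loop supplies these values from the camera and the pose estimator:

```
#include "preview.h"

namespace neuralnet_tracker_ns
{

// Hypothetical caller: one iteration of the tracker's drawing path.
void draw_preview_frame(Preview& preview,
                        cv_video_widget& widget,
                        const cv::Mat& camera_frame,
                        const std::optional<PoseEstimator::Face>& face,
                        const std::optional<cv::Rect2f>& roi,
                        const std::optional<cv::Rect2f>& localizer_roi,
                        const cv::Point2f& neck_position,
                        double fps, double inference_ms)
{
    preview.init(widget);                   // adopt the current widget size
    preview.copy_video_frame(camera_frame); // crop to the preview aspect and resize
    preview.draw_gizmos(face, roi, localizer_roi, neck_position);
    preview.draw_fps(fps, inference_ms);
    preview.copy_to_widget(widget);         // hand the finished image to the UI
}

} // namespace neuralnet_tracker_ns
```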
\ No newline at end of file diff --git a/tracker-neuralnet/redist/vcomp140.dll b/tracker-neuralnet/redist/vcomp140.dll Binary files differ new file mode 100644 index 00000000..42c069b9 --- /dev/null +++ b/tracker-neuralnet/redist/vcomp140.dll diff --git a/tracker-neuralnet/tests.cpp b/tracker-neuralnet/tests.cpp new file mode 100644 index 00000000..b1d2a6d0 --- /dev/null +++ b/tracker-neuralnet/tests.cpp @@ -0,0 +1,60 @@ +#include "model_adapters.h"
+
+#include <algorithm>
+#include <numeric>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+
+namespace neuralnet_tracker_tests
+{
+
+
+void assert_(bool ok, const std::string& msg)
+{
+ if (ok)
+ return;
+ std::cout << msg << std::endl;
+ std::exit(EXIT_FAILURE);
+}
+
+
+void test_find_input_intensity_quantile()
+{
+ cv::Mat data(10,10, CV_8UC1);
+ std::iota(data.begin<uint8_t>(), data.end<uint8_t>(), 0);
+
+ const float pct = 90;
+
+ const int val = neuralnet_tracker_ns::find_input_intensity_quantile(data, pct);
+
+ assert_(val == int(10*10*pct/100.f), "test_find_input_intensity_quantile failed");
+}
+
+
+void test_normalize_brightness()
+{
+ cv::Mat data(10,10, CV_8UC1);
+ std::iota(data.begin<uint8_t>(), data.end<uint8_t>(), 0);
+
+ cv::Mat out;
+ neuralnet_tracker_ns::normalize_brightness(data, out);
+
+ auto [minit,maxit] = std::minmax_element(out.begin<float>(),out.end<float>());
+ const auto minval = *minit;
+ const auto maxval = *maxit;
+ assert_(std::abs(minval + 0.5f) < 0.02, "test_normalize_brightness failed");
+ // If the brightest value is lower than half-max, it will be boosted to half-max.
+ // Otherwise it will just be rescaled to [-0.5, 0.5]. Here we have the low-brightness case.
+ assert_(std::abs(maxval - 0.0f) < 0.02, "test_normalize_brightness failed");
+}
+
+
+void run()
+{
+ test_find_input_intensity_quantile();
+ test_normalize_brightness();
+}
+
+
+} // namespace neuralnet_tracker_tests
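The expected values in test_normalize_brightness follow from the test data: the image holds the intensities 0..99, so the brightest pixel is below half of the 8-bit range and, per the comment, gets boosted to half-max. Likewise, find_input_intensity_quantile is expected to return 90, the 90th percentile of the values 0..99. A back-of-the-envelope check, assuming a normalization of the form out = in*scale/255 - 0.5 with scale = half_max/max_intensity; this formula is inferred from the assertions, not taken from the patch:

```
#include <cstdio>

int main()
{
    // Test image holds intensities 0..99 (see std::iota above).
    const float max_intensity = 99.f;
    const float half_max = 255.f / 2.f;            // ~127.5
    const float scale = half_max / max_intensity;  // boost brightest pixel to half-max

    const float out_min = 0.f  * scale / 255.f - 0.5f;  // -> -0.5
    const float out_max = 99.f * scale / 255.f - 0.5f;  // -> ~0.0

    std::printf("expected min %.3f, max %.3f\n", out_min, out_max);
    return 0;
}
```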
\ No newline at end of file diff --git a/tracker-neuralnet/unscented_trafo.h b/tracker-neuralnet/unscented_trafo.h new file mode 100644 index 00000000..267aa969 --- /dev/null +++ b/tracker-neuralnet/unscented_trafo.h @@ -0,0 +1,132 @@ +#pragma once
+
+#include <array>
+#include <opencv2/core.hpp>
+#include <opencv2/core/base.hpp>
+#include <opencv2/core/quaternion.hpp>
+
+#include <cmath>
+#include <tuple>
+
+#include "opencv_contrib.h"
+
+namespace ukf_cv
+{
+
+using namespace cvcontrib;
+
+template<int dim, int otherdim = dim>
+using SigmaPoints = std::array<cv::Vec<float,otherdim>,dim*2+1>;
+
+
+// Ported from
+// https://filterpy.readthedocs.io/en/latest/_modules/filterpy/kalman/sigma_points.html
+// Excerpt from the original documentation:
+// "
+
+// Generates sigma points and weights according to Van der Merwe's
+// 2004 dissertation[1] for the UnscentedKalmanFilter class. It
+// parameterizes the sigma points using alpha, beta, kappa terms, and
+// is the version seen in most publications.
+
+// Unless you know better, this should be your default choice.
+
+// alpha : float
+// Determines the spread of the sigma points around the mean.
+// Usually a small positive value (1e-3) according to [3].
+
+// beta : float
+// Incorporates prior knowledge of the distribution of the mean. For
+// Gaussian x beta=2 is optimal, according to [3].
+
+// kappa : float, default=0.0
+// Secondary scaling parameter usually set to 0 according to [4],
+// or to 3-n according to [5].
+
+// Reference
+// .. [1] R. Van der Merwe "Sigma-Point Kalman Filters for Probabilistic
+// Inference in Dynamic State-Space Models" (Doctoral dissertation)
+
+// "
+template<int dim>
+class MerweScaledSigmaPoints
+{
+public:
+ static constexpr int num_sigmas = 2*dim+1;
+
+ using Vector = cv::Vec<float,dim>;
+ using Matrix = cv::Matx<float,dim,dim>;
+
+ MerweScaledSigmaPoints(float alpha = 0.01, float beta = 2., int kappa = 3-dim)
+ {
+ lambda = alpha*alpha * (dim + kappa) - dim;
+ const float c = .5 / (dim + lambda);
+ Wc_i = c;
+ Wm_i = c;
+ Wm_0 = lambda / (dim+lambda);
+ Wc_0 = Wm_0 + (1.-alpha*alpha + beta);
+ }
+
+ SigmaPoints<dim> compute_sigmas(const Vector &mu, const Matrix &mat, bool is_tril_factor) const
+ {
+ const Matrix triu_factor = is_tril_factor ? mat.t() : cholesky(mat).t();
+
+ const Matrix U = triu_factor*std::sqrt(lambda+dim);
+
+ SigmaPoints<dim> sigmas;
+
+ sigmas[0] = mu;
+ for (int k=0; k<dim; ++k)
+ {
+ sigmas[k+1] = to_vec(mu + U.row(k).t());
+ sigmas[dim+k+1] = to_vec(mu - U.row(k).t());
+ }
+ return sigmas;
+ }
+
+ template<int otherdim>
+ std::tuple<cv::Vec<float,otherdim> , cv::Matx<float,otherdim,otherdim>> compute_statistics(const SigmaPoints<dim,otherdim> &sigmas) const
+ {
+ cv::Vec<float,otherdim> mu{}; // Zero initializes
+ for (size_t i=0; i<sigmas.size(); ++i)
+ {
+ mu += to_vec((i==0 ? Wm_0 : Wm_i) * sigmas[i]);
+ }
+
+ cv::Matx<float,otherdim,otherdim> cov{};
+ for (size_t i=0; i<sigmas.size(); ++i)
+ {
+ const auto p = sigmas[i] - mu;
+ cov += (i==0 ? Wc_0 : Wc_i)*p*p.t();
+ }
+
+ return { mu, cov };
+ }
+
+ template<int otherdim>
+ cv::Matx<float,dim,otherdim> compute_cov(const SigmaPoints<dim,dim> &sigmas, const SigmaPoints<dim,otherdim> &othersigmas) const
+ {
+ cv::Vec<float,dim> mu{}; // Zero initializes
+ cv::Vec<float,otherdim> mu_other{}; // Zero initializes
+ for (size_t i=0; i<sigmas.size(); ++i)
+ {
+ mu += to_vec((i==0 ? Wm_0 : Wm_i) * sigmas[i]);
+ mu_other += to_vec((i==0 ?
Wm_0 : Wm_i) * othersigmas[i]); + } + + cv::Matx<float,dim,otherdim> cov{}; + for (size_t i=0; i<sigmas.size(); ++i) + { + const auto p = sigmas[i] - mu; + const auto q = othersigmas[i] - mu_other; + cov += (i==0 ? Wc_0 : Wc_i)*p*q.t(); + } + + return cov; + } +private: + float Wc_i, Wm_i, Wm_0, Wc_0, lambda; +}; + + +} // namespace ukf_cv
\ No newline at end of file
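A sketch of how the sigma-point class supports an unscented transform: draw sigma points around a mean and covariance, push them through a nonlinear function, and recover the transformed statistics. The propagation function f, the dimensions, and the numbers below are invented for illustration and are not part of the patch:

```
#include <cmath>
#include <opencv2/core.hpp>
#include "unscented_trafo.h"

using namespace ukf_cv;

// Some nonlinear mapping from a 3-dim state to a 2-dim measurement (made up).
cv::Vec2f f(const cv::Vec3f& x)
{
    return { x[0]*x[0] + x[2], std::sin(x[1]) };
}

void unscented_transform_sketch()
{
    const MerweScaledSigmaPoints<3> sp;  // alpha=0.01, beta=2, kappa=3-dim

    const cv::Vec3f mu{ 1.f, 0.5f, -1.f };
    const cv::Matx33f cov = cv::Matx33f::eye() * 0.01f;

    // 2*3+1 sigma points drawn around (mu, cov).
    const SigmaPoints<3> sigmas = sp.compute_sigmas(mu, cov, /*is_tril_factor=*/false);

    // Push every sigma point through the nonlinearity ...
    SigmaPoints<3, 2> transformed;
    for (size_t i = 0; i < sigmas.size(); ++i)
        transformed[i] = f(sigmas[i]);

    // ... and recover mean and covariance of the transformed distribution,
    // plus the cross-covariance needed for a Kalman gain.
    const auto [mu_y, cov_y] = sp.compute_statistics<2>(transformed);
    const cv::Matx<float, 3, 2> cross = sp.compute_cov<2>(sigmas, transformed);
    (void)mu_y; (void)cov_y; (void)cross;
}
```

Passing is_tril_factor=true to compute_sigmas lets a caller reuse an already-computed lower-triangular covariance factor instead of refactorizing the matrix.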