| field | value | date |
|---|---|---|
| author | Stanislaw Halik <sthalik@misaki.pl> | 2022-05-26 18:04:23 +0000 |
| committer | GitHub <noreply@github.com> | 2022-05-26 18:04:23 +0000 |
| commit | 015d1298cb8f853cc4b0cae6a62c13af130791b7 | |
| tree | 2df114150815a442c710532914ba777067b317eb | |
| parent | db54d8eb86b06e6c7ea44ff0c112c22ab85305c3 | |
| parent | bbce410b03c2bb36cf2a07b1394fa04bf4f04c70 | |
Merge pull request #1476 from DaWelter/master
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | tracker-neuralnet/ftnoir_tracker_neuralnet.cpp | 990 |
| -rw-r--r-- | tracker-neuralnet/ftnoir_tracker_neuralnet.h | 191 |
| -rw-r--r-- | tracker-neuralnet/lang/nl_NL.ts | 58 |
| -rw-r--r-- | tracker-neuralnet/lang/ru_RU.ts | 58 |
| -rw-r--r-- | tracker-neuralnet/lang/stub.ts | 58 |
| -rw-r--r-- | tracker-neuralnet/lang/zh_CN.ts | 58 |
| -rw-r--r-- | tracker-neuralnet/neuralnet-trackercontrols.ui | 794 |

7 files changed, 1587 insertions, 620 deletions
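Among the behavioural changes in the diff below, the face ROI handed to the pose network is now smoothed with an exponentially weighted moving average (the new `ewa_filter` helper, driven by the `roi-filter-alpha` setting) and recomputed from the estimated head size (`roi-zoom`). A minimal standalone sketch of that smoothing step, using OpenCV types; the helper name `blend_roi` is illustrative and not part of the patch:

```cpp
#include <opencv2/core.hpp>

// Blend the previous ROI towards the newly detected one. alpha = 1 means
// "use the new detection as-is"; smaller alpha reacts more slowly but jitters less.
cv::Rect2f blend_roi(const cv::Rect2f& last, const cv::Rect2f& current, float alpha)
{
    const cv::Point2f last_c = (last.tl() + last.br()) * 0.5f;
    const cv::Point2f cur_c  = (current.tl() + current.br()) * 0.5f;
    const cv::Point2f c = last_c + alpha * (cur_c - last_c);               // filtered centre
    const float w = last.width  + alpha * (current.width  - last.width);   // filtered size
    const float h = last.height + alpha * (current.height - last.height);
    return { c.x - 0.5f * w, c.y - 0.5f * h, w, h };
}
```

In the patch the filter weight comes from `settings_.roi_filter_alpha`, which defaults to 1, so smoothing is effectively off unless the user lowers the value.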
diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp index 3687a6cd..62209978 100644 --- a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp +++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp @@ -15,6 +15,7 @@  #include <opencv2/calib3d.hpp>  #include <opencv2/imgcodecs.hpp>  #include "compat/timer.hpp" +#include "compat/check-visible.hpp"  #include <omp.h>  #ifdef _MSC_VER @@ -29,20 +30,22 @@  #include <cmath>  #include <algorithm>  #include <chrono> +#include <string> +#include <stdexcept> +  // Some demo code for onnx  // https://github.com/microsoft/onnxruntime/blob/master/csharp/test/Microsoft.ML.OnnxRuntime.EndToEndTests.Capi/C_Api_Sample.cpp  // https://github.com/leimao/ONNX-Runtime-Inference/blob/main/src/inference.cpp -namespace +namespace neuralnet_tracker_ns  { +  using numeric_types::vec3;  using numeric_types::vec2;  using numeric_types::mat33; - -// Minimal difference if at all going from 1 to 2 threads. -static constexpr int num_threads = 1; +using quat = std::array<numeric_types::f,4>;  #if _MSC_VER @@ -52,12 +55,52 @@ std::string convert(const QString &s) { return s.toStdString(); }  #endif +template<class F> +struct OnScopeExit +{ +    explicit OnScopeExit(F&& f) : f_{ f } {} +    ~OnScopeExit() noexcept +    { +        f_(); +    } +    F f_; +}; + +  float sigmoid(float x)  {      return 1.f/(1.f + std::exp(-x));  } +cv::Rect make_crop_rect_for_aspect(const cv::Size &size, int aspect_w, int aspect_h) +{ +    auto [w, h] = size; +    if ( w*aspect_h > aspect_w*h ) +    { +        // Image is too wide +        const int new_w = (aspect_w*h)/aspect_h; +        return cv::Rect((w - new_w)/2, 0, new_w, h); +    } +    else +    { +        const int new_h = (aspect_h*w)/aspect_w; +        return cv::Rect(0, (h - new_h)/2, w, new_h); +    } +} + +cv::Rect make_crop_rect_multiple_of(const cv::Size &size, int multiple) +{ +    const int new_w = (size.width / multiple) * multiple; +    const int new_h = (size.height / multiple) * multiple; +    return cv::Rect( +        (size.width-new_w)/2, +        (size.height-new_h)/2, +        new_w, +        new_h +    ); +} +  template<class T>  cv::Rect_<T> squarize(const cv::Rect_<T> &r)  { @@ -67,6 +110,41 @@ cv::Rect_<T> squarize(const cv::Rect_<T> &r)  } +template<class T> +cv::Point_<T> as_point(const cv::Size_<T>& s) +{ +    return { s.width, s.height }; +} + + +template<class T> +cv::Size_<T> as_size(const cv::Point_<T>& p) +{ +    return { p.x, p.y }; +} + + +template<class T> +cv::Rect_<T> expand(const cv::Rect_<T>& r, T factor) +{ +    // xnew = l+.5*w - w*f*0.5 = l + .5*(w - new_w) +    const cv::Size_<T> new_size = { r.width * factor, r.height * factor }; +    const cv::Point_<T> new_tl = r.tl() + (as_point(r.size()) - as_point(new_size)) / T(2); +    return cv::Rect_<T>(new_tl, new_size); +} + + +template<class T> +cv::Rect_<T> ewa_filter(const cv::Rect_<T>& last, const cv::Rect_<T>& current, T alpha) +{ +    const auto last_center = T(0.5) * (last.tl() + last.br()); +    const auto cur_center = T(0.5) * (current.tl() + current.br()); +    const cv::Point_<T> new_size = as_point(last.size()) + alpha * (as_point(current.size()) - as_point(last.size())); +    const cv::Point_<T> new_center = last_center + alpha * (cur_center - last_center); +    return cv::Rect_<T>(new_center - T(0.5) * new_size, as_size(new_size)); +} + +  cv::Rect2f unnormalize(const cv::Rect2f &r, int h, int w)  {      auto unnorm = [](float x) -> float { return 0.5*(x+1); }; @@ -105,10 +183,9 @@ mat33 
rotation_from_two_vectors(const vec3 &a, const vec3 &b)  } -/* Computes correction due to head being off screen center. -    x, y: In screen space, i.e. in [-1,1] -    focal_length_x: In screen space -*/ +// Computes correction due to head being off screen center. +// x, y: In screen space, i.e. in [-1,1] +// focal_length_x: In screen space  mat33 compute_rotation_correction(const cv::Point2f &p, float focal_length_x)  {      return rotation_from_two_vectors( @@ -137,6 +214,66 @@ mat33 quaternion_to_mat33(const std::array<float,4> quat)  } +vec3 rotate_vec(const quat& q, const vec3& p) +{ +    const float qw = q[0]; +    const float qi = q[1]; +    const float qj = q[2]; +    const float qk = q[3]; +    const float pi = p[0]; +    const float pj = p[1]; +    const float pk = p[2]; +    const quat tmp{ +                        - qi*pi - qj*pj - qk*pk, +        qw*pi                   + qj*pk - qk*pj, +        qw*pj - qi*pk                   + qk*pi, +        qw*pk + qi*pj - qj*pi                   +    }; +    const vec3 out { +        -tmp[0]*qi + tmp[1]*qw - tmp[2]*qk + tmp[3]*qj, +        -tmp[0]*qj + tmp[1]*qk + tmp[2]*qw - tmp[3]*qi, +        -tmp[0]*qk - tmp[1]*qj + tmp[2]*qi + tmp[3]*qw +    }; +    return out; +} + + +vec3 image_to_world(float x, float y, float size, float reference_size_in_mm, const cv::Size2i& image_size, const CamIntrinsics& intrinsics) +{ +    // Compute the location the network outputs in 3d space. +    const float xpos = -(intrinsics.focal_length_w * image_size.width * 0.5f) / size * reference_size_in_mm; +    const float zpos = (x / image_size.width * 2.f - 1.f) * xpos / intrinsics.focal_length_w; +    const float ypos = (y / image_size.height * 2.f - 1.f) * xpos / intrinsics.focal_length_h; +    return {xpos, ypos, zpos}; +} + + +vec2 world_to_image(const vec3& pos, const cv::Size2i& image_size, const CamIntrinsics& intrinsics) +{ +    const float xscr = pos[2] / pos[0] * intrinsics.focal_length_w; +    const float yscr = pos[1] / pos[0] * intrinsics.focal_length_h; +    const float x = (xscr+1.)*0.5f*image_size.width; +    const float y = (yscr+1.)*0.5f*image_size.height; +    return {x, y}; +} + + +quat image_to_world(quat q) +{ +    std::swap(q[1], q[3]); +    q[1] = -q[1]; +    q[2] = -q[2]; +    q[3] = -q[3]; +    return q; +} + +quat world_to_image(quat q) +{ +    // It's its own inverse. +    return image_to_world(q); +} + +// Intersection over union. A value between 0 and 1 which measures the match between the bounding boxes.  template<class T>  T iou(const cv::Rect_<T> &a, const cv::Rect_<T> &b)  { @@ -144,12 +281,30 @@ T iou(const cv::Rect_<T> &a, const cv::Rect_<T> &b)      return double{i.area()} / (a.area()+b.area()-i.area());  } - -} // namespace +// Returns width and height of the input tensor, or throws. +// Expects the model to take one tensor as input that must +// have the shape B x C x H x W, where B=C=1. 
+cv::Size get_input_image_shape(const Ort::Session &session) +{ +    if (session.GetInputCount() < 1) +        throw std::invalid_argument("Model must take at least one input tensor"); +    const std::vector<std::int64_t> shape =  +        session.GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); +    if (shape.size() != 4) +        throw std::invalid_argument("Model takes the input tensor in the wrong shape"); +    return { static_cast<int>(shape[3]), static_cast<int>(shape[2]) }; +} -namespace neuralnet_tracker_ns +Ort::Value create_tensor(const Ort::TypeInfo& info, Ort::Allocator& alloc)  { +    const auto shape = info.GetTensorTypeAndShapeInfo().GetShape(); +    auto t = Ort::Value::CreateTensor<float>( +        alloc, shape.data(), shape.size()); +    memset(t.GetTensorMutableData<float>(), 0, sizeof(float)*info.GetTensorTypeAndShapeInfo().GetElementCount()); +    return t; +} +  int enum_to_fps(int value) @@ -165,52 +320,49 @@ int enum_to_fps(int value)  Localizer::Localizer(Ort::MemoryInfo &allocator_info, Ort::Session &&session) : -    session{std::move(session)}, -    scaled_frame(input_img_height, input_img_width, CV_8U), -    input_mat(input_img_height, input_img_width, CV_32F) +    session_{std::move(session)}, +    scaled_frame_(INPUT_IMG_HEIGHT, INPUT_IMG_WIDTH, CV_8U), +    input_mat_(INPUT_IMG_HEIGHT, INPUT_IMG_WIDTH, CV_32F)  {      // Only works when input_mat does not reallocated memory ...which it should not.      // Non-owning memory reference to input_mat?      // Note: shape = (bach x channels x h x w) -    const std::int64_t input_shape[4] = { 1, 1, input_img_height, input_img_width }; -    input_val = Ort::Value::CreateTensor<float>(allocator_info, input_mat.ptr<float>(0), input_mat.total(), input_shape, 4); +    const std::int64_t input_shape[4] = { 1, 1, INPUT_IMG_HEIGHT, INPUT_IMG_WIDTH }; +    input_val_ = Ort::Value::CreateTensor<float>(allocator_info, input_mat_.ptr<float>(0), input_mat_.total(), input_shape, 4);      const std::int64_t output_shape[2] = { 1, 5 }; -    output_val = Ort::Value::CreateTensor<float>(allocator_info, results.data(), results.size(), output_shape, 2); +    output_val_ = Ort::Value::CreateTensor<float>(allocator_info, results_.data(), results_.size(), output_shape, 2);  }  std::pair<float, cv::Rect2f> Localizer::run(      const cv::Mat &frame)  { -    auto p = input_mat.ptr(0); +    auto p = input_mat_.ptr(0); -    cv::resize(frame, scaled_frame, { input_img_width, input_img_height }, 0, 0, cv::INTER_AREA); -    scaled_frame.convertTo(input_mat, CV_32F, 1./255., -0.5); +    cv::resize(frame, scaled_frame_, { INPUT_IMG_WIDTH, INPUT_IMG_HEIGHT }, 0, 0, cv::INTER_AREA); +    scaled_frame_.convertTo(input_mat_, CV_32F, 1./255., -0.5); -    assert (input_mat.ptr(0) == p); -    assert (!input_mat.empty() && input_mat.isContinuous()); -    assert (input_mat.cols == input_img_width && input_mat.rows == input_img_height); +    assert (input_mat_.ptr(0) == p); +    assert (!input_mat_.empty() && input_mat_.isContinuous()); +    assert (input_mat_.cols == INPUT_IMG_WIDTH && input_mat_.rows == INPUT_IMG_HEIGHT);      const char* input_names[] = {"x"};      const char* output_names[] = {"logit_box"};      Timer t; t.start(); -    const auto nt = omp_get_num_threads(); -    omp_set_num_threads(num_threads); -    session.Run(Ort::RunOptions{nullptr}, input_names, &input_val, 1, output_names, &output_val, 1); -    omp_set_num_threads(nt); +    session_.Run(Ort::RunOptions{nullptr}, input_names, &input_val_, 1, output_names, &output_val_, 
1); -    last_inference_time = t.elapsed_ms(); +    last_inference_time_ = t.elapsed_ms();      const cv::Rect2f roi = unnormalize(cv::Rect2f{ -        results[1], -        results[2], -        results[3]-results[1], // Width -        results[4]-results[2] // Height +        results_[1], +        results_[2], +        results_[3]-results_[1], // Width +        results_[4]-results_[2] // Height      }, frame.rows, frame.cols); -    const float score = sigmoid(results[0]); +    const float score = sigmoid(results_[0]);      return { score, roi };  } @@ -218,37 +370,91 @@ std::pair<float, cv::Rect2f> Localizer::run(  double Localizer::last_inference_time_millis() const  { -    return last_inference_time; +    return last_inference_time_;  } -PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&session) : -    session{std::move(session)}, -    scaled_frame(input_img_height, input_img_width, CV_8U), -    input_mat(input_img_height, input_img_width, CV_32F) +PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&_session)  +    : model_version_{_session.GetModelMetadata().GetVersion()} +    , session_{std::move(_session)} +    , allocator_{session_, allocator_info}  { +    using namespace std::literals::string_literals; + +    if (session_.GetOutputCount() < 2) +        throw std::runtime_error("Invalid Model: must have at least two outputs"); + +    // WARNING UB .. but still ... +    // If the model was saved without meta data, it seems the version field is uninitialized. +    // In that case reading from it is UB. However, we will just get same arbitrary number +    // which is hopefully different from the numbers used by models where the version is set. +    // I.e., this is what happended in practice so far. 
+    if (model_version_ != 2) +        model_version_ = 1; + +    const cv::Size input_image_shape = get_input_image_shape(session_); + +    scaled_frame_ = cv::Mat(input_image_shape, CV_8U, cv::Scalar(0)); +    input_mat_ = cv::Mat(input_image_shape, CV_32F, cv::Scalar(0.f)); +      { -        const std::int64_t input_shape[4] = { 1, 1, input_img_height, input_img_width }; -        input_val = Ort::Value::CreateTensor<float>(allocator_info, input_mat.ptr<float>(0), input_mat.total(), input_shape, 4); +        const std::int64_t input_shape[4] = { 1, 1, input_image_shape.height, input_image_shape.width }; +        input_val_.push_back( +            Ort::Value::CreateTensor<float>(allocator_info, input_mat_.ptr<float>(0), input_mat_.total(), input_shape, 4));      }      {          const std::int64_t output_shape[2] = { 1, 3 }; -        output_val[0] = Ort::Value::CreateTensor<float>( -            allocator_info, &output_coord[0], output_coord.rows, output_shape, 2); +        output_val_.push_back(Ort::Value::CreateTensor<float>( +            allocator_info, &output_coord_[0], output_coord_.rows, output_shape, 2));      }      {          const std::int64_t output_shape[2] = { 1, 4 }; -        output_val[1] = Ort::Value::CreateTensor<float>( -            allocator_info, &output_quat[0], output_quat.rows, output_shape, 2); +        output_val_.push_back(Ort::Value::CreateTensor<float>( +            allocator_info, &output_quat_[0], output_quat_.rows, output_shape, 2));      } +    size_t num_regular_outputs = 2; + +    if (session_.GetOutputCount() >= 3 && "box"s == session_.GetOutputName(2, allocator_))      {          const std::int64_t output_shape[2] = { 1, 4 }; -        output_val[2] = Ort::Value::CreateTensor<float>( -            allocator_info, &output_box[0], output_box.rows, output_shape, 2); +        output_val_.push_back(Ort::Value::CreateTensor<float>( +            allocator_info, &output_box_[0], output_box_.rows, output_shape, 2)); +        ++num_regular_outputs; +        qDebug() << "Note: Legacy model output for face ROI is currently ignored"; +    } + +    num_recurrent_states_ = session_.GetInputCount()-1; +    if (session_.GetOutputCount()-num_regular_outputs != num_recurrent_states_) +        throw std::runtime_error("Invalid Model: After regular inputs and outputs the model must have equal number of inputs and outputs for tensors holding hidden states of recurrent layers."); + +    // Create tensors for recurrent state +    for (size_t i = 0; i < num_recurrent_states_; ++i) +    { +        const auto& input_info = session_.GetInputTypeInfo(1+i); +        const auto& output_info = session_.GetOutputTypeInfo(num_regular_outputs+i); +        if (input_info.GetTensorTypeAndShapeInfo().GetShape() !=  +            output_info.GetTensorTypeAndShapeInfo().GetShape()) +            throw std::runtime_error("Invalid Model: Tensors for recurrent hidden states should have same shape on intput and output"); +        input_val_.push_back(create_tensor(input_info, allocator_)); +        output_val_.push_back(create_tensor(output_info, allocator_)); +    } + +    for (size_t i = 0; i < session_.GetInputCount(); ++i) +    { +        input_names_.push_back(session_.GetInputName(i, allocator_)); +    } +    for (size_t i = 0; i < session_.GetOutputCount(); ++i) +    { +        output_names_.push_back(session_.GetOutputName(i, allocator_));      } + +    qDebug() << "Model inputs: " << session_.GetInputCount() << ", outputs: " << session_.GetOutputCount() << ", recurrent states: " << 
num_recurrent_states_; + +    assert (input_names_.size() == input_val_.size()); +    assert (output_names_.size() == output_val_.size());  } @@ -259,9 +465,9 @@ int PoseEstimator::find_input_intensity_90_pct_quantile() const      float range[] = { 0, 256 };      const float* ranges[] = { range };      cv::Mat hist; -    cv::calcHist(&scaled_frame, 1,  channels, cv::Mat(), hist, 1, hist_size, ranges, true, false); +    cv::calcHist(&scaled_frame_, 1,  channels, cv::Mat(), hist, 1, hist_size, ranges, true, false);      int gray_level = 0; -    const int num_pixels_quantile = scaled_frame.total()*0.9f; +    const int num_pixels_quantile = scaled_frame_.total()*0.9f;      int num_pixels_accum = 0;      for (int i=0; i<hist_size[0]; ++i)      { @@ -280,7 +486,7 @@ std::optional<PoseEstimator::Face> PoseEstimator::run(      const cv::Mat &frame, const cv::Rect &box)  {      cv::Mat cropped; -     +      const int patch_size = std::max(box.width, box.height)*1.05;      const cv::Point2f patch_center = {          std::clamp<float>(box.x + 0.5f*box.width, 0.f, frame.cols), @@ -288,64 +494,89 @@ std::optional<PoseEstimator::Face> PoseEstimator::run(      };      cv::getRectSubPix(frame, {patch_size, patch_size}, patch_center, cropped); -    // Will get failure if patch_center is outside image boundaries. +    // Will get failure if patch_center is outside image boundariesettings.      // Have to catch this case.      if (cropped.rows != patch_size || cropped.cols != patch_size)          return {}; -     -    auto p = input_mat.ptr(0); -    cv::resize(cropped, scaled_frame, { input_img_width, input_img_height }, 0, 0, cv::INTER_AREA); +    auto p = input_mat_.ptr(0); + +    cv::resize(cropped, scaled_frame_, scaled_frame_.size(), 0, 0, cv::INTER_AREA);      // Automatic brightness amplification.      const int brightness = find_input_intensity_90_pct_quantile();      const double alpha = brightness<127 ? 0.5/std::max(5,brightness) : 1./255;      const double beta = -0.5; -    scaled_frame.convertTo(input_mat, CV_32F, alpha, beta); +    scaled_frame_.convertTo(input_mat_, CV_32F, alpha, beta); -    assert (input_mat.ptr(0) == p); -    assert (!input_mat.empty() && input_mat.isContinuous()); -    assert (input_mat.cols == input_img_width && input_mat.rows == input_img_height); +    assert (input_mat_.ptr(0) == p); +    assert (!input_mat_.empty() && input_mat_.isContinuous()); -    const char* input_names[] = {"x"}; -    const char* output_names[] = {"pos_size", "quat", "box"};      Timer t; t.start(); -    const auto nt = omp_get_num_threads(); -    omp_set_num_threads(num_threads); -    session.Run(Ort::RunOptions{nullptr}, input_names, &input_val, 1, output_names, output_val, 3); -    omp_set_num_threads(nt); +    try +    { +        session_.Run( +            Ort::RunOptions{ nullptr },  +            input_names_.data(),  +            input_val_.data(),  +            input_val_.size(),  +            output_names_.data(),  +            output_val_.data(),  +            output_val_.size()); +    } +    catch (const Ort::Exception &e) +    { +        qDebug() << "Failed to run the model: " << e.what(); +        return {}; +    } + +    for (size_t i = 0; i<num_recurrent_states_; ++i) +    { +        // Next step, the current output becomes the input. +        // Thus we realize the recurrent connection. +        // Only swaps the internal pointers. There is no copy of data. 
+        std::swap( +            output_val_[output_val_.size()-num_recurrent_states_+i], +            input_val_[input_val_.size()-num_recurrent_states_+i]); +    } -    // FIXME: Execution time fluctuates wildly. 19 to 26 ms. Why??? +    // FIXME: Execution time fluctuates wildly. 19 to 26 msec. Why?      //        The instructions are always the same. Maybe a memory allocation      //        issue. The ONNX api suggests that tensor are allocated in an      //        arena. Does that matter? Maybe the issue is something else? -    last_inference_time = t.elapsed_ms(); +    last_inference_time_ = t.elapsed_ms();      // Perform coordinate transformation.      // From patch-local normalized in [-1,1] to -    // frame unnormalized pixel coordinates. +    // frame unnormalized pixel coordinatesettings.      const cv::Point2f center = patch_center +  -        (0.5f*patch_size)*cv::Point2f{output_coord[0], output_coord[1]}; +        (0.5f*patch_size)*cv::Point2f{output_coord_[0], output_coord_[1]}; -    const float size = patch_size*0.5f*output_coord[2]; +    const float size = patch_size*0.5f*output_coord_[2];      // Following Eigen which uses quat components in the order w, x, y, z. -    const std::array<float,4> rotation = {  -        output_quat[3],  -        output_quat[0],  -        output_quat[1],  -        output_quat[2] }; +    quat rotation = {  +        output_quat_[3],  +        output_quat_[0],  +        output_quat_[1],  +        output_quat_[2] }; + +    if (model_version_ < 2) +    { +        // Due to a change in coordinate conventions +        rotation = world_to_image(rotation); +    }      const cv::Rect2f outbox = { -        patch_center.x + (0.5f*patch_size)*output_box[0], -        patch_center.y + (0.5f*patch_size)*output_box[1], -        0.5f*patch_size*(output_box[2]-output_box[0]), -        0.5f*patch_size*(output_box[3]-output_box[1]) +        patch_center.x + (0.5f*patch_size)*output_box_[0], +        patch_center.y + (0.5f*patch_size)*output_box_[1], +        0.5f*patch_size*(output_box_[2]-output_box_[0]), +        0.5f*patch_size*(output_box_[3]-output_box_[1])      };      return std::optional<Face>({ @@ -356,66 +587,102 @@ std::optional<PoseEstimator::Face> PoseEstimator::run(  cv::Mat PoseEstimator::last_network_input() const  { +    assert(!input_mat_.empty());      cv::Mat ret; -    if (!input_mat.empty()) -    { -        input_mat.convertTo(ret, CV_8U, 255., 127.); -        cv::cvtColor(ret, ret, cv::COLOR_GRAY2RGB); -    } +    input_mat_.convertTo(ret, CV_8U, 255., 127.); +    cv::cvtColor(ret, ret, cv::COLOR_GRAY2RGB);      return ret;  }  double PoseEstimator::last_inference_time_millis() const  { -    return last_inference_time; +    return last_inference_time_;  } -bool neuralnet_tracker::detect() +bool NeuralNetTracker::detect()  { -    // Note: BGR colors! -    if (!last_localizer_roi || !last_roi || -        iou(*last_localizer_roi,*last_roi)<0.25) +    double inference_time = 0.; + +    OnScopeExit update_inference_time{ [&]() { + +            QMutexLocker lck{ &stats_mtx_ }; +            inference_time_ = inference_time; +    } }; + +    // If there is no past ROI from the localizer or if the match of its output +    // with the current ROI is too poor we have to run it again. This causes a +    // latency spike of maybe an additional 50%. But it only occurs when the user +    // moves his head far enough - or when the tracking ist lost ... 
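The gating condition that the comment above refers to can be read in isolation as follows. This is a sketch, not code from the patch: `need_localizer_pass` is a made-up name, and `iou()` is the intersection-over-union helper defined earlier in this file:

```cpp
#include <optional>
#include <opencv2/core.hpp>

// The localizer network only runs when there is no usable ROI at all, or when
// the ROI carried over from tracking overlaps the last localizer ROI too little.
bool need_localizer_pass(const std::optional<cv::Rect2f>& last_localizer_roi,
                         const std::optional<cv::Rect2f>& last_roi)
{
    constexpr float min_overlap = 0.25f;   // same threshold as in the code below
    return !last_localizer_roi || !last_roi
        || iou(*last_localizer_roi, *last_roi) < min_overlap;
}
```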
+    if (!last_localizer_roi_ || !last_roi_ || +        iou(*last_localizer_roi_,*last_roi_)<0.25)      { -        auto [p, rect] = localizer->run(grayscale); -        last_inference_time += localizer->last_inference_time_millis(); -        if (p > 0.5 || rect.height < 5 || rect.width < 5) +        auto [p, rect] = localizer_->run(grayscale_); +        inference_time += localizer_->last_inference_time_millis(); +         +        if (last_roi_ && iou(rect,*last_roi_)>=0.25 && p > 0.5) +        { +            // The new ROI matches the result from tracking, so the user is +            // still there and to not disturb recurrent models, we only update +            // ... +            last_localizer_roi_ = rect; +        } +        else if (p > 0.5 && rect.height > 32 && rect.width > 32)          { -            last_localizer_roi = rect; -            last_roi = rect; +            // Tracking probably got lost since the ROI's don't match, but the +            // localizer still finds a face, so we use the ROI from the localizer +            last_localizer_roi_ = rect; +            last_roi_ = rect;          }          else          { -            last_roi.reset(); -            last_localizer_roi.reset(); +            // Tracking lost and no localization result. The user probably can't be seen. +            last_roi_.reset(); +            last_localizer_roi_.reset();          }      } -    if (!last_roi) +    if (!last_roi_)      { -        draw_gizmos(frame, {}, {}); +        draw_gizmos({}, {});          return false;      } -    auto face = poseestimator->run(grayscale, *last_roi); -    last_inference_time += poseestimator->last_inference_time_millis(); -     +    auto face = poseestimator_->run(grayscale_, *last_roi_); +    inference_time += poseestimator_->last_inference_time_millis(); +      if (!face)      { -        last_roi.reset(); -        draw_gizmos(frame, *face, {}); +        last_roi_.reset(); +        draw_gizmos(*face, {});          return false;      } -    last_roi = face->box; +    { +        // Here: compute ROI from head size estimate. This helps make the size prediction more +        //       invariant to mouth opening. The tracking can be lost more often at extreme +        //       rotations, depending on the implementation details. The code down here has +        //       been tweaked so that it works pretty well. 
+        // In old behaviour  ROI is taken from the model outputs +        const vec3 offset = rotate_vec(face->rotation, vec3{0.f, 0.1f*face->size, face->size*0.3f}); +        const float halfsize = face->size/float(settings_.roi_zoom); +        face->box = cv::Rect2f( +            face->center.x + offset[0] - halfsize, +            face->center.y + offset[1] - halfsize, +            halfsize*2.f, +            halfsize*2.f +        ); +    } + +    last_roi_ = ewa_filter(*last_roi_, face->box, float(settings_.roi_filter_alpha));      Affine pose = compute_pose(*face); -    draw_gizmos(frame, *face, pose); +    draw_gizmos(*face, pose);      { -        QMutexLocker lck(&mtx); +        QMutexLocker lck(&mtx_);          this->pose_ = pose;      } @@ -423,13 +690,35 @@ bool neuralnet_tracker::detect()  } -Affine neuralnet_tracker::compute_pose(const PoseEstimator::Face &face) const +void NeuralNetTracker::draw_gizmos( +    const std::optional<PoseEstimator::Face> &face, +    const Affine& pose)  { +    if (!is_visible_) +        return; + +    preview_.draw_gizmos(face, pose, last_roi_, last_localizer_roi_, world_to_image(pose.t, grayscale_.size(), intrinsics_)); + +    if (settings_.show_network_input) +    { +        cv::Mat netinput = poseestimator_->last_network_input(); +        preview_.overlay_netinput(netinput); +    } + +    //preview_.draw_fps(fps, last_inference_time); +} + + +Affine NeuralNetTracker::compute_pose(const PoseEstimator::Face &face) const +{ +    // Compute the location the network outputs in 3d space. +      const mat33 rot_correction = compute_rotation_correction( -        normalize(face.center, frame.rows, frame.cols), -        intrinsics.focal_length_w); +        normalize(face.center, grayscale_.rows, grayscale_.cols), +        intrinsics_.focal_length_w); -    const mat33 m = rot_correction * quaternion_to_mat33(face.rotation); +    const mat33 m = rot_correction * quaternion_to_mat33( +        image_to_world(face.rotation));      /* @@ -444,44 +733,68 @@ Affine neuralnet_tracker::compute_pose(const PoseEstimator::Face &face) const                 ------------------------      */ -    // Compute the location the network outputs in 3d space. -    const vec3 face_world_pos = image_to_world(face.center.x, face.center.y, face.size, head_size_mm); +    const vec3 face_world_pos = image_to_world( +        face.center.x, face.center.y, face.size, HEAD_SIZE_MM, +        grayscale_.size(), +        intrinsics_);      // But this is in general not the location of the rotation joint in the neck.      // So we need an extra offset. 
Which we determine by solving      // z,y,z-pos = head_joint_loc + R_face * offset -      const vec3 pos = face_world_pos          + m * vec3{ -            static_cast<float>(s.offset_fwd),  -            static_cast<float>(s.offset_up), -            static_cast<float>(s.offset_right)}; +            static_cast<float>(settings_.offset_fwd), +            static_cast<float>(settings_.offset_up), +            static_cast<float>(settings_.offset_right)};      return { m, pos };  } -void neuralnet_tracker::draw_gizmos( -    cv::Mat frame, +void Preview::init(const cv_video_widget& widget) +{ +    auto [w,h] = widget.preview_size(); +    preview_size_ = { w, h }; +} + + +void Preview::copy_video_frame(const cv::Mat& frame) +{ +    cv::Rect roi = make_crop_rect_for_aspect(frame.size(), preview_size_.width, preview_size_.height); +     +    cv::resize(frame(roi), preview_image_, preview_size_, 0, 0, cv::INTER_NEAREST); + +    offset_ = { (float)-roi.x, (float)-roi.y }; +    scale_ = float(preview_image_.cols) / float(roi.width); +} + + +void Preview::draw_gizmos(      const std::optional<PoseEstimator::Face> &face, -    const Affine& pose) const +    const Affine& pose, +    const std::optional<cv::Rect2f>& last_roi, +    const std::optional<cv::Rect2f>& last_localizer_roi, +    const cv::Point2f& neckjoint_position)  { +    if (preview_image_.empty()) +        return; +      if (last_roi)       {          const int col = 255; -        cv::rectangle(frame, *last_roi, cv::Scalar(0, col, 0), /*thickness=*/1); +        cv::rectangle(preview_image_, transform(*last_roi), cv::Scalar(0, col, 0), /*thickness=*/1);      }      if (last_localizer_roi)      {          const int col = 255; -        cv::rectangle(frame, *last_localizer_roi, cv::Scalar(col, 0, 255-col), /*thickness=*/1); +        cv::rectangle(preview_image_, transform(*last_localizer_roi), cv::Scalar(col, 0, 255-col), /*thickness=*/1);      }      if (face)      {          if (face->size>=1.f) -            cv::circle(frame, static_cast<cv::Point>(face->center), int(face->size), cv::Scalar(255,255,255), 2); -        cv::circle(frame, static_cast<cv::Point>(face->center), 3, cv::Scalar(255,255,255), -1); +            cv::circle(preview_image_, static_cast<cv::Point>(transform(face->center)), int(transform(face->size)), cv::Scalar(255,255,255), 2); +        cv::circle(preview_image_, static_cast<cv::Point>(transform(face->center)), 3, cv::Scalar(255,255,255), -1);          auto draw_coord_line = [&](int i, const cv::Scalar& color)          { @@ -489,43 +802,67 @@ void neuralnet_tracker::draw_gizmos(              const float vy = -pose.R(1,i);              static constexpr float len = 100.f;              cv::Point q = face->center + len*cv::Point2f{vx, vy}; -            cv::line(frame, static_cast<cv::Point>(face->center), static_cast<cv::Point>(q), color, 2); +            cv::line(preview_image_, static_cast<cv::Point>(transform(face->center)), static_cast<cv::Point>(transform(q)), color, 2);          };          draw_coord_line(0, {0, 0, 255});          draw_coord_line(1, {0, 255, 0});          draw_coord_line(2, {255, 0, 0});          // Draw the computed joint position -        auto xy = world_to_image(pose.t); -        cv::circle(frame, cv::Point(xy[0],xy[1]), 5, cv::Scalar(0,0,255), -1); +        auto xy = transform(neckjoint_position); +        cv::circle(preview_image_, cv::Point(xy.x,xy.y), 5, cv::Scalar(0,0,255), -1);      } +} -    if (s.show_network_input) -    { -        cv::Mat netinput = poseestimator->last_network_input(); -        
if (!netinput.empty()) -        { -            const int w = std::min(netinput.cols, frame.cols); -            const int h = std::min(netinput.rows, frame.rows); -            cv::Rect roi(0, 0, w, h); -            netinput(roi).copyTo(frame(roi)); -        } -    } +void Preview::overlay_netinput(const cv::Mat& netinput) +{ +    if (netinput.empty()) +        return; + +    const int w = std::min(netinput.cols, preview_image_.cols); +    const int h = std::min(netinput.rows, preview_image_.rows); +    cv::Rect roi(0, 0, w, h); +    netinput(roi).copyTo(preview_image_(roi)); +} +void Preview::draw_fps(double fps, double last_inference_time) +{      char buf[128];      ::snprintf(buf, sizeof(buf), "%d Hz, pose inference: %d ms", std::clamp(int(fps), 0, 9999), int(last_inference_time)); -    cv::putText(frame, buf, cv::Point(10, frame.rows-10), cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(0, 255, 0), 1); +    cv::putText(preview_image_, buf, cv::Point(10, preview_image_.rows-10), cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(0, 255, 0), 1);  } -neuralnet_tracker::neuralnet_tracker() +void Preview::copy_to_widget(cv_video_widget& widget) +{ +    if (preview_image_.rows > 0) +        widget.update_image(preview_image_); +} + + +cv::Rect2f Preview::transform(const cv::Rect2f& r) const +{ +    return { (r.x - offset_.x)*scale_, (r.y - offset_.y)*scale_, r.width*scale_, r.height*scale_ }; +} + +cv::Point2f Preview::transform(const cv::Point2f& p) const +{ +    return { (p.x - offset_.x)*scale_ , (p.y - offset_.y)*scale_ }; +} + +float Preview::transform(float s) const +{ +    return s * scale_; +} + + +NeuralNetTracker::NeuralNetTracker()  {      opencv_init(); -    cv::setNumThreads(num_threads);  } -neuralnet_tracker::~neuralnet_tracker() +NeuralNetTracker::~NeuralNetTracker()  {      requestInterruption();      wait(); @@ -534,21 +871,22 @@ neuralnet_tracker::~neuralnet_tracker()  } -module_status neuralnet_tracker::start_tracker(QFrame* videoframe) +module_status NeuralNetTracker::start_tracker(QFrame* videoframe)  {      videoframe->show(); -    videoWidget = std::make_unique<cv_video_widget>(videoframe); -    layout = std::make_unique<QHBoxLayout>(); -    layout->setContentsMargins(0, 0, 0, 0); -    layout->addWidget(&*videoWidget); -    videoframe->setLayout(&*layout); -    videoWidget->show(); +    video_widget_ = std::make_unique<cv_video_widget>(videoframe); +    layout_ = std::make_unique<QHBoxLayout>(); +    layout_->setContentsMargins(0, 0, 0, 0); +    layout_->addWidget(&*video_widget_); +    videoframe->setLayout(&*layout_); +    video_widget_->show(); +    num_threads_ = settings_.num_threads;      start();      return status_ok();  } -bool neuralnet_tracker::load_and_initialize_model() +bool NeuralNetTracker::load_and_initialize_model()  {      const QString localizer_model_path_enc =          OPENTRACK_BASE_PATH+"/" OPENTRACK_LIBRARY_PATH "/models/head-localizer.onnx"; @@ -557,26 +895,25 @@ bool neuralnet_tracker::load_and_initialize_model()      try      { -        env = Ort::Env{ +        env_ = Ort::Env{              OrtLoggingLevel::ORT_LOGGING_LEVEL_ERROR,              "tracker-neuralnet"          };          auto opts = Ort::SessionOptions{};          // Do thread settings here do anything?          // There is a warning which says to control number of threads via -        // openmp settings. Which is what we do. omp_set_num_threads directly -        // before running the inference pass. 
-        opts.SetIntraOpNumThreads(num_threads); -        opts.SetInterOpNumThreads(num_threads); -        allocator_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); - -        localizer.emplace( -            allocator_info,  -            Ort::Session{env, convert(localizer_model_path_enc).c_str(), opts}); - -        poseestimator.emplace( -            allocator_info, -            Ort::Session{env, convert(poseestimator_model_path_enc).c_str(), opts}); +        // openmp settings. Which is what we do. +        opts.SetIntraOpNumThreads(num_threads_); +        opts.SetInterOpNumThreads(1); +        allocator_info_ = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + +        localizer_.emplace( +            allocator_info_,  +            Ort::Session{env_, convert(localizer_model_path_enc).c_str(), opts}); + +        poseestimator_.emplace( +            allocator_info_, +            Ort::Session{env_, convert(poseestimator_model_path_enc).c_str(), opts});      }      catch (const Ort::Exception &e)      { @@ -588,74 +925,87 @@ bool neuralnet_tracker::load_and_initialize_model()  } -bool neuralnet_tracker::open_camera() +bool NeuralNetTracker::open_camera()  { -    int fps = enum_to_fps(s.force_fps); +    int rint = std::clamp(*settings_.resolution, 0, (int)std::size(resolution_choices)-1); +    resolution_tuple res = resolution_choices[rint]; +    int fps = enum_to_fps(settings_.force_fps); -    QMutexLocker l(&camera_mtx); +    QMutexLocker l(&camera_mtx_); -    camera = video::make_camera(s.camera_name); +    camera_ = video::make_camera(settings_.camera_name); -    if (!camera) +    if (!camera_)          return false;      video::impl::camera::info args {}; -    args.width = 320; -    args.height = 240; - +    if (res.width) +    { +        args.width = res.width; +        args.height = res.height; +    }      if (fps)          args.fps = fps; -    args.use_mjpeg = s.use_mjpeg; +    args.use_mjpeg = settings_.use_mjpeg; -    if (!camera->start(args)) +    if (!camera_->start(args))      {          qDebug() << "neuralnet tracker: can't open camera";          return false;      } +      return true;  } -void neuralnet_tracker::set_intrinsics() +void NeuralNetTracker::set_intrinsics()  { -    const int w = grayscale.cols, h = grayscale.rows; -    const double diag_fov = s.fov * M_PI / 180.; +    const int w = grayscale_.cols, h = grayscale_.rows; +    const double diag_fov = settings_.fov * M_PI / 180.;      const double fov_w = 2.*atan(tan(diag_fov/2.)/sqrt(1. + h/(double)w * h/(double)w));      const double fov_h = 2.*atan(tan(diag_fov/2.)/sqrt(1. + w/(double)h * w/(double)h));      const double focal_length_w = 1. / tan(.5 * fov_w);      const double focal_length_h = 1. / tan(.5 * fov_h); -    intrinsics.fov_h = fov_h; -    intrinsics.fov_w = fov_w; -    intrinsics.focal_length_w = focal_length_w; -    intrinsics.focal_length_h = focal_length_h; +    intrinsics_.fov_h = fov_h; +    intrinsics_.fov_w = fov_w; +    intrinsics_.focal_length_w = focal_length_w; +    intrinsics_.focal_length_h = focal_length_h;  } -vec3 neuralnet_tracker::image_to_world(float x, float y, float size, float real_size) const +class GuardedThreadCountSwitch  { -    // Compute the location the network outputs in 3d space. 
-    const float xpos = -(intrinsics.focal_length_w * frame.cols * 0.5f) / size * real_size; -    const float zpos = (x / frame.cols * 2.f - 1.f) * xpos / intrinsics.focal_length_w; -    const float ypos = (y / frame.rows * 2.f - 1.f) * xpos / intrinsics.focal_length_h; -    return {xpos, ypos, zpos}; -} +    int old_num_threads_cv_ = 1; +    int old_num_threads_omp_ = 1; +    public: +        GuardedThreadCountSwitch(int num_threads) +        { +            old_num_threads_cv_ = cv::getNumThreads(); +            old_num_threads_omp_ = omp_get_num_threads(); +            omp_set_num_threads(num_threads); +            cv::setNumThreads(num_threads); +        } +             +        ~GuardedThreadCountSwitch() +        { +            omp_set_num_threads(old_num_threads_omp_); +            cv::setNumThreads(old_num_threads_cv_); +        } + +        GuardedThreadCountSwitch(const GuardedThreadCountSwitch&) = delete; +        GuardedThreadCountSwitch& operator=(const GuardedThreadCountSwitch&) = delete; +}; -vec2 neuralnet_tracker::world_to_image(const vec3& pos) const +void NeuralNetTracker::run()  { -    const float xscr = pos[2] / pos[0] * intrinsics.focal_length_w; -    const float yscr = pos[1] / pos[0] * intrinsics.focal_length_h; -    const float x = (xscr+1.)*0.5f*frame.cols; -    const float y = (yscr+1.)*0.5f*frame.rows; -    return {x, y}; -} +    preview_.init(*video_widget_); +    GuardedThreadCountSwitch switch_num_threads_to(num_threads_); -void neuralnet_tracker::run() -{      if (!open_camera())          return; @@ -666,12 +1016,12 @@ void neuralnet_tracker::run()      while (!isInterruptionRequested())      { -        last_inference_time = 0; +        is_visible_ = check_is_visible();          auto t = clk.now();          { -            QMutexLocker l(&camera_mtx); +            QMutexLocker l(&camera_mtx_); -            auto [ img, res ] = camera->get_frame(); +            auto [ img, res ] = camera_->get_frame();              if (!res)              { @@ -680,17 +1030,24 @@ void neuralnet_tracker::run()                  continue;              } -            auto color = cv::Mat(img.height, img.width, CV_8UC(img.channels), (void*)img.data, img.stride); -            color.copyTo(frame); +            { +                QMutexLocker lck{&stats_mtx_}; +                resolution_ = { img.width, img.height }; +            } + +            auto color = prepare_input_image(img); + +            if (is_visible_) +                preview_.copy_video_frame(color);              switch (img.channels)              {              case 1: -                grayscale.create(img.height, img.width, CV_8UC1); -                color.copyTo(grayscale); +                grayscale_.create(img.height, img.width, CV_8UC1); +                color.copyTo(grayscale_);                  break;              case 3: -                cv::cvtColor(color, grayscale, cv::COLOR_BGR2GRAY); +                cv::cvtColor(color, grayscale_, cv::COLOR_BGR2GRAY);                  break;              default:                  qDebug() << "Can't handle" << img.channels << "color channels"; @@ -702,8 +1059,8 @@ void neuralnet_tracker::run()          detect(); -        if (frame.rows > 0) -            videoWidget->update_image(frame); +        if (is_visible_) +            preview_.copy_to_widget(*video_widget_);          update_fps(              std::chrono::duration_cast<std::chrono::milliseconds>( @@ -712,23 +1069,50 @@ void neuralnet_tracker::run()  } -void neuralnet_tracker::update_fps(double dt) +cv::Mat 
NeuralNetTracker::prepare_input_image(const video::frame& frame)  { -    const double alpha = dt/(dt + RC); +    auto img = cv::Mat(frame.height, frame.width, CV_8UC(frame.channels), (void*)frame.data, frame.stride); +    // Crop if aspect ratio is not 4:3 +    if (img.rows*4 != img.cols*3) +    { +        img = img(make_crop_rect_for_aspect(img.size(), 4, 3)); +    } + +    img = img(make_crop_rect_multiple_of(img.size(), 4)); + +    if (img.cols > 640) +    { +        cv::pyrDown(img, downsized_original_images_[0]); +        img = downsized_original_images_[0]; +    } +    if (img.cols > 640) +    { +        cv::pyrDown(img, downsized_original_images_[1]); +        img = downsized_original_images_[1]; +    } + +    return img; +} + + +void NeuralNetTracker::update_fps(double dt) +{ +    const double alpha = dt/(dt + RC);      if (dt > 1e-6)      { -        fps *= 1 - alpha; -        fps += alpha * 1./dt; +        QMutexLocker lck{&stats_mtx_}; +        fps_ *= 1 - alpha; +        fps_ += alpha * 1./dt;      }  } -void neuralnet_tracker::data(double *data) +void NeuralNetTracker::data(double *data)  {      Affine tmp = [&]()      { -        QMutexLocker lck(&mtx); +        QMutexLocker lck(&mtx_);          return pose_;      }(); @@ -753,113 +1137,155 @@ void neuralnet_tracker::data(double *data)  } -Affine neuralnet_tracker::pose() +Affine NeuralNetTracker::pose()  { -    QMutexLocker lck(&mtx); +    QMutexLocker lck(&mtx_);      return pose_;  } +std::tuple<cv::Size,double, double> NeuralNetTracker::stats() const +{ +    QMutexLocker lck(&stats_mtx_); +    return { resolution_, fps_, inference_time_ }; +} -void neuralnet_dialog::make_fps_combobox() +void NeuralNetDialog::make_fps_combobox()  {      for (int k = 0; k < fps_MAX; k++)      {          const int hz = enum_to_fps(k);          const QString name = (hz == 0) ? tr("Default") : QString::number(hz); -        ui.cameraFPS->addItem(name, k); +        ui_.cameraFPS->addItem(name, k); +    } +} + +void NeuralNetDialog::make_resolution_combobox() +{ +    int k=0; +    for (const auto [w, h] : resolution_choices) +    { +        const QString s = (w == 0)  +            ? 
tr("Default")  +            : QString::number(w) + " x " + QString::number(h); +        ui_.resolution->addItem(s, k++);      }  } -neuralnet_dialog::neuralnet_dialog() : -    trans_calib(1, 2) +NeuralNetDialog::NeuralNetDialog() : +    trans_calib_(1, 2)  { -    ui.setupUi(this); +    ui_.setupUi(this);      make_fps_combobox(); -    tie_setting(s.force_fps, ui.cameraFPS); +    make_resolution_combobox();      for (const auto& str : video::camera_names()) -        ui.cameraName->addItem(str); - -    tie_setting(s.camera_name, ui.cameraName); -    tie_setting(s.fov, ui.cameraFOV); -    tie_setting(s.offset_fwd, ui.tx_spin); -    tie_setting(s.offset_up, ui.ty_spin); -    tie_setting(s.offset_right, ui.tz_spin); -    tie_setting(s.show_network_input, ui.showNetworkInput); -    tie_setting(s.use_mjpeg, ui.use_mjpeg); - -    connect(ui.buttonBox, SIGNAL(accepted()), this, SLOT(doOK())); -    connect(ui.buttonBox, SIGNAL(rejected()), this, SLOT(doCancel())); -    connect(ui.camera_settings, SIGNAL(clicked()), this, SLOT(camera_settings())); - -    connect(&s.camera_name, value_::value_changed<QString>(), this, &neuralnet_dialog::update_camera_settings_state); - -    update_camera_settings_state(s.camera_name); - -    connect(&calib_timer, &QTimer::timeout, this, &neuralnet_dialog::trans_calib_step); -    calib_timer.setInterval(35); -    connect(ui.tcalib_button,SIGNAL(toggled(bool)), this, SLOT(startstop_trans_calib(bool))); +        ui_.cameraName->addItem(str); + +    tie_setting(settings_.camera_name, ui_.cameraName); +    tie_setting(settings_.fov, ui_.cameraFOV); +    tie_setting(settings_.offset_fwd, ui_.tx_spin); +    tie_setting(settings_.offset_up, ui_.ty_spin); +    tie_setting(settings_.offset_right, ui_.tz_spin); +    tie_setting(settings_.show_network_input, ui_.showNetworkInput); +    tie_setting(settings_.roi_filter_alpha, ui_.roiFilterAlpha); +    tie_setting(settings_.use_mjpeg, ui_.use_mjpeg); +	tie_setting(settings_.roi_zoom, ui_.roiZoom); +    tie_setting(settings_.num_threads, ui_.threadCount); +    tie_setting(settings_.resolution, ui_.resolution); +    tie_setting(settings_.force_fps, ui_.cameraFPS); + +    connect(ui_.buttonBox, SIGNAL(accepted()), this, SLOT(doOK())); +    connect(ui_.buttonBox, SIGNAL(rejected()), this, SLOT(doCancel())); +    connect(ui_.camera_settings, SIGNAL(clicked()), this, SLOT(camera_settings())); + +    connect(&settings_.camera_name, value_::value_changed<QString>(), this, &NeuralNetDialog::update_camera_settings_state); + +    update_camera_settings_state(settings_.camera_name); + +    connect(&calib_timer_, &QTimer::timeout, this, &NeuralNetDialog::trans_calib_step); +    calib_timer_.setInterval(35); +    connect(ui_.tcalib_button,SIGNAL(toggled(bool)), this, SLOT(startstop_trans_calib(bool))); + +    connect(&tracker_status_poll_timer_, &QTimer::timeout, this, &NeuralNetDialog::status_poll); +    tracker_status_poll_timer_.setInterval(250); +    tracker_status_poll_timer_.start();  } -void neuralnet_dialog::doOK() +void NeuralNetDialog::doOK()  { -    s.b->save(); +    settings_.b->save();      close();  } -void neuralnet_dialog::doCancel() +void NeuralNetDialog::doCancel()  {      close();  } -void neuralnet_dialog::camera_settings() +void NeuralNetDialog::camera_settings()  { -    if (tracker) +    if (tracker_)      { -        QMutexLocker l(&tracker->camera_mtx); -        (void)tracker->camera->show_dialog(); +        QMutexLocker l(&tracker_->camera_mtx_); +        (void)tracker_->camera_->show_dialog();      }      else -        
(void)video::show_dialog(s.camera_name); +        (void)video::show_dialog(settings_.camera_name);  } -void neuralnet_dialog::update_camera_settings_state(const QString& name) +void NeuralNetDialog::update_camera_settings_state(const QString& name)  {      (void)name; -    ui.camera_settings->setEnabled(true); +    ui_.camera_settings->setEnabled(true); +} + + +void NeuralNetDialog::register_tracker(ITracker * x) +{ +    tracker_ = static_cast<NeuralNetTracker*>(x); +    ui_.tcalib_button->setEnabled(true);  } -void neuralnet_dialog::register_tracker(ITracker * x) +void NeuralNetDialog::unregister_tracker()  { -    tracker = static_cast<neuralnet_tracker*>(x); -    ui.tcalib_button->setEnabled(true); +    tracker_ = nullptr; +    ui_.tcalib_button->setEnabled(false);  } -void neuralnet_dialog::unregister_tracker() +void NeuralNetDialog::status_poll()  { -    tracker = nullptr; -    ui.tcalib_button->setEnabled(false); +    QString status; +    if (!tracker_) +    { +        status = tr("Tracker Offline"); +    } +    else +    { +        auto [ res, fps, inference_time ] = tracker_->stats(); +        status = tr("%1x%2 @ %3 FPS / Inference: %4 ms").arg(res.width).arg(res.height).arg(int(fps)).arg(int(inference_time)); +    } +    ui_.resolution_display->setText(status);  } -void neuralnet_dialog::trans_calib_step() +void NeuralNetDialog::trans_calib_step()  { -    if (tracker) +    if (tracker_)      {          const Affine X_CM = [&]() {  -            QMutexLocker l(&calibrator_mutex); -            return tracker->pose(); +            QMutexLocker l(&calibrator_mutex_); +            return tracker_->pose();          }(); -        trans_calib.update(X_CM.R, X_CM.t); -        auto [_, nsamples] = trans_calib.get_estimate(); +        trans_calib_.update(X_CM.R, X_CM.t); +        auto [_, nsamples] = trans_calib_.get_estimate();          constexpr int min_yaw_samples = 15;          constexpr int min_pitch_samples = 12; @@ -878,52 +1304,52 @@ void neuralnet_dialog::trans_calib_step()              const int nsamples_total = nsamples[0] + nsamples[1];              sample_feedback = tr("%1 samples. Over %2, good!").arg(nsamples_total).arg(min_samples);          } -        ui.sample_count_display->setText(sample_feedback); +        ui_.sample_count_display->setText(sample_feedback);      }      else          startstop_trans_calib(false);  } -void neuralnet_dialog::startstop_trans_calib(bool start) +void NeuralNetDialog::startstop_trans_calib(bool start)  { -    QMutexLocker l(&calibrator_mutex); +    QMutexLocker l(&calibrator_mutex_);      // FIXME: does not work ...        if (start)      {          qDebug() << "pt: starting translation calibration"; -        calib_timer.start(); -        trans_calib.reset(); -        ui.sample_count_display->setText(QString()); +        calib_timer_.start(); +        trans_calib_.reset(); +        ui_.sample_count_display->setText(QString());          // Tracker must run with zero'ed offset for calibration. 
-        s.offset_fwd = 0; -        s.offset_up = 0; -        s.offset_right = 0; +        settings_.offset_fwd = 0; +        settings_.offset_up = 0; +        settings_.offset_right = 0;      }      else      { -        calib_timer.stop(); +        calib_timer_.stop();          qDebug() << "pt: stopping translation calibration";          { -            auto [tmp, nsamples] = trans_calib.get_estimate(); -            s.offset_fwd = int(tmp[0]); -            s.offset_up = int(tmp[1]); -            s.offset_right = int(tmp[2]); +            auto [tmp, nsamples] = trans_calib_.get_estimate(); +            settings_.offset_fwd = int(tmp[0]); +            settings_.offset_up = int(tmp[1]); +            settings_.offset_right = int(tmp[2]);          }      } -    ui.tx_spin->setEnabled(!start); -    ui.ty_spin->setEnabled(!start); -    ui.tz_spin->setEnabled(!start); +    ui_.tx_spin->setEnabled(!start); +    ui_.ty_spin->setEnabled(!start); +    ui_.tz_spin->setEnabled(!start);      if (start) -        ui.tcalib_button->setText(tr("Stop calibration")); +        ui_.tcalib_button->setText(tr("Stop calibration"));      else -        ui.tcalib_button->setText(tr("Start calibration")); +        ui_.tcalib_button->setText(tr("Start calibration"));  } -settings::settings() : opts("neuralnet-tracker") {} +Settings::Settings() : opts("neuralnet-tracker") {}  } // neuralnet_tracker_ns -OPENTRACK_DECLARE_TRACKER(neuralnet_tracker, neuralnet_dialog, neuralnet_metadata) +OPENTRACK_DECLARE_TRACKER(NeuralNetTracker, NeuralNetDialog, NeuralNetMetadata) diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.h b/tracker-neuralnet/ftnoir_tracker_neuralnet.h index 5f9c6fbe..9b481186 100644 --- a/tracker-neuralnet/ftnoir_tracker_neuralnet.h +++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.h @@ -25,6 +25,7 @@  #include <memory>  #include <cinttypes> +#include <array>  #include <onnxruntime_cxx_api.h> @@ -49,8 +50,25 @@ enum fps_choices      fps_MAX     = 3  }; +struct resolution_tuple +{ +    int width; +    int height; +}; + +static const std::array<resolution_tuple, 7> resolution_choices = +{{ +    { 320, 240 }, +    { 640, 480 }, +    { 800, 600 }, +    { 1024, 768 }, +    { 1280, 720 }, +    { 1920, 1080}, +    { 0, 0 } +}}; -struct settings : opts { + +struct Settings : opts {      value<int> offset_fwd { b, "offset-fwd", 200 }, // Millimeters                 offset_up { b, "offset-up", 0 },                 offset_right { b, "offset-right", 0 }; @@ -58,8 +76,12 @@ struct settings : opts {      value<int> fov { b, "field-of-view", 56 };      value<fps_choices> force_fps { b, "force-fps", fps_default };      value<bool> show_network_input { b, "show-network-input", false }; +    value<double> roi_filter_alpha{ b, "roi-filter-alpha", 1. }; +    value<double> roi_zoom{ b, "roi-zoom", 1. 
};      value<bool> use_mjpeg { b, "use-mjpeg", false }; -    settings(); +    value<int> num_threads { b, "num-threads", 1 }; +    value<int> resolution { b, "force-resolution", 0 }; +    Settings();  }; @@ -85,14 +107,14 @@ class Localizer          double last_inference_time_millis() const;      private: -        inline static constexpr int input_img_width = 288; -        inline static constexpr int input_img_height = 224; -        Ort::Session session{nullptr}; +        inline static constexpr int INPUT_IMG_WIDTH = 288; +        inline static constexpr int INPUT_IMG_HEIGHT = 224; +        Ort::Session session_{nullptr};          // Inputs / outputs -        cv::Mat scaled_frame{}, input_mat{}; -        Ort::Value input_val{nullptr}, output_val{nullptr}; -        std::array<float, 5> results; -        double last_inference_time = 0; +        cv::Mat scaled_frame_{}, input_mat_{}; +        Ort::Value input_val_{nullptr}, output_val_{nullptr}; +        std::array<float, 5> results_; +        double last_inference_time_ = 0;  }; @@ -102,7 +124,6 @@ class PoseEstimator          struct Face          {              std::array<float,4> rotation; // Quaternion, (w, x, y, z) -            // The following quantities are defined wrt the image space of the input              cv::Rect2f box;              cv::Point2f center;              float size; @@ -110,7 +131,11 @@ class PoseEstimator          PoseEstimator(Ort::MemoryInfo &allocator_info,                          Ort::Session &&session); -        // Inference +        /** Inference +        * +        * Coordinates are defined wrt. the image space of the input `frame`. +        * X goes right, Z (depth) into the image, Y points down (like pixel coordinates values increase from top to bottom) +        */          std::optional<Face> run(const cv::Mat &frame, const cv::Rect &box);          // Returns an image compatible with the 'frame' image for displaying.          cv::Mat last_network_input() const; @@ -118,96 +143,129 @@ class PoseEstimator      private:          // Operates on the private image data members          int find_input_intensity_90_pct_quantile() const; -        inline static constexpr int input_img_width = 129; -        inline static constexpr int input_img_height = 129; -        Ort::Session session{nullptr}; + +        int64_t model_version_ = 0;  // Queried meta data from the ONNX file +        Ort::Session session_{nullptr};  // ONNX's runtime context for running the model +        Ort::Allocator allocator_;   // Memory allocator for tensors          // Inputs -        cv::Mat scaled_frame{}, input_mat{}; -        Ort::Value input_val{nullptr}; +        cv::Mat scaled_frame_{}, input_mat_{};  // Input. One is the original crop, the other is rescaled (?) +        std::vector<Ort::Value> input_val_;    // Tensors to put into the model +        std::vector<const char*> input_names_; // Refers to the names in the onnx model.           // Outputs -        cv::Vec<float, 3> output_coord{}; -        cv::Vec<float, 4> output_quat{}; -        cv::Vec<float, 4> output_box{}; -        Ort::Value output_val[3] = { -            Ort::Value{nullptr},  -            Ort::Value{nullptr},  -            Ort::Value{nullptr}}; -        double last_inference_time = 0; +        cv::Vec<float, 3> output_coord_{};  // 2d Coordinate and head size output. 
+        cv::Vec<float, 4> output_quat_{};   //  Quaternion output +        cv::Vec<float, 4> output_box_{};    // Bounding box output +        std::vector<Ort::Value> output_val_; // Tensors to put the model outputs in. +        std::vector<const char*> output_names_; // Refers to the names in the onnx model.  +        size_t num_recurrent_states_ = 0; +        double last_inference_time_ = 0; +}; + + +class Preview +{ +public: +    void init(const cv_video_widget& widget); +    void copy_video_frame(const cv::Mat& frame); +    void draw_gizmos( +        const std::optional<PoseEstimator::Face> &face, +        const Affine& pose, +        const std::optional<cv::Rect2f>& last_roi, +        const std::optional<cv::Rect2f>& last_localizer_roi, +        const cv::Point2f& neckjoint_position); +    void overlay_netinput(const cv::Mat& netinput); +    void draw_fps(double fps, double last_inference_time); +    void copy_to_widget(cv_video_widget& widget); +private: +    // Transform from camera frame to preview +    cv::Rect2f transform(const cv::Rect2f& r) const; +    cv::Point2f transform(const cv::Point2f& p) const; +    float transform(float s) const; + +    cv::Mat preview_image_; +    cv::Size preview_size_ = { 0, 0 }; +    float scale_ = 1.f;   +    cv::Point2f offset_ = { 0.f, 0.f};  }; -class neuralnet_tracker : protected virtual QThread, public ITracker +class NeuralNetTracker : protected virtual QThread, public ITracker  {      Q_OBJECT  public: -    neuralnet_tracker(); -    ~neuralnet_tracker() override; +    NeuralNetTracker(); +    ~NeuralNetTracker() override;      module_status start_tracker(QFrame* frame) override;      void data(double *data) override;      void run() override;      Affine pose(); +    std::tuple<cv::Size, double, double> stats() const; -    QMutex camera_mtx; -    std::unique_ptr<video::impl::camera> camera; +    QMutex camera_mtx_; +    std::unique_ptr<video::impl::camera> camera_;  private:      bool detect();      bool open_camera();      void set_intrinsics(); +    cv::Mat prepare_input_image(const video::frame& frame);      bool load_and_initialize_model();      void draw_gizmos( -        cv::Mat frame,            const std::optional<PoseEstimator::Face> &face, -        const Affine& pose) const; +        const Affine& pose);      void update_fps(double dt); -      Affine compute_pose(const PoseEstimator::Face &face) const; -    numeric_types::vec3 image_to_world(float x, float y, float size, float real_size) const; -    numeric_types::vec2 world_to_image(const numeric_types::vec3& p) const; - -    settings s; -    std::optional<Localizer> localizer; -    std::optional<PoseEstimator> poseestimator; -    Ort::Env env{nullptr}; -    Ort::MemoryInfo allocator_info{nullptr}; - -    CamIntrinsics intrinsics{}; -    cv::Mat frame, grayscale; -    std::optional<cv::Rect2f> last_localizer_roi; -    std::optional<cv::Rect2f> last_roi; -    static constexpr float head_size_mm = 200.f; - -    double fps = 0; -    double last_inference_time = 0; + +    Settings settings_; +    std::optional<Localizer> localizer_; +    std::optional<PoseEstimator> poseestimator_; +    Ort::Env env_{nullptr}; +    Ort::MemoryInfo allocator_info_{nullptr}; + +    CamIntrinsics intrinsics_{}; +    cv::Mat grayscale_; +    std::array<cv::Mat,2> downsized_original_images_ = {}; // Image pyramid +    std::optional<cv::Rect2f> last_localizer_roi_; +    std::optional<cv::Rect2f> last_roi_; +    static constexpr float HEAD_SIZE_MM = 200.f; + +    mutable QMutex stats_mtx_; +    double fps_ 
= 0; +    double inference_time_ = 0; +    cv::Size resolution_ = {}; +          static constexpr double RC = .25; +    int num_threads_ = 1; +    bool is_visible_ = true; -    QMutex mtx; // Protects the pose +    QMutex mtx_; // Protects the pose      Affine pose_; -    std::unique_ptr<cv_video_widget> videoWidget; -    std::unique_ptr<QHBoxLayout> layout; +    Preview preview_; +    std::unique_ptr<cv_video_widget> video_widget_; +    std::unique_ptr<QHBoxLayout> layout_;  }; -class neuralnet_dialog : public ITrackerDialog +class NeuralNetDialog : public ITrackerDialog  {      Q_OBJECT  public: -    neuralnet_dialog(); +    NeuralNetDialog();      void register_tracker(ITracker * x) override;      void unregister_tracker() override;  private:      void make_fps_combobox(); +    void make_resolution_combobox(); -    Ui::Form ui; -    settings s; -     +    Ui::Form ui_; +    Settings settings_;      // Calibration code mostly taken from point tracker -    QTimer calib_timer; -    TranslationCalibrator trans_calib; -    QMutex calibrator_mutex; - -    neuralnet_tracker* tracker = nullptr; +    QTimer calib_timer_; +    TranslationCalibrator trans_calib_; +    QMutex calibrator_mutex_; +    QTimer tracker_status_poll_timer_; +    NeuralNetTracker* tracker_ = nullptr;  private Q_SLOTS:      void doOK(); @@ -216,10 +274,11 @@ private Q_SLOTS:      void update_camera_settings_state(const QString& name);      void startstop_trans_calib(bool start);      void trans_calib_step(); +    void status_poll();  }; -class neuralnet_metadata : public Metadata +class NeuralNetMetadata : public Metadata  {      Q_OBJECT      QString name() override { return QString("neuralnet tracker"); } @@ -229,6 +288,6 @@ class neuralnet_metadata : public Metadata  } // neuralnet_tracker_ns -using neuralnet_tracker_ns::neuralnet_tracker; -using neuralnet_tracker_ns::neuralnet_dialog; -using neuralnet_tracker_ns::neuralnet_metadata; +using neuralnet_tracker_ns::NeuralNetTracker; +using neuralnet_tracker_ns::NeuralNetDialog; +using neuralnet_tracker_ns::NeuralNetMetadata; diff --git a/tracker-neuralnet/lang/nl_NL.ts b/tracker-neuralnet/lang/nl_NL.ts index a2dcd958..dbcd3c8c 100644 --- a/tracker-neuralnet/lang/nl_NL.ts +++ b/tracker-neuralnet/lang/nl_NL.ts @@ -64,14 +64,70 @@ Don't roll or change position.</source>          <source>MJPEG</source>          <translation type="unfinished"></translation>      </message> +    <message> +        <source>Tuning / Debug</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>ROI Smoothing Alpha</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>ROI Zoom</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Thread Count</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Resolution</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Field of view. Needed to transform the pose to world coordinates.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Requested video frame rate. 
Actual setting may not be supported by the camera.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Number of threads. Can be used to balance the CPU load between the game and the tracker.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Show the image patch that the pose estimation model sees.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Amount of smoothing of the face region coordinates. Can help stabilize the pose.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1.</source> +        <translation type="unfinished"></translation> +    </message>  </context>  <context> -    <name>neuralnet_tracker_ns::neuralnet_dialog</name> +    <name>neuralnet_tracker_ns::NeuralNetDialog</name>      <message>          <source>Default</source>          <translation type="unfinished">Standaard</translation>      </message>      <message> +        <source>Tracker Offline</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>%1x%2 @ %3 FPS / Inference: %4 ms</source> +        <translation type="unfinished"></translation> +    </message> +    <message>          <source>%1 yaw samples. Yaw more to %2 samples for stable calibration.</source>          <translation type="unfinished"></translation>      </message> diff --git a/tracker-neuralnet/lang/ru_RU.ts b/tracker-neuralnet/lang/ru_RU.ts index 7e8a9c09..a80c7e3d 100644 --- a/tracker-neuralnet/lang/ru_RU.ts +++ b/tracker-neuralnet/lang/ru_RU.ts @@ -64,14 +64,70 @@ Don't roll or change position.</source>          <source>MJPEG</source>          <translation type="unfinished"></translation>      </message> +    <message> +        <source>Tuning / Debug</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>ROI Smoothing Alpha</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>ROI Zoom</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Thread Count</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Resolution</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Field of view. Needed to transform the pose to world coordinates.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Requested video frame rate. Actual setting may not be supported by the camera.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. 
The image may still be downscaled to the internal resolution.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Number of threads. Can be used to balance the CPU load between the game and the tracker.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Show the image patch that the pose estimation model sees.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Amount of smoothing of the face region coordinates. Can help stabilize the pose.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1.</source> +        <translation type="unfinished"></translation> +    </message>  </context>  <context> -    <name>neuralnet_tracker_ns::neuralnet_dialog</name> +    <name>neuralnet_tracker_ns::NeuralNetDialog</name>      <message>          <source>Default</source>          <translation type="unfinished"></translation>      </message>      <message> +        <source>Tracker Offline</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>%1x%2 @ %3 FPS / Inference: %4 ms</source> +        <translation type="unfinished"></translation> +    </message> +    <message>          <source>%1 yaw samples. Yaw more to %2 samples for stable calibration.</source>          <translation type="unfinished"></translation>      </message> diff --git a/tracker-neuralnet/lang/stub.ts b/tracker-neuralnet/lang/stub.ts index 66e1695c..4cde86a9 100644 --- a/tracker-neuralnet/lang/stub.ts +++ b/tracker-neuralnet/lang/stub.ts @@ -64,14 +64,70 @@ Don't roll or change position.</source>          <source>MJPEG</source>          <translation type="unfinished"></translation>      </message> +    <message> +        <source>Tuning / Debug</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>ROI Smoothing Alpha</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>ROI Zoom</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Thread Count</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Resolution</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Field of view. Needed to transform the pose to world coordinates.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Requested video frame rate. Actual setting may not be supported by the camera.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Number of threads. 
Can be used to balance the CPU load between the game and the tracker.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Show the image patch that the pose estimation model sees.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Amount of smoothing of the face region coordinates. Can help stabilize the pose.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1.</source> +        <translation type="unfinished"></translation> +    </message>  </context>  <context> -    <name>neuralnet_tracker_ns::neuralnet_dialog</name> +    <name>neuralnet_tracker_ns::NeuralNetDialog</name>      <message>          <source>Default</source>          <translation type="unfinished"></translation>      </message>      <message> +        <source>Tracker Offline</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>%1x%2 @ %3 FPS / Inference: %4 ms</source> +        <translation type="unfinished"></translation> +    </message> +    <message>          <source>%1 yaw samples. Yaw more to %2 samples for stable calibration.</source>          <translation type="unfinished"></translation>      </message> diff --git a/tracker-neuralnet/lang/zh_CN.ts b/tracker-neuralnet/lang/zh_CN.ts index 8e9513b3..c3a91211 100644 --- a/tracker-neuralnet/lang/zh_CN.ts +++ b/tracker-neuralnet/lang/zh_CN.ts @@ -64,14 +64,70 @@ Don't roll or change position.</source>          <source>MJPEG</source>          <translation type="unfinished"></translation>      </message> +    <message> +        <source>Tuning / Debug</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>ROI Smoothing Alpha</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>ROI Zoom</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Thread Count</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Resolution</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Field of view. Needed to transform the pose to world coordinates.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Requested video frame rate. Actual setting may not be supported by the camera.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Number of threads. Can be used to balance the CPU load between the game and the tracker.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Show the image patch that the pose estimation model sees.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Amount of smoothing of the face region coordinates. 
Can help stabilize the pose.</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1.</source> +        <translation type="unfinished"></translation> +    </message>  </context>  <context> -    <name>neuralnet_tracker_ns::neuralnet_dialog</name> +    <name>neuralnet_tracker_ns::NeuralNetDialog</name>      <message>          <source>Default</source>          <translation type="unfinished"></translation>      </message>      <message> +        <source>Tracker Offline</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>%1x%2 @ %3 FPS / Inference: %4 ms</source> +        <translation type="unfinished"></translation> +    </message> +    <message>          <source>%1 yaw samples. Yaw more to %2 samples for stable calibration.</source>          <translation type="unfinished"></translation>      </message> diff --git a/tracker-neuralnet/neuralnet-trackercontrols.ui b/tracker-neuralnet/neuralnet-trackercontrols.ui index 5f72a809..750e6ef3 100644 --- a/tracker-neuralnet/neuralnet-trackercontrols.ui +++ b/tracker-neuralnet/neuralnet-trackercontrols.ui @@ -9,353 +9,602 @@     <rect>      <x>0</x>      <y>0</y> -    <width>727</width> -    <height>202</height> +    <width>671</width> +    <height>357</height>     </rect>    </property>    <property name="windowTitle">     <string>Tracker settings</string>    </property>    <layout class="QGridLayout" name="gridLayout"> -   <item row="5" column="0"> -    <widget class="QDialogButtonBox" name="buttonBox"> -     <property name="standardButtons"> -      <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set> +   <item row="2" column="0"> +    <widget class="QGroupBox" name="groupBox"> +     <property name="sizePolicy"> +      <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> +       <horstretch>0</horstretch> +       <verstretch>0</verstretch> +      </sizepolicy>       </property> -    </widget> -   </item> -   <item row="3" column="0"> -    <widget class="QFrame" name="frame_3"> -     <property name="frameShape"> -      <enum>QFrame::StyledPanel</enum> +     <property name="autoFillBackground"> +      <bool>true</bool>       </property> -     <property name="frameShadow"> -      <enum>QFrame::Raised</enum> +     <property name="title"> +      <string>Camera Configuration</string> +     </property> +     <property name="flat"> +      <bool>false</bool> +     </property> +     <property name="checkable"> +      <bool>false</bool>       </property>       <layout class="QHBoxLayout" name="horizontalLayout">        <property name="spacing"> -       <number>0</number> -      </property> -      <property name="leftMargin"> -       <number>0</number> -      </property> -      <property name="topMargin"> -       <number>0</number> -      </property> -      <property name="rightMargin"> -       <number>0</number> +       <number>10</number>        </property>        <property name="bottomMargin"> -       <number>0</number> +       <number>8</number>        </property>        <item> -       <widget class="QGroupBox" name="groupBox"> +       <layout class="QGridLayout" name="gridLayout_3"> +        <property name="sizeConstraint"> +         <enum>QLayout::SetDefaultConstraint</enum> +        </property> +        <property name="leftMargin"> +         <number>0</number> +        </property> +        <property 
name="topMargin"> +         <number>0</number> +        </property> +        <property name="rightMargin"> +         <number>0</number> +        </property> +        <property name="bottomMargin"> +         <number>0</number> +        </property> +        <property name="horizontalSpacing"> +         <number>0</number> +        </property> +        <property name="verticalSpacing"> +         <number>2</number> +        </property> +        <item row="0" column="1"> +         <widget class="QComboBox" name="cameraName"> +          <property name="sizePolicy"> +           <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> +            <horstretch>0</horstretch> +            <verstretch>0</verstretch> +           </sizepolicy> +          </property> +         </widget> +        </item> +        <item row="1" column="0"> +         <widget class="QLabel" name="label_9"> +          <property name="text"> +           <string>Diagonal FOV</string> +          </property> +         </widget> +        </item> +        <item row="0" column="0"> +         <widget class="QLabel" name="label_10"> +          <property name="text"> +           <string>Camera name</string> +          </property> +         </widget> +        </item> +        <item row="1" column="1"> +         <widget class="QSpinBox" name="cameraFOV"> +          <property name="sizePolicy"> +           <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> +            <horstretch>0</horstretch> +            <verstretch>0</verstretch> +           </sizepolicy> +          </property> +          <property name="toolTip"> +           <string>Field of view. Needed to transform the pose to world coordinates.</string> +          </property> +          <property name="locale"> +           <locale language="English" country="UnitedStates"/> +          </property> +          <property name="minimum"> +           <number>35</number> +          </property> +          <property name="maximum"> +           <number>90</number> +          </property> +         </widget> +        </item> +        <item row="2" column="1"> +         <widget class="QComboBox" name="resolution"> +          <property name="sizePolicy"> +           <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> +            <horstretch>0</horstretch> +            <verstretch>0</verstretch> +           </sizepolicy> +          </property> +          <property name="toolTip"> +           <string>The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. 
The image may still be downscaled to the internal resolution.</string> +          </property> +         </widget> +        </item> +        <item row="2" column="0"> +         <widget class="QLabel" name="resolution_label"> +          <property name="text"> +           <string>Resolution</string> +          </property> +         </widget> +        </item> +       </layout> +      </item> +      <item> +       <layout class="QGridLayout" name="gridLayout_6"> +        <property name="leftMargin"> +         <number>0</number> +        </property> +        <property name="topMargin"> +         <number>0</number> +        </property> +        <property name="rightMargin"> +         <number>0</number> +        </property> +        <property name="bottomMargin"> +         <number>0</number> +        </property> +        <property name="horizontalSpacing"> +         <number>0</number> +        </property> +        <property name="verticalSpacing"> +         <number>2</number> +        </property> +        <item row="4" column="1"> +         <widget class="QComboBox" name="cameraFPS"> +          <property name="sizePolicy"> +           <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> +            <horstretch>0</horstretch> +            <verstretch>0</verstretch> +           </sizepolicy> +          </property> +          <property name="toolTip"> +           <string>Requested video frame rate. Actual setting may not be supported by the camera.</string> +          </property> +         </widget> +        </item> +        <item row="4" column="0"> +         <widget class="QLabel" name="label_12"> +          <property name="text"> +           <string>Frames per second</string> +          </property> +         </widget> +        </item> +        <item row="2" column="1"> +         <widget class="QCheckBox" name="use_mjpeg"> +          <property name="sizePolicy"> +           <sizepolicy hsizetype="Minimum" vsizetype="Preferred"> +            <horstretch>0</horstretch> +            <verstretch>0</verstretch> +           </sizepolicy> +          </property> +          <property name="minimumSize"> +           <size> +            <width>0</width> +            <height>0</height> +           </size> +          </property> +          <property name="text"> +           <string/> +          </property> +         </widget> +        </item> +        <item row="2" column="0"> +         <widget class="QLabel" name="label_11"> +          <property name="text"> +           <string>MJPEG</string> +          </property> +         </widget> +        </item> +        <item row="0" column="1"> +         <widget class="QPushButton" name="camera_settings"> +          <property name="sizePolicy"> +           <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> +            <horstretch>0</horstretch> +            <verstretch>0</verstretch> +           </sizepolicy> +          </property> +          <property name="text"> +           <string>Camera settings</string> +          </property> +         </widget> +        </item> +       </layout> +      </item> +     </layout> +    </widget> +   </item> +   <item row="9" column="0"> +    <widget class="QDialogButtonBox" name="buttonBox"> +     <property name="standardButtons"> +      <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set> +     </property> +    </widget> +   </item> +   <item row="5" column="0"> +    <widget class="QGroupBox" name="tuningOptionsBox"> +     <property name="sizePolicy"> +      <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> +  
     <horstretch>0</horstretch> +       <verstretch>0</verstretch> +      </sizepolicy> +     </property> +     <property name="minimumSize"> +      <size> +       <width>0</width> +       <height>0</height> +      </size> +     </property> +     <property name="autoFillBackground"> +      <bool>true</bool> +     </property> +     <property name="title"> +      <string>Tuning / Debug</string> +     </property> +     <layout class="QGridLayout" name="gridLayout_2"> +      <item row="0" column="10"> +       <widget class="Line" name="line_2"> +        <property name="orientation"> +         <enum>Qt::Vertical</enum> +        </property> +       </widget> +      </item> +      <item row="0" column="1"> +       <widget class="QSpinBox" name="threadCount"> +        <property name="toolTip"> +         <string>Number of threads. Can be used to balance the CPU load between the game and the tracker.</string> +        </property> +        <property name="minimum"> +         <number>1</number> +        </property> +        <property name="maximum"> +         <number>32</number> +        </property> +       </widget> +      </item> +      <item row="0" column="4"> +       <widget class="Line" name="line"> +        <property name="orientation"> +         <enum>Qt::Vertical</enum> +        </property> +       </widget> +      </item> +      <item row="0" column="8"> +       <widget class="QLabel" name="roiFilterAlphaLabel">          <property name="sizePolicy"> -         <sizepolicy hsizetype="Fixed" vsizetype="Preferred"> +         <sizepolicy hsizetype="Minimum" vsizetype="Minimum">            <horstretch>0</horstretch>            <verstretch>0</verstretch>           </sizepolicy>          </property> -        <property name="title"> -         <string>Camera Configuration</string> +        <property name="text"> +         <string>ROI Smoothing Alpha</string>          </property> -        <layout class="QGridLayout" name="gridLayout_4"> -         <item row="3" column="1"> -          <widget class="QComboBox" name="cameraName"> -           <property name="sizePolicy"> -            <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> -             <horstretch>0</horstretch> -             <verstretch>0</verstretch> -            </sizepolicy> -           </property> -          </widget> -         </item> -         <item row="0" column="1"> -          <widget class="QSpinBox" name="cameraFOV"> -           <property name="sizePolicy"> -            <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> -             <horstretch>0</horstretch> -             <verstretch>0</verstretch> -            </sizepolicy> +       </widget> +      </item> +      <item row="0" column="11"> +       <widget class="QLabel" name="roiZoomLabel"> +        <property name="text"> +         <string>ROI Zoom</string> +        </property> +       </widget> +      </item> +      <item row="0" column="2"> +       <widget class="Line" name="line_3"> +        <property name="orientation"> +         <enum>Qt::Vertical</enum> +        </property> +       </widget> +      </item> +      <item row="0" column="3"> +       <widget class="QCheckBox" name="showNetworkInput"> +        <property name="sizePolicy"> +         <sizepolicy hsizetype="Minimum" vsizetype="Fixed"> +          <horstretch>0</horstretch> +          <verstretch>0</verstretch> +         </sizepolicy> +        </property> +        <property name="toolTip"> +         <string>Show the image patch that the pose estimation model sees.</string> +        </property> +        
<property name="text"> +         <string>Show Network Input</string> +        </property> +       </widget> +      </item> +      <item row="0" column="9"> +       <widget class="QDoubleSpinBox" name="roiFilterAlpha"> +        <property name="sizePolicy"> +         <sizepolicy hsizetype="Minimum" vsizetype="Fixed"> +          <horstretch>0</horstretch> +          <verstretch>0</verstretch> +         </sizepolicy> +        </property> +        <property name="maximumSize"> +         <size> +          <width>150</width> +          <height>16777215</height> +         </size> +        </property> +        <property name="toolTip"> +         <string>Amount of smoothing of the face region coordinates. Can help stabilize the pose.</string> +        </property> +        <property name="wrapping"> +         <bool>false</bool> +        </property> +        <property name="decimals"> +         <number>2</number> +        </property> +        <property name="maximum"> +         <double>1.000000000000000</double> +        </property> +        <property name="singleStep"> +         <double>0.010000000000000</double> +        </property> +        <property name="value"> +         <double>1.000000000000000</double> +        </property> +       </widget> +      </item> +      <item row="0" column="0"> +       <widget class="QLabel" name="threadCountLabel"> +        <property name="text"> +         <string>Thread Count</string> +        </property> +       </widget> +      </item> +      <item row="0" column="12"> +       <widget class="QDoubleSpinBox" name="roiZoom"> +        <property name="toolTip"> +         <string>Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1.</string> +        </property> +        <property name="minimum"> +         <double>0.100000000000000</double> +        </property> +        <property name="maximum"> +         <double>2.000000000000000</double> +        </property> +        <property name="singleStep"> +         <double>0.010000000000000</double> +        </property> +        <property name="value"> +         <double>1.000000000000000</double> +        </property> +       </widget> +      </item> +      <item row="0" column="13"> +       <spacer name="horizontalSpacer"> +        <property name="orientation"> +         <enum>Qt::Horizontal</enum> +        </property> +        <property name="sizeHint" stdset="0"> +         <size> +          <width>40</width> +          <height>20</height> +         </size> +        </property> +       </spacer> +      </item> +     </layout> +    </widget> +   </item> +   <item row="4" column="0"> +    <widget class="QGroupBox" name="groupBox_10"> +     <property name="sizePolicy"> +      <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> +       <horstretch>0</horstretch> +       <verstretch>0</verstretch> +      </sizepolicy> +     </property> +     <property name="autoFillBackground"> +      <bool>true</bool> +     </property> +     <property name="title"> +      <string>Head Center Offset</string> +     </property> +     <layout class="QGridLayout" name="gridLayout_5"> +      <item row="0" column="0"> +       <widget class="QFrame" name="frame_4"> +        <property name="sizePolicy"> +         <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> +          <horstretch>0</horstretch> +          <verstretch>0</verstretch> +         </sizepolicy> +        </property> +        <property name="maximumSize"> +         <size> +          <width>16777215</width> 
+          <height>16777215</height> +         </size> +        </property> +        <property name="frameShape"> +         <enum>QFrame::NoFrame</enum> +        </property> +        <property name="frameShadow"> +         <enum>QFrame::Raised</enum> +        </property> +        <layout class="QGridLayout" name="gridLayout_11"> +         <property name="sizeConstraint"> +          <enum>QLayout::SetDefaultConstraint</enum> +         </property> +         <property name="verticalSpacing"> +          <number>0</number> +         </property> +         <item row="1" column="1"> +          <widget class="QSpinBox" name="ty_spin"> +           <property name="maximumSize"> +            <size> +             <width>150</width> +             <height>16777215</height> +            </size>             </property> -           <property name="locale"> -            <locale language="English" country="UnitedStates"/> +           <property name="suffix"> +            <string> mm</string>             </property>             <property name="minimum"> -            <number>35</number> +            <number>-65535</number>             </property>             <property name="maximum"> -            <number>90</number> -           </property> -          </widget> -         </item> -         <item row="1" column="0"> -          <widget class="QLabel" name="label_12"> -           <property name="text"> -            <string>Frames per second</string> +            <number>65536</number>             </property>            </widget>           </item>           <item row="2" column="0"> -          <widget class="QLabel" name="label_11"> -           <property name="text"> -            <string>MJPEG</string> +          <widget class="QLabel" name="label_66"> +           <property name="sizePolicy"> +            <sizepolicy hsizetype="Maximum" vsizetype="Preferred"> +             <horstretch>0</horstretch> +             <verstretch>0</verstretch> +            </sizepolicy>             </property> -          </widget> -         </item> -         <item row="3" column="0"> -          <widget class="QLabel" name="label_10">             <property name="text"> -            <string>Camera name</string> +            <string>Right</string>             </property>            </widget>           </item> -         <item row="0" column="0"> -          <widget class="QLabel" name="label_9"> -           <property name="text"> -            <string>Diagonal FOV</string> +         <item row="2" column="1"> +          <widget class="QSpinBox" name="tz_spin"> +           <property name="maximumSize"> +            <size> +             <width>150</width> +             <height>16777215</height> +            </size> +           </property> +           <property name="suffix"> +            <string> mm</string> +           </property> +           <property name="minimum"> +            <number>-65535</number> +           </property> +           <property name="maximum"> +            <number>65536</number>             </property>            </widget>           </item> -         <item row="1" column="1"> -          <widget class="QComboBox" name="cameraFPS"> +         <item row="0" column="0"> +          <widget class="QLabel" name="label_61">             <property name="sizePolicy"> -            <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> +            <sizepolicy hsizetype="Maximum" vsizetype="Preferred">               <horstretch>0</horstretch>               <verstretch>0</verstretch>              </sizepolicy>             </property> +  
         <property name="text"> +            <string>Forward</string> +           </property>            </widget>           </item> -         <item row="2" column="1"> -          <widget class="QCheckBox" name="use_mjpeg"> -           <property name="sizePolicy"> -            <sizepolicy hsizetype="Minimum" vsizetype="Maximum"> -             <horstretch>0</horstretch> -             <verstretch>0</verstretch> -            </sizepolicy> +         <item row="0" column="1"> +          <widget class="QSpinBox" name="tx_spin"> +           <property name="maximumSize"> +            <size> +             <width>150</width> +             <height>16777215</height> +            </size>             </property> -           <property name="text"> -            <string/> +           <property name="suffix"> +            <string> mm</string> +           </property> +           <property name="minimum"> +            <number>-65535</number> +           </property> +           <property name="maximum"> +            <number>65536</number>             </property>            </widget>           </item> -         <item row="4" column="1"> -          <widget class="QPushButton" name="camera_settings"> +         <item row="1" column="0"> +          <widget class="QLabel" name="label_62">             <property name="sizePolicy"> -            <sizepolicy hsizetype="Preferred" vsizetype="Maximum"> +            <sizepolicy hsizetype="Maximum" vsizetype="Preferred">               <horstretch>0</horstretch>               <verstretch>0</verstretch>              </sizepolicy>             </property>             <property name="text"> -            <string>Camera settings</string> +            <string>Up</string>             </property>            </widget>           </item>          </layout>         </widget>        </item> -      <item> -       <widget class="QGroupBox" name="groupBox_10"> +      <item row="0" column="1"> +       <widget class="QFrame" name="frame_5">          <property name="sizePolicy"> -         <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> +         <sizepolicy hsizetype="Preferred" vsizetype="Expanding">            <horstretch>0</horstretch>            <verstretch>0</verstretch>           </sizepolicy>          </property> -        <property name="title"> -         <string>Head Center Offset</string> +        <property name="minimumSize"> +         <size> +          <width>260</width> +          <height>0</height> +         </size>          </property> -        <layout class="QGridLayout" name="gridLayout_5"> -         <item row="0" column="0"> -          <widget class="QFrame" name="frame_4"> -           <property name="sizePolicy"> -            <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> -             <horstretch>0</horstretch> -             <verstretch>0</verstretch> -            </sizepolicy> +        <property name="frameShape"> +         <enum>QFrame::NoFrame</enum> +        </property> +        <property name="frameShadow"> +         <enum>QFrame::Raised</enum> +        </property> +        <layout class="QVBoxLayout" name="verticalLayout_2"> +         <item> +          <widget class="QLabel" name="label_59"> +           <property name="text"> +            <string>Use only yaw and pitch while calibrating. 
+Don't roll or change position.</string>             </property> -           <property name="maximumSize"> -            <size> -             <width>16777215</width> -             <height>16777215</height> -            </size> +           <property name="alignment"> +            <set>Qt::AlignCenter</set>             </property> -           <property name="frameShape"> -            <enum>QFrame::NoFrame</enum> +           <property name="wordWrap"> +            <bool>true</bool>             </property> -           <property name="frameShadow"> -            <enum>QFrame::Raised</enum> +           <property name="openExternalLinks"> +            <bool>false</bool>             </property> -           <layout class="QGridLayout" name="gridLayout_11"> -            <item row="1" column="1"> -             <widget class="QSpinBox" name="ty_spin"> -              <property name="maximumSize"> -               <size> -                <width>150</width> -                <height>16777215</height> -               </size> -              </property> -              <property name="suffix"> -               <string> mm</string> -              </property> -              <property name="minimum"> -               <number>-65535</number> -              </property> -              <property name="maximum"> -               <number>65536</number> -              </property> -             </widget> -            </item> -            <item row="2" column="0"> -             <widget class="QLabel" name="label_66"> -              <property name="sizePolicy"> -               <sizepolicy hsizetype="Maximum" vsizetype="Preferred"> -                <horstretch>0</horstretch> -                <verstretch>0</verstretch> -               </sizepolicy> -              </property> -              <property name="text"> -               <string>Right</string> -              </property> -             </widget> -            </item> -            <item row="2" column="1"> -             <widget class="QSpinBox" name="tz_spin"> -              <property name="maximumSize"> -               <size> -                <width>150</width> -                <height>16777215</height> -               </size> -              </property> -              <property name="suffix"> -               <string> mm</string> -              </property> -              <property name="minimum"> -               <number>-65535</number> -              </property> -              <property name="maximum"> -               <number>65536</number> -              </property> -             </widget> -            </item> -            <item row="0" column="0"> -             <widget class="QLabel" name="label_61"> -              <property name="sizePolicy"> -               <sizepolicy hsizetype="Maximum" vsizetype="Preferred"> -                <horstretch>0</horstretch> -                <verstretch>0</verstretch> -               </sizepolicy> -              </property> -              <property name="text"> -               <string>Forward</string> -              </property> -             </widget> -            </item> -            <item row="0" column="1"> -             <widget class="QSpinBox" name="tx_spin"> -              <property name="maximumSize"> -               <size> -                <width>150</width> -                <height>16777215</height> -               </size> -              </property> -              <property name="suffix"> -               <string> mm</string> -              </property> -              <property name="minimum"> -               <number>-65535</number> -   
           </property> -              <property name="maximum"> -               <number>65536</number> -              </property> -             </widget> -            </item> -            <item row="1" column="0"> -             <widget class="QLabel" name="label_62"> -              <property name="sizePolicy"> -               <sizepolicy hsizetype="Maximum" vsizetype="Preferred"> -                <horstretch>0</horstretch> -                <verstretch>0</verstretch> -               </sizepolicy> -              </property> -              <property name="text"> -               <string>Up</string> -              </property> -             </widget> -            </item> -           </layout>            </widget>           </item> -         <item row="0" column="1"> -          <widget class="QFrame" name="frame_5"> +         <item> +          <widget class="QLabel" name="sample_count_display">             <property name="sizePolicy"> -            <sizepolicy hsizetype="Preferred" vsizetype="Expanding"> +            <sizepolicy hsizetype="Minimum" vsizetype="Maximum">               <horstretch>0</horstretch>               <verstretch>0</verstretch>              </sizepolicy>             </property> -           <property name="minimumSize"> -            <size> -             <width>260</width> -             <height>0</height> -            </size> -           </property>             <property name="frameShape"> -            <enum>QFrame::NoFrame</enum> +            <enum>QFrame::Panel</enum>             </property>             <property name="frameShadow"> -            <enum>QFrame::Raised</enum> +            <enum>QFrame::Sunken</enum> +           </property> +           <property name="text"> +            <string/> +           </property> +           <property name="wordWrap"> +            <bool>true</bool> +           </property> +          </widget> +         </item> +         <item> +          <widget class="QPushButton" name="tcalib_button"> +           <property name="enabled"> +            <bool>false</bool> +           </property> +           <property name="text"> +            <string>Start calibration</string> +           </property> +           <property name="checkable"> +            <bool>true</bool>             </property> -           <layout class="QVBoxLayout" name="verticalLayout_2"> -            <item> -             <widget class="QLabel" name="label_59"> -              <property name="text"> -               <string>Use only yaw and pitch while calibrating. 
-Don't roll or change position.</string> -              </property> -              <property name="alignment"> -               <set>Qt::AlignCenter</set> -              </property> -              <property name="wordWrap"> -               <bool>true</bool> -              </property> -              <property name="openExternalLinks"> -               <bool>false</bool> -              </property> -             </widget> -            </item> -            <item> -             <widget class="QLabel" name="sample_count_display"> -              <property name="sizePolicy"> -               <sizepolicy hsizetype="Minimum" vsizetype="Maximum"> -                <horstretch>0</horstretch> -                <verstretch>0</verstretch> -               </sizepolicy> -              </property> -              <property name="text"> -               <string/> -              </property> -              <property name="wordWrap"> -               <bool>true</bool> -              </property> -             </widget> -            </item> -            <item> -             <widget class="QPushButton" name="tcalib_button"> -              <property name="enabled"> -               <bool>false</bool> -              </property> -              <property name="text"> -               <string>Start calibration</string> -              </property> -              <property name="checkable"> -               <bool>true</bool> -              </property> -             </widget> -            </item> -           </layout>            </widget>           </item>          </layout> @@ -364,10 +613,19 @@ Don't roll or change position.</string>       </layout>      </widget>     </item> -   <item row="4" column="0"> -    <widget class="QCheckBox" name="showNetworkInput"> +   <item row="8" column="0"> +    <widget class="QLabel" name="resolution_display"> +     <property name="autoFillBackground"> +      <bool>true</bool> +     </property> +     <property name="frameShape"> +      <enum>QFrame::Panel</enum> +     </property> +     <property name="frameShadow"> +      <enum>QFrame::Sunken</enum> +     </property>       <property name="text"> -      <string>Show Network Input</string> +      <string notr="true"/>       </property>      </widget>     </item>  | 
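
Note on the new status display: the header diff above adds NeuralNetTracker::stats() together with a tracker_status_poll_timer_ and a status_poll() slot on NeuralNetDialog, the translation files gain the strings "Tracker Offline" and "%1x%2 @ %3 FPS / Inference: %4 ms", and the .ui file gains a resolution_display label. A minimal sketch of how these pieces could fit together, assuming the poll simply formats stats() into the new label; the actual body in ftnoir_tracker_neuralnet.cpp may differ:

    // Sketch only: the real implementation lives in ftnoir_tracker_neuralnet.cpp.
    void NeuralNetDialog::status_poll()
    {
        QString status;
        if (!tracker_)
        {
            status = tr("Tracker Offline");
        }
        else
        {
            // stats() returns the capture resolution, FPS and inference time (ms).
            const auto [res, fps, inference_time_ms] = tracker_->stats();
            status = tr("%1x%2 @ %3 FPS / Inference: %4 ms")
                         .arg(res.width)
                         .arg(res.height)
                         .arg(int(fps))
                         .arg(int(inference_time_ms));
        }
        ui_.resolution_display->setText(status);
    }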

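The Settings struct likewise gains a "num-threads" value (default 1, exposed as a spin box from 1 to 32 in the Tuning / Debug group). A hedged illustration of how such a value is typically handed to ONNX Runtime when the sessions are created; make_session_options() is an invented helper for clarity, not code from this patch:

    #include <onnxruntime_cxx_api.h>

    // Illustration only: applying a thread-count setting to ONNX Runtime.
    Ort::SessionOptions make_session_options(int num_threads)
    {
        Ort::SessionOptions opts;
        opts.SetIntraOpNumThreads(num_threads);  // threads available to a single operator
        opts.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
        return opts;
    }

    // Possible use when loading a model (env and model_path are placeholders):
    //   Ort::Session session{env, model_path, make_session_options(settings_.num_threads)};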