| field | value | date |
|---|---|---|
| author | Michael Welter <michael@welter-4d.de> | 2022-05-12 19:54:57 +0200 |
| committer | Michael Welter <michael@welter-4d.de> | 2022-05-19 20:46:50 +0200 |
| commit | 4d6e80a6c585f6929f6bf4bf319932732160ce84 (patch) | |
| tree | ffbef0e4a2625afccd9b3b1c3da682f54d1c32a1 /tracker-neuralnet | |
| parent | 25af3db8ba560d86f78aa4bfd10039588b7bab82 (diff) | |
tracker/nn: Add support for recurrent model and further tweaks
Regarding tweaks:
* EWA smoothing of the head ROI. Smoothing strength is a UI setting (a short sketch follows this list).
* Adjustable zoom into the detected face. The predicted ROI is scaled by a user-settable factor; there is a sweet spot somewhere near 1.
* Adjustable number of threads.
* The ROI is no longer taken directly from the model output. That output was not actually needed, except perhaps as an auxiliary training objective for the network. The tracker now uses a square area around the head center, sized according to the predicted head size.
* Add a comment and a debug notification for the face-ROI model output.
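For clarity, here is a minimal sketch of the ROI smoothing described above. It mirrors the `ewa_filter` helper added in the diff below; the standalone function name and form are illustrative, not the exact tracker code. `alpha` is the UI setting: 1 passes each detection through unfiltered, smaller values blend the new ROI into the previous one for more smoothing at the cost of lag.

```cpp
#include <opencv2/core.hpp>

// Exponentially weighted averaging of the face ROI (sketch).
// alpha == 1 keeps the raw detection; alpha < 1 smooths it over time.
cv::Rect2f ewa_roi(const cv::Rect2f& last, const cv::Rect2f& current, float alpha)
{
    const cv::Point2f last_center = 0.5f * (last.tl() + last.br());
    const cv::Point2f cur_center  = 0.5f * (current.tl() + current.br());
    const cv::Point2f center = last_center + alpha * (cur_center - last_center);
    const cv::Size2f size(last.width  + alpha * (current.width  - last.width),
                          last.height + alpha * (current.height - last.height));
    return cv::Rect2f(center - cv::Point2f(0.5f * size.width, 0.5f * size.height), size);
}
```

The ROI zoom setting then only scales the square box derived from the predicted head size (roughly `halfsize = face_size / roi_zoom` in the patch), which is why values near 1 work best.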
Diffstat (limited to 'tracker-neuralnet')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | tracker-neuralnet/ftnoir_tracker_neuralnet.cpp | 272 |
| -rw-r--r-- | tracker-neuralnet/ftnoir_tracker_neuralnet.h | 27 |
| -rw-r--r-- | tracker-neuralnet/lang/nl_NL.ts | 16 |
| -rw-r--r-- | tracker-neuralnet/lang/ru_RU.ts | 16 |
| -rw-r--r-- | tracker-neuralnet/lang/stub.ts | 16 |
| -rw-r--r-- | tracker-neuralnet/lang/zh_CN.ts | 16 |
| -rw-r--r-- | tracker-neuralnet/neuralnet-trackercontrols.ui | 146 |
7 files changed, 432 insertions, 77 deletions
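Regarding the recurrent-model support in the title: the model may now declare extra inputs and outputs, beyond the regular image input and pose/quaternion outputs, which carry the hidden state of recurrent layers. Below is a condensed sketch of the mechanism the diff implements; the function signature is illustrative, not the tracker's actual interface. After each inference the freshly written state tensors are swapped into the corresponding input slots, so the next frame starts from the previous frame's state; `std::swap` on `Ort::Value` only exchanges internal handles, no tensor data is copied.

```cpp
#include <onnxruntime_cxx_api.h>
#include <utility>
#include <vector>

// One inference step with recurrent-state feedback (sketch).
// The last num_recurrent_states entries of input_val/output_val hold the
// hidden-state tensors; the leading entries are the regular inputs/outputs.
void run_step(Ort::Session& session,
              const std::vector<const char*>& input_names,
              const std::vector<const char*>& output_names,
              std::vector<Ort::Value>& input_val,
              std::vector<Ort::Value>& output_val,
              size_t num_recurrent_states)
{
    session.Run(Ort::RunOptions{nullptr},
                input_names.data(), input_val.data(), input_val.size(),
                output_names.data(), output_val.data(), output_val.size());

    // Feed the new hidden states back as next frame's inputs (handle swap only).
    for (size_t i = 0; i < num_recurrent_states; ++i)
        std::swap(output_val[output_val.size() - num_recurrent_states + i],
                  input_val[input_val.size() - num_recurrent_states + i]);
}
```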
diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp index 06874e6c..00f3f281 100644 --- a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp +++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp @@ -16,6 +16,7 @@  #include <opencv2/imgcodecs.hpp>  #include "compat/timer.hpp"  #include <omp.h> +#include <stdexcept>  #ifdef _MSC_VER  #   pragma warning(disable : 4702) @@ -29,6 +30,7 @@  #include <cmath>  #include <algorithm>  #include <chrono> +#include <string>  // Some demo code for onnx  // https://github.com/microsoft/onnxruntime/blob/master/csharp/test/Microsoft.ML.OnnxRuntime.EndToEndTests.Capi/C_Api_Sample.cpp @@ -43,9 +45,6 @@ using numeric_types::vec2;  using numeric_types::mat33;  using quat = std::array<numeric_types::f,4>; -// Minimal difference if at all going from 1 to 2 threads. -static constexpr int num_threads = 1; -  #if _MSC_VER  std::wstring convert(const QString &s) { return s.toStdWString(); } @@ -69,6 +68,41 @@ cv::Rect_<T> squarize(const cv::Rect_<T> &r)  } +template<class T> +cv::Point_<T> as_point(const cv::Size_<T>& s) +{ +    return { s.width, s.height }; +} + + +template<class T> +cv::Size_<T> as_size(const cv::Point_<T>& p) +{ +    return { p.x, p.y }; +} + + +template<class T> +cv::Rect_<T> expand(const cv::Rect_<T>& r, T factor) +{ +    // xnew = l+.5*w - w*f*0.5 = l + .5*(w - new_w) +    const cv::Size_<T> new_size = { r.width * factor, r.height * factor }; +    const cv::Point_<T> new_tl = r.tl() + (as_point(r.size()) - as_point(new_size)) / T(2); +    return cv::Rect_<T>(new_tl, new_size); +} + + +template<class T> +cv::Rect_<T> ewa_filter(const cv::Rect_<T>& last, const cv::Rect_<T>& current, T alpha) +{ +    const auto last_center = T(0.5) * (last.tl() + last.br()); +    const auto cur_center = T(0.5) * (current.tl() + current.br()); +    const cv::Point_<T> new_size = as_point(last.size()) + alpha * (as_point(current.size()) - as_point(last.size())); +    const cv::Point_<T> new_center = last_center + alpha * (cur_center - last_center); +    return cv::Rect_<T>(new_center - T(0.5) * new_size, as_size(new_size)); +} + +  cv::Rect2f unnormalize(const cv::Rect2f &r, int h, int w)  {      auto unnorm = [](float x) -> float { return 0.5*(x+1); }; @@ -138,6 +172,30 @@ mat33 quaternion_to_mat33(const std::array<float,4> quat)  } +vec3 rotate_vec(const quat& q, const vec3& p) +{ +    const float qw = q[0]; +    const float qi = q[1]; +    const float qj = q[2]; +    const float qk = q[3]; +    const float pi = p[0]; +    const float pj = p[1]; +    const float pk = p[2]; +    const quat tmp{ +                        - qi*pi - qj*pj - qk*pk, +        qw*pi                   + qj*pk - qk*pj, +        qw*pj - qi*pk                   + qk*pi, +        qw*pk + qi*pj - qj*pi                   +    }; +    const vec3 out { +        -tmp[0]*qi + tmp[1]*qw - tmp[2]*qk + tmp[3]*qj, +        -tmp[0]*qj + tmp[1]*qk + tmp[2]*qw - tmp[3]*qi, +        -tmp[0]*qk - tmp[1]*qj + tmp[2]*qi + tmp[3]*qw +    }; +    return out; +} + +  vec3 image_to_world(float x, float y, float size, float reference_size_in_mm, const cv::Size2i& image_size, const CamIntrinsics& intrinsics)  {      // Compute the location the network outputs in 3d space. @@ -186,8 +244,8 @@ T iou(const cv::Rect_<T> &a, const cv::Rect_<T> &b)  // have the shape B x C x H x W, where B=C=1.  
cv::Size get_input_image_shape(const Ort::Session &session)  { -    if (session.GetInputCount() != 1) -        throw std::invalid_argument("Model must take exactly one input tensor"); +    if (session.GetInputCount() < 1) +        throw std::invalid_argument("Model must take at least one input tensor");      const std::vector<std::int64_t> shape =           session.GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();      if (shape.size() != 4) @@ -196,6 +254,17 @@ cv::Size get_input_image_shape(const Ort::Session &session)  } +Ort::Value create_tensor(const Ort::TypeInfo& info, Ort::Allocator& alloc) +{ +    const auto shape = info.GetTensorTypeAndShapeInfo().GetShape(); +    auto t = Ort::Value::CreateTensor<float>( +        alloc, shape.data(), shape.size()); +    memset(t.GetTensorMutableData<float>(), 0, sizeof(float)*info.GetTensorTypeAndShapeInfo().GetElementCount()); +    return t; +} + + +  int enum_to_fps(int value)  {      switch (value) @@ -241,10 +310,7 @@ std::pair<float, cv::Rect2f> Localizer::run(      Timer t; t.start(); -    const auto nt = omp_get_num_threads(); -    omp_set_num_threads(num_threads);      session.Run(Ort::RunOptions{nullptr}, input_names, &input_val, 1, output_names, &output_val, 1); -    omp_set_num_threads(nt);      last_inference_time = t.elapsed_ms(); @@ -267,36 +333,85 @@ double Localizer::last_inference_time_millis() const  PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&_session)  -    : session{std::move(_session)} -    , model_version(session.GetModelMetadata().GetVersion()) +    : model_version{_session.GetModelMetadata().GetVersion()} +    , session{std::move(_session)} +    , allocator{session, allocator_info}  { +    using namespace std::literals::string_literals; + +    if (session.GetOutputCount() < 2) +        throw std::runtime_error("Invalid Model: must have at least two outputs"); + +    // WARNING +    // If the model was saved without meta data, it seems the version field is uninitialized. +    // In that case reading from it is UB. However, we will just get same arbitrary number +    // which is hopefully different from the numbers used by models where the version is set. 
+    if (model_version != 2) +        model_version = 1; +      const cv::Size input_image_shape = get_input_image_shape(session);      scaled_frame = cv::Mat(input_image_shape, CV_8U); -    input_mat = cv::Mat(input_image_shape, CV_32F); +    input_mat = cv::Mat(input_image_shape, CV_32F);       {          const std::int64_t input_shape[4] = { 1, 1, input_image_shape.height, input_image_shape.width }; -        input_val = Ort::Value::CreateTensor<float>(allocator_info, input_mat.ptr<float>(0), input_mat.total(), input_shape, 4); +        input_val.push_back( +            Ort::Value::CreateTensor<float>(allocator_info, input_mat.ptr<float>(0), input_mat.total(), input_shape, 4));      }      {          const std::int64_t output_shape[2] = { 1, 3 }; -        output_val[0] = Ort::Value::CreateTensor<float>( -            allocator_info, &output_coord[0], output_coord.rows, output_shape, 2); +        output_val.push_back(Ort::Value::CreateTensor<float>( +            allocator_info, &output_coord[0], output_coord.rows, output_shape, 2));      }      {          const std::int64_t output_shape[2] = { 1, 4 }; -        output_val[1] = Ort::Value::CreateTensor<float>( -            allocator_info, &output_quat[0], output_quat.rows, output_shape, 2); +        output_val.push_back(Ort::Value::CreateTensor<float>( +            allocator_info, &output_quat[0], output_quat.rows, output_shape, 2));      } +    size_t num_regular_outputs = 2; + +    if (session.GetOutputCount() >= 3 && "box"s == session.GetOutputName(2, allocator))      {          const std::int64_t output_shape[2] = { 1, 4 }; -        output_val[2] = Ort::Value::CreateTensor<float>( -            allocator_info, &output_box[0], output_box.rows, output_shape, 2); +        output_val.push_back(Ort::Value::CreateTensor<float>( +            allocator_info, &output_box[0], output_box.rows, output_shape, 2)); +        ++num_regular_outputs; +        qDebug() << "Note: Legacy model output for face ROI is currently ignored"; +    } + +    num_recurrent_states = session.GetInputCount()-1; +    if (session.GetOutputCount()-num_regular_outputs != num_recurrent_states) +        throw std::runtime_error("Invalid Model: After regular inputs and outputs the model must have equal number of inputs and outputs for tensors holding hidden states of recurrent layers."); + +    // Create tensors for recurrent state +    for (size_t i = 0; i < num_recurrent_states; ++i) +    { +        const auto& input_info = session.GetInputTypeInfo(1+i); +        const auto& output_info = session.GetOutputTypeInfo(num_regular_outputs+i); +        if (input_info.GetTensorTypeAndShapeInfo().GetShape() !=  +            output_info.GetTensorTypeAndShapeInfo().GetShape()) +            throw std::runtime_error("Invalid Model: Tensors for recurrent hidden states should have same shape on intput and output"); +        input_val.push_back(create_tensor(input_info, allocator)); +        output_val.push_back(create_tensor(output_info, allocator));      } + +    for (size_t i = 0; i < session.GetInputCount(); ++i) +    { +        input_names.push_back(session.GetInputName(i, allocator)); +    } +    for (size_t i = 0; i < session.GetOutputCount(); ++i) +    { +        output_names.push_back(session.GetOutputName(i, allocator)); +    } + +    qDebug() << "Model inputs: " << session.GetInputCount() << ", outputs: " << session.GetOutputCount() << ", recurrent states: " << num_recurrent_states; + +    assert (input_names.size() == input_val.size()); +    assert (output_names.size() == 
output_val.size());  } @@ -336,7 +451,7 @@ std::optional<PoseEstimator::Face> PoseEstimator::run(      };      cv::getRectSubPix(frame, {patch_size, patch_size}, patch_center, cropped); -    // Will get failure if patch_center is outside image boundaries. +    // Will get failure if patch_center is outside image boundariesettings.      // Have to catch this case.      if (cropped.rows != patch_size || cropped.cols != patch_size)          return {}; @@ -355,26 +470,37 @@ std::optional<PoseEstimator::Face> PoseEstimator::run(      assert (input_mat.ptr(0) == p);      assert (!input_mat.empty() && input_mat.isContinuous()); -    const char* input_names[] = {"x"}; -    const char* output_names[] = {"pos_size", "quat", "box"};      Timer t; t.start(); -    const auto nt = omp_get_num_threads(); -    omp_set_num_threads(num_threads);      try      { -        session.Run(Ort::RunOptions{ nullptr }, input_names, &input_val, 1, output_names, output_val, 3); +        session.Run( +            Ort::RunOptions{ nullptr },  +            input_names.data(),  +            input_val.data(),  +            input_val.size(),  +            output_names.data(),  +            output_val.data(),  +            output_val.size());      }      catch (const Ort::Exception &e)      {          qDebug() << "Failed to run the model: " << e.what(); -        omp_set_num_threads(nt);          return {};      } -    omp_set_num_threads(nt); -    // FIXME: Execution time fluctuates wildly. 19 to 26 ms. Why??? +    for (size_t i = 0; i<num_recurrent_states; ++i) +    { +        // Next step, the current output becomes the input. +        // Thus we realize the recurrent connection. +        // Only swaps the internal pointers. There is no copy of data. +        std::swap( +            output_val[output_val.size()-num_recurrent_states+i], +            input_val[input_val.size()-num_recurrent_states+i]); +    } + +    // FIXME: Execution time fluctuates wildly. 19 to 26 msec. Why?      //        The instructions are always the same. Maybe a memory allocation      //        issue. The ONNX api suggests that tensor are allocated in an      //        arena. Does that matter? Maybe the issue is something else? @@ -383,7 +509,7 @@ std::optional<PoseEstimator::Face> PoseEstimator::run(      // Perform coordinate transformation.      // From patch-local normalized in [-1,1] to -    // frame unnormalized pixel coordinates. +    // frame unnormalized pixel coordinatesettings.      const cv::Point2f center = patch_center +           (0.5f*patch_size)*cv::Point2f{output_coord[0], output_coord[1]}; @@ -462,7 +588,7 @@ bool neuralnet_tracker::detect()      auto face = poseestimator->run(grayscale, *last_roi);      last_inference_time += poseestimator->last_inference_time_millis(); -     +      if (!face)      {          last_roi.reset(); @@ -470,7 +596,23 @@ bool neuralnet_tracker::detect()          return false;      } -    last_roi = face->box; +    { +        // Here: compute ROI from head size estimate. This helps make the size prediction more +        //       invariant to mouth opening. The tracking can be lost more often at extreme +        //       rotations, depending on the implementation details. The code down here has +        //       been tweaked so that it works pretty well. 
+        // In old behaviour  ROI is taken from the model outputs +        const vec3 offset = rotate_vec(face->rotation, vec3{0.f, 0.1f*face->size, face->size*0.3f}); +        const float halfsize = face->size/float(settings.roi_zoom); +        face->box = cv::Rect2f( +            face->center.x + offset[0] - halfsize, +            face->center.y + offset[1] - halfsize, +            halfsize*2.f, +            halfsize*2.f +        ); +    } + +    last_roi = ewa_filter(*last_roi, face->box, float(settings.roi_filter_alpha));      Affine pose = compute_pose(*face); @@ -487,6 +629,8 @@ bool neuralnet_tracker::detect()  Affine neuralnet_tracker::compute_pose(const PoseEstimator::Face &face) const  { +    // Compute the location the network outputs in 3d space. +      const mat33 rot_correction = compute_rotation_correction(          normalize(face.center, frame.rows, frame.cols),          intrinsics.focal_length_w); @@ -507,7 +651,6 @@ Affine neuralnet_tracker::compute_pose(const PoseEstimator::Face &face) const                 ------------------------      */ -    // Compute the location the network outputs in 3d space.      const vec3 face_world_pos = image_to_world(          face.center.x, face.center.y, face.size, head_size_mm,          frame.size(), @@ -519,9 +662,9 @@ Affine neuralnet_tracker::compute_pose(const PoseEstimator::Face &face) const      const vec3 pos = face_world_pos          + m * vec3{ -            static_cast<float>(s.offset_fwd),  -            static_cast<float>(s.offset_up), -            static_cast<float>(s.offset_right)}; +            static_cast<float>(settings.offset_fwd),  +            static_cast<float>(settings.offset_up), +            static_cast<float>(settings.offset_right)};      return { m, pos };  } @@ -566,7 +709,7 @@ void neuralnet_tracker::draw_gizmos(          cv::circle(frame, cv::Point(xy[0],xy[1]), 5, cv::Scalar(0,0,255), -1);      } -    if (s.show_network_input) +    if (settings.show_network_input)      {          cv::Mat netinput = poseestimator->last_network_input();          if (!netinput.empty()) @@ -587,7 +730,6 @@ void neuralnet_tracker::draw_gizmos(  neuralnet_tracker::neuralnet_tracker()  {      opencv_init(); -    cv::setNumThreads(num_threads);  } @@ -609,6 +751,8 @@ module_status neuralnet_tracker::start_tracker(QFrame* videoframe)      layout->addWidget(&*videoWidget);      videoframe->setLayout(&*layout);      videoWidget->show(); +    num_threads = settings.num_threads; +    cv::setNumThreads(num_threads);      start();      return status_ok();  } @@ -633,7 +777,7 @@ bool neuralnet_tracker::load_and_initialize_model()          // openmp settings. Which is what we do. omp_set_num_threads directly          // before running the inference pass.          
opts.SetIntraOpNumThreads(num_threads); -        opts.SetInterOpNumThreads(num_threads); +        opts.SetInterOpNumThreads(1);          allocator_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);          localizer.emplace( @@ -656,11 +800,11 @@ bool neuralnet_tracker::load_and_initialize_model()  bool neuralnet_tracker::open_camera()  { -    int fps = enum_to_fps(s.force_fps); +    int fps = enum_to_fps(settings.force_fps);      QMutexLocker l(&camera_mtx); -    camera = video::make_camera(s.camera_name); +    camera = video::make_camera(settings.camera_name);      if (!camera)          return false; @@ -673,7 +817,7 @@ bool neuralnet_tracker::open_camera()      if (fps)          args.fps = fps; -    args.use_mjpeg = s.use_mjpeg; +    args.use_mjpeg = settings.use_mjpeg;      if (!camera->start(args))      { @@ -687,7 +831,7 @@ bool neuralnet_tracker::open_camera()  void neuralnet_tracker::set_intrinsics()  {      const int w = grayscale.cols, h = grayscale.rows; -    const double diag_fov = s.fov * M_PI / 180.; +    const double diag_fov = settings.fov * M_PI / 180.;      const double fov_w = 2.*atan(tan(diag_fov/2.)/sqrt(1. + h/(double)w * h/(double)w));      const double fov_h = 2.*atan(tan(diag_fov/2.)/sqrt(1. + w/(double)h * w/(double)h));      const double focal_length_w = 1. / tan(.5 * fov_w); @@ -746,8 +890,13 @@ void neuralnet_tracker::run()          set_intrinsics(); +        const auto nt = omp_get_num_threads(); +        omp_set_num_threads(num_threads); +          detect(); +        omp_set_num_threads(nt); +          if (frame.rows > 0)              videoWidget->update_image(frame); @@ -823,26 +972,29 @@ neuralnet_dialog::neuralnet_dialog() :      ui.setupUi(this);      make_fps_combobox(); -    tie_setting(s.force_fps, ui.cameraFPS); +    tie_setting(settings.force_fps, ui.cameraFPS);      for (const auto& str : video::camera_names())          ui.cameraName->addItem(str); -    tie_setting(s.camera_name, ui.cameraName); -    tie_setting(s.fov, ui.cameraFOV); -    tie_setting(s.offset_fwd, ui.tx_spin); -    tie_setting(s.offset_up, ui.ty_spin); -    tie_setting(s.offset_right, ui.tz_spin); -    tie_setting(s.show_network_input, ui.showNetworkInput); -    tie_setting(s.use_mjpeg, ui.use_mjpeg); +    tie_setting(settings.camera_name, ui.cameraName); +    tie_setting(settings.fov, ui.cameraFOV); +    tie_setting(settings.offset_fwd, ui.tx_spin); +    tie_setting(settings.offset_up, ui.ty_spin); +    tie_setting(settings.offset_right, ui.tz_spin); +    tie_setting(settings.show_network_input, ui.showNetworkInput); +    tie_setting(settings.roi_filter_alpha, ui.roiFilterAlpha); +    tie_setting(settings.use_mjpeg, ui.use_mjpeg); +	tie_setting(settings.roi_zoom, ui.roiZoom); +    tie_setting(settings.num_threads, ui.threadCount);      connect(ui.buttonBox, SIGNAL(accepted()), this, SLOT(doOK()));      connect(ui.buttonBox, SIGNAL(rejected()), this, SLOT(doCancel()));      connect(ui.camera_settings, SIGNAL(clicked()), this, SLOT(camera_settings())); -    connect(&s.camera_name, value_::value_changed<QString>(), this, &neuralnet_dialog::update_camera_settings_state); +    connect(&settings.camera_name, value_::value_changed<QString>(), this, &neuralnet_dialog::update_camera_settings_state); -    update_camera_settings_state(s.camera_name); +    update_camera_settings_state(settings.camera_name);      connect(&calib_timer, &QTimer::timeout, this, &neuralnet_dialog::trans_calib_step);      calib_timer.setInterval(35); @@ -852,7 +1004,7 @@ 
neuralnet_dialog::neuralnet_dialog() :  void neuralnet_dialog::doOK()  { -    s.b->save(); +    settings.b->save();      close();  } @@ -871,7 +1023,7 @@ void neuralnet_dialog::camera_settings()          (void)tracker->camera->show_dialog();      }      else -        (void)video::show_dialog(s.camera_name); +        (void)video::show_dialog(settings.camera_name);  } @@ -942,9 +1094,9 @@ void neuralnet_dialog::startstop_trans_calib(bool start)          trans_calib.reset();          ui.sample_count_display->setText(QString());          // Tracker must run with zero'ed offset for calibration. -        s.offset_fwd = 0; -        s.offset_up = 0; -        s.offset_right = 0; +        settings.offset_fwd = 0; +        settings.offset_up = 0; +        settings.offset_right = 0;      }      else      { @@ -952,9 +1104,9 @@ void neuralnet_dialog::startstop_trans_calib(bool start)          qDebug() << "pt: stopping translation calibration";          {              auto [tmp, nsamples] = trans_calib.get_estimate(); -            s.offset_fwd = int(tmp[0]); -            s.offset_up = int(tmp[1]); -            s.offset_right = int(tmp[2]); +            settings.offset_fwd = int(tmp[0]); +            settings.offset_up = int(tmp[1]); +            settings.offset_right = int(tmp[2]);          }      }      ui.tx_spin->setEnabled(!start); @@ -968,7 +1120,7 @@ void neuralnet_dialog::startstop_trans_calib(bool start)  } -settings::settings() : opts("neuralnet-tracker") {} +Settings::Settings() : opts("neuralnet-tracker") {}  } // neuralnet_tracker_ns diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.h b/tracker-neuralnet/ftnoir_tracker_neuralnet.h index b6405d1a..25f1a0a2 100644 --- a/tracker-neuralnet/ftnoir_tracker_neuralnet.h +++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.h @@ -50,7 +50,7 @@ enum fps_choices  }; -struct settings : opts { +struct Settings : opts {      value<int> offset_fwd { b, "offset-fwd", 200 }, // Millimeters                 offset_up { b, "offset-up", 0 },                 offset_right { b, "offset-right", 0 }; @@ -58,8 +58,11 @@ struct settings : opts {      value<int> fov { b, "field-of-view", 56 };      value<fps_choices> force_fps { b, "force-fps", fps_default };      value<bool> show_network_input { b, "show-network-input", false }; +    value<double> roi_filter_alpha{ b, "roi-filter-alpha", 1. }; +    value<double> roi_zoom{ b, "roi-zoom", 1. 
};      value<bool> use_mjpeg { b, "use-mjpeg", false }; -    settings(); +    value<int> num_threads { b, "num-threads", 1 }; +    Settings();  }; @@ -118,23 +121,26 @@ class PoseEstimator      private:          // Operates on the private image data members          int find_input_intensity_90_pct_quantile() const; + +        int64_t model_version = 0;          Ort::Session session{nullptr}; +        Ort::Allocator allocator;          // Inputs          cv::Mat scaled_frame{}, input_mat{}; -        Ort::Value input_val{nullptr}; +        std::vector<Ort::Value> input_val; +        std::vector<const char*> input_names;          // Outputs          cv::Vec<float, 3> output_coord{};          cv::Vec<float, 4> output_quat{};          cv::Vec<float, 4> output_box{}; -        Ort::Value output_val[3] = { -            Ort::Value{nullptr},  -            Ort::Value{nullptr},  -            Ort::Value{nullptr}}; +        std::vector<Ort::Value> output_val; +        std::vector<const char*> output_names; +        size_t num_recurrent_states = 0;          double last_inference_time = 0; -        int64_t model_version = 0;  }; +  class neuralnet_tracker : protected virtual QThread, public ITracker  {      Q_OBJECT @@ -162,7 +168,7 @@ private:      Affine compute_pose(const PoseEstimator::Face &face) const; -    settings s; +    Settings settings;      std::optional<Localizer> localizer;      std::optional<PoseEstimator> poseestimator;      Ort::Env env{nullptr}; @@ -177,6 +183,7 @@ private:      double fps = 0;      double last_inference_time = 0;      static constexpr double RC = .25; +    int num_threads = 1;      QMutex mtx; // Protects the pose      Affine pose_; @@ -197,7 +204,7 @@ private:      void make_fps_combobox();      Ui::Form ui; -    settings s; +    Settings settings;      // Calibration code mostly taken from point tracker      QTimer calib_timer; diff --git a/tracker-neuralnet/lang/nl_NL.ts b/tracker-neuralnet/lang/nl_NL.ts index a2dcd958..95da8f4c 100644 --- a/tracker-neuralnet/lang/nl_NL.ts +++ b/tracker-neuralnet/lang/nl_NL.ts @@ -64,6 +64,22 @@ Don't roll or change position.</source>          <source>MJPEG</source>          <translation type="unfinished"></translation>      </message> +    <message> +        <source>Tuning / Debug</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>ROI Smoothing Alpha</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>ROI Zoom</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Thread Count</source> +        <translation type="unfinished"></translation> +    </message>  </context>  <context>      <name>neuralnet_tracker_ns::neuralnet_dialog</name> diff --git a/tracker-neuralnet/lang/ru_RU.ts b/tracker-neuralnet/lang/ru_RU.ts index 7e8a9c09..a8252299 100644 --- a/tracker-neuralnet/lang/ru_RU.ts +++ b/tracker-neuralnet/lang/ru_RU.ts @@ -64,6 +64,22 @@ Don't roll or change position.</source>          <source>MJPEG</source>          <translation type="unfinished"></translation>      </message> +    <message> +        <source>Tuning / Debug</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>ROI Smoothing Alpha</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>ROI Zoom</source> +        <translation type="unfinished"></translation> +    </message> +    
<message> +        <source>Thread Count</source> +        <translation type="unfinished"></translation> +    </message>  </context>  <context>      <name>neuralnet_tracker_ns::neuralnet_dialog</name> diff --git a/tracker-neuralnet/lang/stub.ts b/tracker-neuralnet/lang/stub.ts index 66e1695c..80103fde 100644 --- a/tracker-neuralnet/lang/stub.ts +++ b/tracker-neuralnet/lang/stub.ts @@ -64,6 +64,22 @@ Don't roll or change position.</source>          <source>MJPEG</source>          <translation type="unfinished"></translation>      </message> +    <message> +        <source>Tuning / Debug</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>ROI Smoothing Alpha</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>ROI Zoom</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Thread Count</source> +        <translation type="unfinished"></translation> +    </message>  </context>  <context>      <name>neuralnet_tracker_ns::neuralnet_dialog</name> diff --git a/tracker-neuralnet/lang/zh_CN.ts b/tracker-neuralnet/lang/zh_CN.ts index 8e9513b3..f55c12fc 100644 --- a/tracker-neuralnet/lang/zh_CN.ts +++ b/tracker-neuralnet/lang/zh_CN.ts @@ -64,6 +64,22 @@ Don't roll or change position.</source>          <source>MJPEG</source>          <translation type="unfinished"></translation>      </message> +    <message> +        <source>Tuning / Debug</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>ROI Smoothing Alpha</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>ROI Zoom</source> +        <translation type="unfinished"></translation> +    </message> +    <message> +        <source>Thread Count</source> +        <translation type="unfinished"></translation> +    </message>  </context>  <context>      <name>neuralnet_tracker_ns::neuralnet_dialog</name> diff --git a/tracker-neuralnet/neuralnet-trackercontrols.ui b/tracker-neuralnet/neuralnet-trackercontrols.ui index 5f72a809..acbfe909 100644 --- a/tracker-neuralnet/neuralnet-trackercontrols.ui +++ b/tracker-neuralnet/neuralnet-trackercontrols.ui @@ -9,15 +9,15 @@     <rect>      <x>0</x>      <y>0</y> -    <width>727</width> -    <height>202</height> +    <width>721</width> +    <height>277</height>     </rect>    </property>    <property name="windowTitle">     <string>Tracker settings</string>    </property>    <layout class="QGridLayout" name="gridLayout"> -   <item row="5" column="0"> +   <item row="9" column="0">      <widget class="QDialogButtonBox" name="buttonBox">       <property name="standardButtons">        <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set> @@ -364,11 +364,143 @@ Don't roll or change position.</string>       </layout>      </widget>     </item> -   <item row="4" column="0"> -    <widget class="QCheckBox" name="showNetworkInput"> -     <property name="text"> -      <string>Show Network Input</string> +   <item row="5" column="0"> +    <widget class="QGroupBox" name="tuningOptionsBox"> +     <property name="sizePolicy"> +      <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> +       <horstretch>0</horstretch> +       <verstretch>0</verstretch> +      </sizepolicy> +     </property> +     <property name="minimumSize"> +      <size> +       <width>0</width> +       <height>0</height> +      </size> +     </property> +     
<property name="title"> +      <string>Tuning / Debug</string>       </property> +     <layout class="QGridLayout" name="gridLayout_2"> +      <item row="0" column="7"> +       <widget class="Line" name="line_3"> +        <property name="orientation"> +         <enum>Qt::Vertical</enum> +        </property> +       </widget> +      </item> +      <item row="0" column="8"> +       <widget class="QLabel" name="threadCountLabel"> +        <property name="text"> +         <string>Thread Count</string> +        </property> +       </widget> +      </item> +      <item row="0" column="2"> +       <widget class="QLabel" name="roiFilterAlphaLabel"> +        <property name="sizePolicy"> +         <sizepolicy hsizetype="Minimum" vsizetype="Minimum"> +          <horstretch>0</horstretch> +          <verstretch>0</verstretch> +         </sizepolicy> +        </property> +        <property name="text"> +         <string>ROI Smoothing Alpha</string> +        </property> +       </widget> +      </item> +      <item row="0" column="1"> +       <widget class="Line" name="line"> +        <property name="orientation"> +         <enum>Qt::Vertical</enum> +        </property> +       </widget> +      </item> +      <item row="0" column="0"> +       <widget class="QCheckBox" name="showNetworkInput"> +        <property name="sizePolicy"> +         <sizepolicy hsizetype="Minimum" vsizetype="Fixed"> +          <horstretch>0</horstretch> +          <verstretch>0</verstretch> +         </sizepolicy> +        </property> +        <property name="text"> +         <string>Show Network Input</string> +        </property> +       </widget> +      </item> +      <item row="0" column="3"> +       <widget class="QDoubleSpinBox" name="roiFilterAlpha"> +        <property name="sizePolicy"> +         <sizepolicy hsizetype="Minimum" vsizetype="Fixed"> +          <horstretch>0</horstretch> +          <verstretch>0</verstretch> +         </sizepolicy> +        </property> +        <property name="maximumSize"> +         <size> +          <width>150</width> +          <height>16777215</height> +         </size> +        </property> +        <property name="wrapping"> +         <bool>false</bool> +        </property> +        <property name="decimals"> +         <number>2</number> +        </property> +        <property name="maximum"> +         <double>1.000000000000000</double> +        </property> +        <property name="singleStep"> +         <double>0.010000000000000</double> +        </property> +        <property name="value"> +         <double>1.000000000000000</double> +        </property> +       </widget> +      </item> +      <item row="0" column="5"> +       <widget class="QLabel" name="roiZoomLabel"> +        <property name="text"> +         <string>ROI Zoom</string> +        </property> +       </widget> +      </item> +      <item row="0" column="4"> +       <widget class="Line" name="line_2"> +        <property name="orientation"> +         <enum>Qt::Vertical</enum> +        </property> +       </widget> +      </item> +      <item row="0" column="6"> +       <widget class="QDoubleSpinBox" name="roiZoom"> +        <property name="minimum"> +         <double>0.100000000000000</double> +        </property> +        <property name="maximum"> +         <double>2.000000000000000</double> +        </property> +        <property name="singleStep"> +         <double>0.010000000000000</double> +        </property> +        <property name="value"> +         <double>1.000000000000000</double> +        </property> +       
</widget> +      </item> +      <item row="0" column="9"> +       <widget class="QSpinBox" name="threadCount"> +        <property name="minimum"> +         <number>1</number> +        </property> +        <property name="maximum"> +         <number>32</number> +        </property> +       </widget> +      </item> +     </layout>      </widget>     </item>    </layout>  | 
