Diffstat (limited to 'tracker-neuralnet')
-rw-r--r--  tracker-neuralnet/CMakeLists.txt                 14
-rw-r--r--  tracker-neuralnet/ftnoir_tracker_neuralnet.cpp   71
-rw-r--r--  tracker-neuralnet/lang/ru_RU.ts                 107
-rw-r--r--  tracker-neuralnet/model_adapters.cpp             41
-rw-r--r--  tracker-neuralnet/model_adapters.h               13
5 files changed, 126 insertions(+), 120 deletions(-)
diff --git a/tracker-neuralnet/CMakeLists.txt b/tracker-neuralnet/CMakeLists.txt
index f414c920..db568fae 100644
--- a/tracker-neuralnet/CMakeLists.txt
+++ b/tracker-neuralnet/CMakeLists.txt
@@ -1,4 +1,9 @@
 include(opentrack-opencv)
+set(host-spec "${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_SIZEOF_VOID_P}")
+if(host-spec MATCHES "^Linux i[3-6]86 4$")
+    return()
+endif()
+
 find_package(OpenCV QUIET)
 find_package(OpenMP QUIET) # Used to control number of onnx threads.
 find_package(ONNXRuntime QUIET)
@@ -11,16 +16,21 @@ if(OpenCV_FOUND AND ONNXRuntime_FOUND AND OpenMP_FOUND)
     otr_module(tracker-neuralnet)
 
-    target_link_libraries(${self} 
+    target_link_libraries(${self}
         opentrack-cv
         onnxruntime::onnxruntime
         opencv_calib3d
         opencv_imgproc
         opencv_imgcodecs
         opencv_core
-        OpenMP::OpenMP_C
+        OpenMP::OpenMP_CXX
        )
 
+    # OpenMP::OpenMP_CXX doesn't set up the -fopenmp linking option, so set it up ourselves.
+    if(NOT MSVC)
+        target_link_options(${self} PUBLIC ${OpenMP_CXX_FLAGS})
+    endif()
+
     install(
         FILES "models/head-localizer.onnx"
               "models/head-pose.onnx"
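Note on the OpenMP hunk above: the module is C++, so linking `OpenMP::OpenMP_C` was wrong, and with GCC/Clang the imported `OpenMP::OpenMP_CXX` target may propagate only compile flags, leaving `omp_*`/GOMP symbols unresolved at link time. A minimal sketch of the same workaround outside this project (the `mytracker` target name is hypothetical; assumes CMake 3.13+ for `target_link_options`):

    cmake_minimum_required(VERSION 3.13)
    project(openmp-link-demo CXX)

    find_package(OpenMP REQUIRED)

    add_library(mytracker SHARED tracker.cpp)
    target_link_libraries(mytracker PRIVATE OpenMP::OpenMP_CXX)

    # Reuse the compile flags (typically "-fopenmp") at link time so the
    # OpenMP runtime (libgomp/libomp) gets pulled in; MSVC needs no link flag.
    if(NOT MSVC)
        target_link_options(mytracker PUBLIC ${OpenMP_CXX_FLAGS})
    endif()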
diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp
index a1a3526b..59e17063 100644
--- a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp
+++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp
@@ -13,7 +13,6 @@
 #include "compat/math-imports.hpp"
 #include "compat/timer.hpp"
 #include "compat/check-visible.hpp"
-#include "compat/camera-names.hpp"
 #include "cv/init.hpp"
 #include <omp.h>
 
@@ -84,8 +83,7 @@ struct OnScopeExit
 CamIntrinsics make_intrinsics(const cv::Mat& img, const Settings& settings)
 {
     const int w = img.cols, h = img.rows;
-    //const double diag_fov = settings.fov * M_PI / 180.;
-    const double diag_fov = 60 * M_PI / 180.; (void)settings;
+    const double diag_fov = settings.fov * M_PI / 180.;
     const double fov_w = 2.*atan(tan(diag_fov/2.)/sqrt(1. + h/(double)w * h/(double)w));
     const double fov_h = 2.*atan(tan(diag_fov/2.)/sqrt(1. + w/(double)h * w/(double)h));
     const double focal_length_w = 1. / tan(.5 * fov_w);
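The `make_intrinsics()` hunk above drops the hard-coded 60° diagonal FOV and restores the user's `settings.fov`. For reference, a self-contained sketch of the conversion it performs (the 56° input is an arbitrary example; the math assumes a pinhole camera whose diagonal tangent splits in proportion to the aspect ratio, i.e. tan(fov_w/2) = tan(diag/2)·w/√(w²+h²)):

    #include <cmath>
    #include <cstdio>

    int main()
    {
        const double pi = std::acos(-1.);
        const int w = 640, h = 480;
        const double diag_fov = 56. * pi / 180.;  // example user setting, in radians

        // Same expressions as in make_intrinsics().
        const double fov_w = 2.*std::atan(std::tan(diag_fov/2.)/std::sqrt(1. + h/(double)w * h/(double)w));
        const double fov_h = 2.*std::atan(std::tan(diag_fov/2.)/std::sqrt(1. + w/(double)h * w/(double)h));

        // Focal lengths in units of half the image width/height.
        const double focal_length_w = 1. / std::tan(.5 * fov_w);
        const double focal_length_h = 1. / std::tan(.5 * fov_h);

        std::printf("fov_w = %.1f deg, fov_h = %.1f deg, f_w = %.3f, f_h = %.3f\n",
                    fov_w*180./pi, fov_h*180./pi, focal_length_w, focal_length_h);
    }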
tr("Default") : QString::number(hz);          ui_.cameraFPS->addItem(name, k);      } -#endif  }  void NeuralNetDialog::make_resolution_combobox()  { -#if 0      int k=0;      for (const auto [w, h] : resolution_choices)      { @@ -736,7 +703,6 @@ void NeuralNetDialog::make_resolution_combobox()              : QString::number(w) + " x " + QString::number(h);          ui_.resolution->addItem(s, k++);      } -#endif  } @@ -748,44 +714,21 @@ NeuralNetDialog::NeuralNetDialog() :      make_fps_combobox();      make_resolution_combobox(); -    ui_.cameraName->addItem(QString{});      for (const auto& str : video::camera_names())          ui_.cameraName->addItem(str);      tie_setting(settings_.camera_name, ui_.cameraName); -#if 0      tie_setting(settings_.fov, ui_.cameraFOV); -#endif      tie_setting(settings_.offset_fwd, ui_.tx_spin);      tie_setting(settings_.offset_up, ui_.ty_spin);      tie_setting(settings_.offset_right, ui_.tz_spin);      tie_setting(settings_.show_network_input, ui_.showNetworkInput);      tie_setting(settings_.roi_filter_alpha, ui_.roiFilterAlpha); -#if 0      tie_setting(settings_.use_mjpeg, ui_.use_mjpeg); -#endif  	tie_setting(settings_.roi_zoom, ui_.roiZoom);      tie_setting(settings_.num_threads, ui_.threadCount); -#if 0      tie_setting(settings_.resolution, ui_.resolution);      tie_setting(settings_.force_fps, ui_.cameraFPS); -#endif - -    { -        const struct { -            QString label; -            exposure_preset preset; -        } presets[] = { -            { QStringLiteral("Near (1-4ft)"), exposure_preset::near     }, -            { QStringLiteral("Far (4-8ft)"),  exposure_preset::far      }, -            { QStringLiteral("Custom"),       exposure_preset::ignored  }, -        }; - -        for (const auto& [label, preset] : presets) -            ui_.exposure_preset->addItem(label, int(preset)); - -        tie_setting(cs_.exposure, ui_.exposure_preset); -    }      connect(ui_.buttonBox, SIGNAL(accepted()), this, SLOT(doOK()));      connect(ui_.buttonBox, SIGNAL(rejected()), this, SLOT(doCancel())); @@ -807,13 +750,11 @@ NeuralNetDialog::NeuralNetDialog() :  void NeuralNetDialog::save()  {      settings_.b->save(); -    cs_.b->save();  }  void NeuralNetDialog::reload()  {      settings_.b->reload(); -    cs_.b->reload();  }  void NeuralNetDialog::doOK() diff --git a/tracker-neuralnet/lang/ru_RU.ts b/tracker-neuralnet/lang/ru_RU.ts index 4c9cec04..b191e769 100644 --- a/tracker-neuralnet/lang/ru_RU.ts +++ b/tracker-neuralnet/lang/ru_RU.ts @@ -5,123 +5,150 @@      <name>Form</name>      <message>          <source>Tracker settings</source> -        <translation type="unfinished"></translation> +        <translation>Настройки трекера</translation> +    </message> +    <message> +        <source>Diagonal FOV</source> +        <translation>Угол обзора</translation>      </message>      <message>          <source>Camera settings</source> -        <translation type="unfinished"></translation> +        <translation>Настройки камеры</translation> +    </message> +    <message> +        <source>Frames per second</source> +        <translation>Кадры в секунду</translation> +    </message> +    <message> +        <source>Camera name</source> +        <translation>Камера</translation> +    </message> +    <message> +        <source>Camera Configuration</source> +        <translation>Конфигурация камеры</translation>      </message>      <message>          <source>Head Center Offset</source> -        <translation type="unfinished"></translation> +        
<translation>Смещение центра головы</translation>      </message>      <message>          <source> mm</source> -        <translation type="unfinished"></translation> +        <translation> мм</translation>      </message>      <message>          <source>Use only yaw and pitch while calibrating.  Don't roll or change position.</source> -        <translation type="unfinished"></translation> +        <translation>Поворачивайте голову влево-вправо и наклоняйте вверх-вниз. +Не наклоняйте набок и не смещайте голову в сторону.</translation>      </message>      <message>          <source>Start calibration</source> -        <translation type="unfinished"></translation> +        <translation>Начать калибровку</translation>      </message>      <message>          <source>Right</source> -        <translation type="unfinished"></translation> +        <translation>Вправо</translation>      </message>      <message>          <source>Forward</source> -        <translation type="unfinished"></translation> +        <translation>Вперед</translation>      </message>      <message>          <source>Up</source> -        <translation type="unfinished"></translation> +        <translation>Вверх</translation>      </message>      <message> -        <source>Exposure preset</source> -        <translation type="unfinished"></translation> +        <source>Show Network Input</source> +        <translation>Показать входные данные</translation>      </message>      <message> -        <source>Camera Configuration</source> -        <translation type="unfinished"></translation> +        <source>MJPEG</source> +        <translation>Использовать MJPEG</translation>      </message>      <message>          <source>Tuning / Debug</source> -        <translation type="unfinished"></translation> -    </message> -    <message> -        <source>Number of threads. Can be used to balance the CPU load between the game and the tracker.</source> -        <translation type="unfinished"></translation> +        <translation>Тонкая настройка</translation>      </message>      <message>          <source>ROI Smoothing Alpha</source> -        <translation type="unfinished"></translation> +        <translation>Сглаживание ROI</translation>      </message>      <message>          <source>ROI Zoom</source> -        <translation type="unfinished"></translation> +        <translation>Масштабирование ROI</translation>      </message>      <message> -        <source>Show the image patch that the pose estimation model sees.</source> -        <translation type="unfinished"></translation> +        <source>Thread Count</source> +        <translation>Количество потоков</translation>      </message>      <message> -        <source>Show Network Input</source> -        <translation type="unfinished"></translation> +        <source>Resolution</source> +        <translation>Разрешение</translation>      </message>      <message> -        <source>Amount of smoothing of the face region coordinates. Can help stabilize the pose.</source> -        <translation type="unfinished"></translation> +        <source>Field of view. Needed to transform the pose to world coordinates.</source> +        <translation>Угол обзора камеры. Требуется для преобразования положения головы в глобальные координаты</translation>      </message>      <message> -        <source>Thread Count</source> -        <translation type="unfinished"></translation> +        <source>Requested video frame rate. 
Actual setting may not be supported by the camera.</source> +        <translation>Частота кадров. Реальные значения могут не поддерживаться камерой.</translation>      </message>      <message> -        <source>Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1.</source> -        <translation type="unfinished"></translation> +        <source>The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution.</source> +        <translation>Разрешение камеры, для тех случаев, когда быстродействие камеры максимально в определенном разрешении. Может быть масштабировано до внутреннего разрешения.</translation> +    </message> +    <message> +        <source>Number of threads. Can be used to balance the CPU load between the game and the tracker.</source> +        <translation>Количество потоков. Используется для балансировки нагрузки на процессор между игрой и трекером.</translation>      </message>      <message> -        <source>Camera override</source> -        <translation type="unfinished"></translation> +        <source>Show the image patch that the pose estimation model sees.</source> +        <translation>Показать изображение, используемое моделью определения позиции</translation> +    </message> +    <message> +        <source>Amount of smoothing of the face region coordinates. Can help stabilize the pose.</source> +        <translation>Сглаживание координат области лица. Может помочь стабилизировать позицию.</translation> +    </message> +    <message> +        <source>Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1.</source> +        <translation>Фактор масштабирования области лица. Применяется перед передачей кадра в модель определения позиции. Наилучшие результаты близки к 1</translation>      </message>  </context>  <context>      <name>neuralnet_tracker_ns::NeuralNetDialog</name>      <message>          <source>Default</source> -        <translation type="unfinished"></translation> +        <translation>По умолчанию</translation>      </message>      <message>          <source>Tracker Offline</source> -        <translation type="unfinished"></translation> +        <translation>Трекер выключен</translation>      </message>      <message>          <source>%1x%2 @ %3 FPS / Inference: %4 ms</source> -        <translation type="unfinished"></translation> +        <translation>%1x%2 @ %3 FPS; Время оценки: %4 мс</translation>      </message>      <message>          <source>%1 yaw samples. Yaw more to %2 samples for stable calibration.</source> -        <translation type="unfinished"></translation> +        <translation>Сэмплов поворота: %1. +Поворачивайте голову в стороны до %2 сэмплов для стабильной калибрации.</translation>      </message>      <message>          <source>%1 pitch samples. Pitch more to %2 samples for stable calibration.</source> -        <translation type="unfinished"></translation> +        <translation>Сэмплов наклона: %1. +Наклоняйте голову вниз/вверх до %2 сэмплов для стабильной калибрации.</translation>      </message>      <message>          <source>%1 samples. Over %2, good!</source> -        <translation type="unfinished"></translation> +        <translation>%1 сэмплов. 
diff --git a/tracker-neuralnet/model_adapters.cpp b/tracker-neuralnet/model_adapters.cpp
index af599321..a8580a89 100644
--- a/tracker-neuralnet/model_adapters.cpp
+++ b/tracker-neuralnet/model_adapters.cpp
@@ -8,7 +8,6 @@
 #include <QDebug>
 
-
 namespace neuralnet_tracker_ns
 {
@@ -165,6 +164,24 @@ double Localizer::last_inference_time_millis() const
 }
 
+std::string PoseEstimator::get_network_input_name(size_t i) const
+{
+#if ORT_API_VERSION >= 12
+    return std::string(&*session_.GetInputNameAllocated(i, allocator_));
+#else
+    return std::string(session_.GetInputName(i, allocator_));
+#endif
+}
+
+std::string PoseEstimator::get_network_output_name(size_t i) const
+{
+#if ORT_API_VERSION >= 12
+    return std::string(&*session_.GetOutputNameAllocated(i, allocator_));
+#else
+    return std::string(session_.GetOutputName(i, allocator_));
+#endif
+}
+
 PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&session) 
     : model_version_{session.GetModelMetadata().GetVersion()}
     , session_{std::move(session)}
@@ -215,14 +232,16 @@ PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&ses
     qDebug() << "Pose model inputs (" << session_.GetInputCount() << ")";
     qDebug() << "Pose model outputs (" << session_.GetOutputCount() << "):";
+    output_names_.resize(session_.GetOutputCount());
+    output_c_names_.resize(session_.GetOutputCount());
     for (size_t i=0; i<session_.GetOutputCount(); ++i)
     {
-        const char* name = session_.GetOutputName(i, allocator_);
+        std::string name = get_network_output_name(i);
         const auto& output_info = session_.GetOutputTypeInfo(i);
         const auto& onnx_tensor_spec = output_info.GetTensorTypeAndShapeInfo();
         auto my_tensor_spec = understood_outputs.find(name);
 
-        qDebug() << "\t" << name << " (" << onnx_tensor_spec.GetShape() << ") dtype: " <<  onnx_tensor_spec.GetElementType() << " " <<
+        qDebug() << "\t" << name.c_str() << " (" << onnx_tensor_spec.GetShape() << ") dtype: " <<  onnx_tensor_spec.GetElementType() << " " <<
             (my_tensor_spec != understood_outputs.end() ? "ok" : "unknown");
 
         if (my_tensor_spec != understood_outputs.end())
@@ -240,7 +259,8 @@ PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&ses
             // Create tensor regardless and ignore output
             output_val_.push_back(create_tensor(output_info, allocator_));
         }
-        output_names_.push_back(name);
+        output_names_[i] = name;
+        output_c_names_[i] = output_names_[i].c_str();
     }
 
     has_uncertainty_ = understood_outputs.at("rotaxis_scales_tril").available ||
@@ -270,9 +290,12 @@ PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&ses
     //     output_val_.push_back(create_tensor(output_info, allocator_));
     // }
 
+    input_names_.resize(session_.GetInputCount());
+    input_c_names_.resize(session_.GetInputCount());
     for (size_t i = 0; i < session_.GetInputCount(); ++i)
     {
-        input_names_.push_back(session_.GetInputName(i, allocator_));
+        input_names_[i] = get_network_input_name(i);
+        input_c_names_[i] = input_names_[i].c_str();
     }
 
     assert (input_names_.size() == input_val_.size());
@@ -312,11 +335,11 @@ std::optional<PoseEstimator::Face> PoseEstimator::run(
     {
         session_.Run(
             Ort::RunOptions{ nullptr }, 
-            input_names_.data(), 
+            input_c_names_.data(),
             input_val_.data(), 
             input_val_.size(), 
-            output_names_.data(), 
-            output_val_.data(), 
+            output_c_names_.data(),
+            output_val_.data(),
             output_val_.size());
     }
     catch (const Ort::Exception &e)
@@ -430,4 +453,4 @@ double PoseEstimator::last_inference_time_millis() const
 
 
 
-} // namespace neuralnet_tracker_ns
\ No newline at end of file
+} // namespace neuralnet_tracker_ns
diff --git a/tracker-neuralnet/model_adapters.h b/tracker-neuralnet/model_adapters.h
index 3fbfb861..820330cf 100644
--- a/tracker-neuralnet/model_adapters.h
+++ b/tracker-neuralnet/model_adapters.h
@@ -3,6 +3,7 @@
 #include <optional>
 #include <array>
 #include <vector>
+#include <string>
 
 #include <onnxruntime_cxx_api.h>
 #include <opencv2/core.hpp>
@@ -21,7 +22,7 @@ class Localizer
     public:
         Localizer(Ort::MemoryInfo &allocator_info, 
                   Ort::Session &&session);
-        
+
         // Returns bounding wrt image coordinate of the input image
         // The preceeding float is the score for being a face normalized to [0,1].
         std::pair<float, cv::Rect2f> run(
@@ -68,13 +69,16 @@ class PoseEstimator
         bool has_uncertainty() const { return has_uncertainty_; }
 
     private:
+        std::string get_network_input_name(size_t i) const;
+        std::string get_network_output_name(size_t i) const;
         int64_t model_version_ = 0;  // Queried meta data from the ONNX file
         Ort::Session session_{nullptr};  // ONNX's runtime context for running the model
         Ort::Allocator allocator_;   // Memory allocator for tensors
         // Inputs
         cv::Mat scaled_frame_{}, input_mat_{};  // Input. One is the original crop, the other is rescaled (?)
         std::vector<Ort::Value> input_val_;    // Tensors to put into the model
-        std::vector<const char*> input_names_; // Refers to the names in the onnx model. 
+        std::vector<std::string> input_names_; // Refers to the names in the onnx model.
+        std::vector<const char *> input_c_names_; // Refers to the C names in the onnx model.
         // Outputs
         cv::Vec<float, 3> output_coord_{};  // 2d Coordinate and head size output.
         cv::Vec<float, 4> output_quat_{};   //  Quaternion output
@@ -83,7 +87,8 @@ class PoseEstimator
         cv::Vec<float, 2> output_eyes_{};
         cv::Vec<float, 3> output_coord_scales_{};
         std::vector<Ort::Value> output_val_; // Tensors to put the model outputs in.
-        std::vector<const char*> output_names_; // Refers to the names in the onnx model.
+        std::vector<std::string> output_names_; // Refers to the names in the onnx model.
+        std::vector<const char *> output_c_names_; // Refers to the C names in the onnx model.
         // More bookkeeping
         size_t num_recurrent_states_ = 0;
         double last_inference_time_ = 0;
@@ -99,4 +104,4 @@ int find_input_intensity_quantile(const cv::Mat& frame, float percentage);
 
 void normalize_brightness(const cv::Mat& frame, cv::Mat& out);
 
-} // namespace neuralnet_tracker_ns
\ No newline at end of file
+} // namespace neuralnet_tracker_ns
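The header change mirrors the .cpp: names are now owned as `std::string`s, with parallel `const char*` vectors kept only because `Ort::Session::Run()` takes raw C-string arrays. A minimal sketch of that ownership pattern (`NamesHolder` is a made-up illustration, not a class from this codebase):

    #include <string>
    #include <vector>

    struct NamesHolder
    {
        std::vector<std::string> names_;   // owns the storage
        std::vector<const char*> c_names_; // borrowed views for the C API

        void set(std::vector<std::string> names)
        {
            names_ = std::move(names);
            c_names_.resize(names_.size());
            for (size_t i = 0; i < names_.size(); ++i)
                c_names_[i] = names_[i].c_str(); // valid while names_ is not resized
        }

        const char* const* data() const { return c_names_.data(); }
    };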
