diff options
author | Stanislaw Halik <sthalik@misaki.pl> | 2023-05-09 15:34:38 +0200 |
---|---|---|
committer | Stanislaw Halik <sthalik@misaki.pl> | 2023-05-09 15:34:38 +0200 |
commit | 780aca2622d907320a1cf462390f192fb3ae4b31 (patch) | |
tree | c7843a5ebe0620d8f97ae7ba86ba6f5d06ffc167 /tracker-neuralnet | |
parent | c1de1499775d47a574bd52d07acbb269845fb75b (diff) | |
parent | 051fb2f94f6364b80219a3c671bb953d2e54a140 (diff) |
Merge branch 'master' into trackhat-sensor-v2
Diffstat (limited to 'tracker-neuralnet')
-rw-r--r-- | tracker-neuralnet/CMakeLists.txt | 14 | ||||
-rw-r--r-- | tracker-neuralnet/ftnoir_tracker_neuralnet.cpp | 71 | ||||
-rw-r--r-- | tracker-neuralnet/lang/ru_RU.ts | 107 | ||||
-rw-r--r-- | tracker-neuralnet/model_adapters.cpp | 41 | ||||
-rw-r--r-- | tracker-neuralnet/model_adapters.h | 13 |
5 files changed, 126 insertions, 120 deletions
diff --git a/tracker-neuralnet/CMakeLists.txt b/tracker-neuralnet/CMakeLists.txt index f414c920..db568fae 100644 --- a/tracker-neuralnet/CMakeLists.txt +++ b/tracker-neuralnet/CMakeLists.txt @@ -1,4 +1,9 @@ include(opentrack-opencv) +set(host-spec "${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_SIZEOF_VOID_P}") +if(host-spec MATCHES "^Linux i[3-6]86 4$") + return() +endif() + find_package(OpenCV QUIET) find_package(OpenMP QUIET) # Used to control number of onnx threads. find_package(ONNXRuntime QUIET) @@ -11,16 +16,21 @@ if(OpenCV_FOUND AND ONNXRuntime_FOUND AND OpenMP_FOUND) otr_module(tracker-neuralnet) - target_link_libraries(${self} + target_link_libraries(${self} opentrack-cv onnxruntime::onnxruntime opencv_calib3d opencv_imgproc opencv_imgcodecs opencv_core - OpenMP::OpenMP_C + OpenMP::OpenMP_CXX ) + # OpenMP::OpenMP_CXX doesn't set up the -fopenmp linking option, so set it up ourselves. + if(NOT MSVC) + target_link_options(${self} PUBLIC ${OpenMP_CXX_FLAGS}) + endif() + install( FILES "models/head-localizer.onnx" "models/head-pose.onnx" diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp index a1a3526b..59e17063 100644 --- a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp +++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp @@ -13,7 +13,6 @@ #include "compat/math-imports.hpp" #include "compat/timer.hpp" #include "compat/check-visible.hpp" -#include "compat/camera-names.hpp" #include "cv/init.hpp" #include <omp.h> @@ -84,8 +83,7 @@ struct OnScopeExit CamIntrinsics make_intrinsics(const cv::Mat& img, const Settings& settings) { const int w = img.cols, h = img.rows; - //const double diag_fov = settings.fov * M_PI / 180.; - const double diag_fov = 60 * M_PI / 180.; (void)settings; + const double diag_fov = settings.fov * M_PI / 180.; const double fov_w = 2.*atan(tan(diag_fov/2.)/sqrt(1. + h/(double)w * h/(double)w)); const double fov_h = 2.*atan(tan(diag_fov/2.)/sqrt(1. + w/(double)h * w/(double)h)); const double focal_length_w = 1. / tan(.5 * fov_w); @@ -353,7 +351,7 @@ bool NeuralNetTracker::detect() last_pose_affine_ = pose_affine; } - draw_gizmos(*face, pose_affine); + draw_gizmos(*face, last_pose_affine_); return true; } @@ -503,38 +501,17 @@ bool NeuralNetTracker::load_and_initialize_model() bool NeuralNetTracker::open_camera() { -#if 0 int rint = std::clamp(*settings_.resolution, 0, (int)std::size(resolution_choices)-1); resolution_tuple res = resolution_choices[rint]; int fps = enum_to_fps(settings_.force_fps); -#endif - - video::impl::camera::info args {}; - args.width = 640; - args.height = 480; - args.fps = 60; - args.use_mjpeg = true; QMutexLocker l(&camera_mtx_); - camera_ = nullptr; - const QString name = settings_.camera_name; - - if (name.isEmpty() || name == "TrackHat sensor") - { - camera_ = video::make_camera_("TrackHat sensor"); - if (camera_ && camera_->start(args)) - return true; - if (!name.isEmpty()) - return false; - } - - camera_ = video::make_camera(name); + camera_ = video::make_camera(settings_.camera_name); if (!camera_) return false; -#if 0 video::impl::camera::info args {}; if (res.width) @@ -546,7 +523,6 @@ bool NeuralNetTracker::open_camera() args.fps = fps; args.use_mjpeg = settings_.use_mjpeg; -#endif if (!camera_->start(args)) { @@ -624,8 +600,6 @@ void NeuralNetTracker::run() std::chrono::duration_cast<std::chrono::milliseconds>( clk.now() - t).count()*1.e-3); } - - camera_ = nullptr; } @@ -670,23 +644,19 @@ void NeuralNetTracker::update_fps(double dt) void NeuralNetTracker::data(double *data) { - auto tmp2 = [&]() + Affine tmp = [&]() { QMutexLocker lck(&mtx_); return last_pose_affine_; }(); - if (!tmp2) - return; - const auto& tmp = *tmp2; - const auto& mx = tmp.R.col(0); const auto& my = tmp.R.col(1); const auto& mz = -tmp.R.col(2); const float yaw = std::atan2(mx(2), mx(0)); const float pitch = -std::atan2(-mx(1), std::sqrt(mx(2)*mx(2)+mx(0)*mx(0))); - const float roll = -std::atan2(-my(2), mz(2)); + const float roll = std::atan2(-my(2), mz(2)); { constexpr double rad2deg = 180/M_PI; data[Yaw] = rad2deg * yaw; @@ -704,7 +674,7 @@ void NeuralNetTracker::data(double *data) Affine NeuralNetTracker::pose() { QMutexLocker lck(&mtx_); - return last_pose_affine_ ? *last_pose_affine_ : Affine{}; + return last_pose_affine_; } std::tuple<cv::Size,double, double> NeuralNetTracker::stats() const @@ -715,19 +685,16 @@ std::tuple<cv::Size,double, double> NeuralNetTracker::stats() const void NeuralNetDialog::make_fps_combobox() { -#if 0 for (int k = 0; k < fps_MAX; k++) { const int hz = enum_to_fps(k); const QString name = (hz == 0) ? tr("Default") : QString::number(hz); ui_.cameraFPS->addItem(name, k); } -#endif } void NeuralNetDialog::make_resolution_combobox() { -#if 0 int k=0; for (const auto [w, h] : resolution_choices) { @@ -736,7 +703,6 @@ void NeuralNetDialog::make_resolution_combobox() : QString::number(w) + " x " + QString::number(h); ui_.resolution->addItem(s, k++); } -#endif } @@ -748,44 +714,21 @@ NeuralNetDialog::NeuralNetDialog() : make_fps_combobox(); make_resolution_combobox(); - ui_.cameraName->addItem(QString{}); for (const auto& str : video::camera_names()) ui_.cameraName->addItem(str); tie_setting(settings_.camera_name, ui_.cameraName); -#if 0 tie_setting(settings_.fov, ui_.cameraFOV); -#endif tie_setting(settings_.offset_fwd, ui_.tx_spin); tie_setting(settings_.offset_up, ui_.ty_spin); tie_setting(settings_.offset_right, ui_.tz_spin); tie_setting(settings_.show_network_input, ui_.showNetworkInput); tie_setting(settings_.roi_filter_alpha, ui_.roiFilterAlpha); -#if 0 tie_setting(settings_.use_mjpeg, ui_.use_mjpeg); -#endif tie_setting(settings_.roi_zoom, ui_.roiZoom); tie_setting(settings_.num_threads, ui_.threadCount); -#if 0 tie_setting(settings_.resolution, ui_.resolution); tie_setting(settings_.force_fps, ui_.cameraFPS); -#endif - - { - const struct { - QString label; - exposure_preset preset; - } presets[] = { - { QStringLiteral("Near (1-4ft)"), exposure_preset::near }, - { QStringLiteral("Far (4-8ft)"), exposure_preset::far }, - { QStringLiteral("Custom"), exposure_preset::ignored }, - }; - - for (const auto& [label, preset] : presets) - ui_.exposure_preset->addItem(label, int(preset)); - - tie_setting(cs_.exposure, ui_.exposure_preset); - } connect(ui_.buttonBox, SIGNAL(accepted()), this, SLOT(doOK())); connect(ui_.buttonBox, SIGNAL(rejected()), this, SLOT(doCancel())); @@ -807,13 +750,11 @@ NeuralNetDialog::NeuralNetDialog() : void NeuralNetDialog::save() { settings_.b->save(); - cs_.b->save(); } void NeuralNetDialog::reload() { settings_.b->reload(); - cs_.b->reload(); } void NeuralNetDialog::doOK() diff --git a/tracker-neuralnet/lang/ru_RU.ts b/tracker-neuralnet/lang/ru_RU.ts index 4c9cec04..b191e769 100644 --- a/tracker-neuralnet/lang/ru_RU.ts +++ b/tracker-neuralnet/lang/ru_RU.ts @@ -5,123 +5,150 @@ <name>Form</name> <message> <source>Tracker settings</source> - <translation type="unfinished"></translation> + <translation>Настройки трекера</translation> + </message> + <message> + <source>Diagonal FOV</source> + <translation>Угол обзора</translation> </message> <message> <source>Camera settings</source> - <translation type="unfinished"></translation> + <translation>Настройки камеры</translation> + </message> + <message> + <source>Frames per second</source> + <translation>Кадры в секунду</translation> + </message> + <message> + <source>Camera name</source> + <translation>Камера</translation> + </message> + <message> + <source>Camera Configuration</source> + <translation>Конфигурация камеры</translation> </message> <message> <source>Head Center Offset</source> - <translation type="unfinished"></translation> + <translation>Смещение центра головы</translation> </message> <message> <source> mm</source> - <translation type="unfinished"></translation> + <translation> мм</translation> </message> <message> <source>Use only yaw and pitch while calibrating. Don't roll or change position.</source> - <translation type="unfinished"></translation> + <translation>Поворачивайте голову влево-вправо и наклоняйте вверх-вниз. +Не наклоняйте набок и не смещайте голову в сторону.</translation> </message> <message> <source>Start calibration</source> - <translation type="unfinished"></translation> + <translation>Начать калибровку</translation> </message> <message> <source>Right</source> - <translation type="unfinished"></translation> + <translation>Вправо</translation> </message> <message> <source>Forward</source> - <translation type="unfinished"></translation> + <translation>Вперед</translation> </message> <message> <source>Up</source> - <translation type="unfinished"></translation> + <translation>Вверх</translation> </message> <message> - <source>Exposure preset</source> - <translation type="unfinished"></translation> + <source>Show Network Input</source> + <translation>Показать входные данные</translation> </message> <message> - <source>Camera Configuration</source> - <translation type="unfinished"></translation> + <source>MJPEG</source> + <translation>Использовать MJPEG</translation> </message> <message> <source>Tuning / Debug</source> - <translation type="unfinished"></translation> - </message> - <message> - <source>Number of threads. Can be used to balance the CPU load between the game and the tracker.</source> - <translation type="unfinished"></translation> + <translation>Тонкая настройка</translation> </message> <message> <source>ROI Smoothing Alpha</source> - <translation type="unfinished"></translation> + <translation>Сглаживание ROI</translation> </message> <message> <source>ROI Zoom</source> - <translation type="unfinished"></translation> + <translation>Масштабирование ROI</translation> </message> <message> - <source>Show the image patch that the pose estimation model sees.</source> - <translation type="unfinished"></translation> + <source>Thread Count</source> + <translation>Количество потоков</translation> </message> <message> - <source>Show Network Input</source> - <translation type="unfinished"></translation> + <source>Resolution</source> + <translation>Разрешение</translation> </message> <message> - <source>Amount of smoothing of the face region coordinates. Can help stabilize the pose.</source> - <translation type="unfinished"></translation> + <source>Field of view. Needed to transform the pose to world coordinates.</source> + <translation>Угол обзора камеры. Требуется для преобразования положения головы в глобальные координаты</translation> </message> <message> - <source>Thread Count</source> - <translation type="unfinished"></translation> + <source>Requested video frame rate. Actual setting may not be supported by the camera.</source> + <translation>Частота кадров. Реальные значения могут не поддерживаться камерой.</translation> </message> <message> - <source>Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1.</source> - <translation type="unfinished"></translation> + <source>The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution.</source> + <translation>Разрешение камеры, для тех случаев, когда быстродействие камеры максимально в определенном разрешении. Может быть масштабировано до внутреннего разрешения.</translation> + </message> + <message> + <source>Number of threads. Can be used to balance the CPU load between the game and the tracker.</source> + <translation>Количество потоков. Используется для балансировки нагрузки на процессор между игрой и трекером.</translation> </message> <message> - <source>Camera override</source> - <translation type="unfinished"></translation> + <source>Show the image patch that the pose estimation model sees.</source> + <translation>Показать изображение, используемое моделью определения позиции</translation> + </message> + <message> + <source>Amount of smoothing of the face region coordinates. Can help stabilize the pose.</source> + <translation>Сглаживание координат области лица. Может помочь стабилизировать позицию.</translation> + </message> + <message> + <source>Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1.</source> + <translation>Фактор масштабирования области лица. Применяется перед передачей кадра в модель определения позиции. Наилучшие результаты близки к 1</translation> </message> </context> <context> <name>neuralnet_tracker_ns::NeuralNetDialog</name> <message> <source>Default</source> - <translation type="unfinished"></translation> + <translation>По умолчанию</translation> </message> <message> <source>Tracker Offline</source> - <translation type="unfinished"></translation> + <translation>Трекер выключен</translation> </message> <message> <source>%1x%2 @ %3 FPS / Inference: %4 ms</source> - <translation type="unfinished"></translation> + <translation>%1x%2 @ %3 FPS; Время оценки: %4 мс</translation> </message> <message> <source>%1 yaw samples. Yaw more to %2 samples for stable calibration.</source> - <translation type="unfinished"></translation> + <translation>Сэмплов поворота: %1. +Поворачивайте голову в стороны до %2 сэмплов для стабильной калибрации.</translation> </message> <message> <source>%1 pitch samples. Pitch more to %2 samples for stable calibration.</source> - <translation type="unfinished"></translation> + <translation>Сэмплов наклона: %1. +Наклоняйте голову вниз/вверх до %2 сэмплов для стабильной калибрации.</translation> </message> <message> <source>%1 samples. Over %2, good!</source> - <translation type="unfinished"></translation> + <translation>%1 сэмплов. Более %2, достаточно.</translation> </message> <message> <source>Stop calibration</source> - <translation type="unfinished"></translation> + <translation>Остановить калибровку</translation> </message> <message> <source>Start calibration</source> - <translation type="unfinished"></translation> + <translation>Начать калибровку</translation> </message> </context> </TS> diff --git a/tracker-neuralnet/model_adapters.cpp b/tracker-neuralnet/model_adapters.cpp index af599321..a8580a89 100644 --- a/tracker-neuralnet/model_adapters.cpp +++ b/tracker-neuralnet/model_adapters.cpp @@ -8,7 +8,6 @@ #include <QDebug> - namespace neuralnet_tracker_ns { @@ -165,6 +164,24 @@ double Localizer::last_inference_time_millis() const } +std::string PoseEstimator::get_network_input_name(size_t i) const +{ +#if ORT_API_VERSION >= 12 + return std::string(&*session_.GetInputNameAllocated(i, allocator_)); +#else + return std::string(session_.GetInputName(i, allocator_)); +#endif +} + +std::string PoseEstimator::get_network_output_name(size_t i) const +{ +#if ORT_API_VERSION >= 12 + return std::string(&*session_.GetOutputNameAllocated(i, allocator_)); +#else + return std::string(session_.GetOutputName(i, allocator_)); +#endif +} + PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&session) : model_version_{session.GetModelMetadata().GetVersion()} , session_{std::move(session)} @@ -215,14 +232,16 @@ PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&ses qDebug() << "Pose model inputs (" << session_.GetInputCount() << ")"; qDebug() << "Pose model outputs (" << session_.GetOutputCount() << "):"; + output_names_.resize(session_.GetOutputCount()); + output_c_names_.resize(session_.GetOutputCount()); for (size_t i=0; i<session_.GetOutputCount(); ++i) { - const char* name = session_.GetOutputName(i, allocator_); + std::string name = get_network_output_name(i); const auto& output_info = session_.GetOutputTypeInfo(i); const auto& onnx_tensor_spec = output_info.GetTensorTypeAndShapeInfo(); auto my_tensor_spec = understood_outputs.find(name); - qDebug() << "\t" << name << " (" << onnx_tensor_spec.GetShape() << ") dtype: " << onnx_tensor_spec.GetElementType() << " " << + qDebug() << "\t" << name.c_str() << " (" << onnx_tensor_spec.GetShape() << ") dtype: " << onnx_tensor_spec.GetElementType() << " " << (my_tensor_spec != understood_outputs.end() ? "ok" : "unknown"); if (my_tensor_spec != understood_outputs.end()) @@ -240,7 +259,8 @@ PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&ses // Create tensor regardless and ignore output output_val_.push_back(create_tensor(output_info, allocator_)); } - output_names_.push_back(name); + output_names_[i] = name; + output_c_names_[i] = output_names_[i].c_str(); } has_uncertainty_ = understood_outputs.at("rotaxis_scales_tril").available || @@ -270,9 +290,12 @@ PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&ses // output_val_.push_back(create_tensor(output_info, allocator_)); // } + input_names_.resize(session_.GetInputCount()); + input_c_names_.resize(session_.GetInputCount()); for (size_t i = 0; i < session_.GetInputCount(); ++i) { - input_names_.push_back(session_.GetInputName(i, allocator_)); + input_names_[i] = get_network_input_name(i); + input_c_names_[i] = input_names_[i].c_str(); } assert (input_names_.size() == input_val_.size()); @@ -312,11 +335,11 @@ std::optional<PoseEstimator::Face> PoseEstimator::run( { session_.Run( Ort::RunOptions{ nullptr }, - input_names_.data(), + input_c_names_.data(), input_val_.data(), input_val_.size(), - output_names_.data(), - output_val_.data(), + output_c_names_.data(), + output_val_.data(), output_val_.size()); } catch (const Ort::Exception &e) @@ -430,4 +453,4 @@ double PoseEstimator::last_inference_time_millis() const -} // namespace neuralnet_tracker_ns
\ No newline at end of file +} // namespace neuralnet_tracker_ns diff --git a/tracker-neuralnet/model_adapters.h b/tracker-neuralnet/model_adapters.h index 3fbfb861..820330cf 100644 --- a/tracker-neuralnet/model_adapters.h +++ b/tracker-neuralnet/model_adapters.h @@ -3,6 +3,7 @@ #include <optional> #include <array> #include <vector> +#include <string> #include <onnxruntime_cxx_api.h> #include <opencv2/core.hpp> @@ -21,7 +22,7 @@ class Localizer public: Localizer(Ort::MemoryInfo &allocator_info, Ort::Session &&session); - + // Returns bounding wrt image coordinate of the input image // The preceeding float is the score for being a face normalized to [0,1]. std::pair<float, cv::Rect2f> run( @@ -68,13 +69,16 @@ class PoseEstimator bool has_uncertainty() const { return has_uncertainty_; } private: + std::string get_network_input_name(size_t i) const; + std::string get_network_output_name(size_t i) const; int64_t model_version_ = 0; // Queried meta data from the ONNX file Ort::Session session_{nullptr}; // ONNX's runtime context for running the model Ort::Allocator allocator_; // Memory allocator for tensors // Inputs cv::Mat scaled_frame_{}, input_mat_{}; // Input. One is the original crop, the other is rescaled (?) std::vector<Ort::Value> input_val_; // Tensors to put into the model - std::vector<const char*> input_names_; // Refers to the names in the onnx model. + std::vector<std::string> input_names_; // Refers to the names in the onnx model. + std::vector<const char *> input_c_names_; // Refers to the C names in the onnx model. // Outputs cv::Vec<float, 3> output_coord_{}; // 2d Coordinate and head size output. cv::Vec<float, 4> output_quat_{}; // Quaternion output @@ -83,7 +87,8 @@ class PoseEstimator cv::Vec<float, 2> output_eyes_{}; cv::Vec<float, 3> output_coord_scales_{}; std::vector<Ort::Value> output_val_; // Tensors to put the model outputs in. - std::vector<const char*> output_names_; // Refers to the names in the onnx model. + std::vector<std::string> output_names_; // Refers to the names in the onnx model. + std::vector<const char *> output_c_names_; // Refers to the C names in the onnx model. // More bookkeeping size_t num_recurrent_states_ = 0; double last_inference_time_ = 0; @@ -99,4 +104,4 @@ int find_input_intensity_quantile(const cv::Mat& frame, float percentage); void normalize_brightness(const cv::Mat& frame, cv::Mat& out); -} // namespace neuralnet_tracker_ns
\ No newline at end of file +} // namespace neuralnet_tracker_ns |