author     Stanislaw Halik <sthalik@misaki.pl>    2023-05-09 15:34:38 +0200
committer  Stanislaw Halik <sthalik@misaki.pl>    2023-05-09 15:34:38 +0200
commit     780aca2622d907320a1cf462390f192fb3ae4b31 (patch)
tree       c7843a5ebe0620d8f97ae7ba86ba6f5d06ffc167 /tracker-neuralnet
parent     c1de1499775d47a574bd52d07acbb269845fb75b (diff)
parent     051fb2f94f6364b80219a3c671bb953d2e54a140 (diff)
Merge branch 'master' into trackhat-sensor-v2
Diffstat (limited to 'tracker-neuralnet')
-rw-r--r--  tracker-neuralnet/CMakeLists.txt                |  14
-rw-r--r--  tracker-neuralnet/ftnoir_tracker_neuralnet.cpp  |  71
-rw-r--r--  tracker-neuralnet/lang/ru_RU.ts                 | 107
-rw-r--r--  tracker-neuralnet/model_adapters.cpp            |  41
-rw-r--r--  tracker-neuralnet/model_adapters.h              |  13
5 files changed, 126 insertions(+), 120 deletions(-)
diff --git a/tracker-neuralnet/CMakeLists.txt b/tracker-neuralnet/CMakeLists.txt
index f414c920..db568fae 100644
--- a/tracker-neuralnet/CMakeLists.txt
+++ b/tracker-neuralnet/CMakeLists.txt
@@ -1,4 +1,9 @@
include(opentrack-opencv)
+set(host-spec "${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_SIZEOF_VOID_P}")
+if(host-spec MATCHES "^Linux i[3-6]86 4$")
+ return()
+endif()
+
find_package(OpenCV QUIET)
find_package(OpenMP QUIET) # Used to control number of onnx threads.
find_package(ONNXRuntime QUIET)
@@ -11,16 +16,21 @@ if(OpenCV_FOUND AND ONNXRuntime_FOUND AND OpenMP_FOUND)
otr_module(tracker-neuralnet)
- target_link_libraries(${self}
+ target_link_libraries(${self}
opentrack-cv
onnxruntime::onnxruntime
opencv_calib3d
opencv_imgproc
opencv_imgcodecs
opencv_core
- OpenMP::OpenMP_C
+ OpenMP::OpenMP_CXX
)
+ # OpenMP::OpenMP_CXX doesn't set up the -fopenmp linking option, so set it up ourselves.
+ if(NOT MSVC)
+ target_link_options(${self} PUBLIC ${OpenMP_CXX_FLAGS})
+ endif()
+
install(
FILES "models/head-localizer.onnx"
"models/head-pose.onnx"
diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp
index a1a3526b..59e17063 100644
--- a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp
+++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp
@@ -13,7 +13,6 @@
#include "compat/math-imports.hpp"
#include "compat/timer.hpp"
#include "compat/check-visible.hpp"
-#include "compat/camera-names.hpp"
#include "cv/init.hpp"
#include <omp.h>
@@ -84,8 +83,7 @@ struct OnScopeExit
CamIntrinsics make_intrinsics(const cv::Mat& img, const Settings& settings)
{
const int w = img.cols, h = img.rows;
- //const double diag_fov = settings.fov * M_PI / 180.;
- const double diag_fov = 60 * M_PI / 180.; (void)settings;
+ const double diag_fov = settings.fov * M_PI / 180.;
const double fov_w = 2.*atan(tan(diag_fov/2.)/sqrt(1. + h/(double)w * h/(double)w));
const double fov_h = 2.*atan(tan(diag_fov/2.)/sqrt(1. + w/(double)h * w/(double)h));
const double focal_length_w = 1. / tan(.5 * fov_w);
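
Note: this hunk re-enables the user-configured FOV that had been hard-coded to 60 degrees. The two lines after the restored one split the diagonal FOV across each axis via the aspect ratio: tan(fov_w/2) = tan(diag_fov/2) * w / sqrt(w^2 + h^2), and symmetrically for fov_h. A self-contained check with illustrative numbers (the 640x480 resolution is an assumption, not taken from the settings):

    #include <cmath>
    #include <cstdio>

    int main()
    {
        const double pi = std::acos(-1.);
        const double w = 640., h = 480.;          // illustrative resolution
        const double diag_fov = 60. * pi / 180.;  // the old hard-coded value
        const double fov_w = 2.*std::atan(std::tan(diag_fov/2.)/std::sqrt(1. + (h/w)*(h/w)));
        const double fov_h = 2.*std::atan(std::tan(diag_fov/2.)/std::sqrt(1. + (w/h)*(w/h)));
        // Prints roughly 49.6 and 38.2 degrees for a 60-degree diagonal.
        std::printf("fov_w=%.1f fov_h=%.1f\n", fov_w*180./pi, fov_h*180./pi);
        return 0;
    }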
@@ -353,7 +351,7 @@ bool NeuralNetTracker::detect()
last_pose_affine_ = pose_affine;
}
- draw_gizmos(*face, pose_affine);
+ draw_gizmos(*face, last_pose_affine_);
return true;
}
@@ -503,38 +501,17 @@ bool NeuralNetTracker::load_and_initialize_model()
bool NeuralNetTracker::open_camera()
{
-#if 0
int rint = std::clamp(*settings_.resolution, 0, (int)std::size(resolution_choices)-1);
resolution_tuple res = resolution_choices[rint];
int fps = enum_to_fps(settings_.force_fps);
-#endif
-
- video::impl::camera::info args {};
- args.width = 640;
- args.height = 480;
- args.fps = 60;
- args.use_mjpeg = true;
QMutexLocker l(&camera_mtx_);
- camera_ = nullptr;
- const QString name = settings_.camera_name;
-
- if (name.isEmpty() || name == "TrackHat sensor")
- {
- camera_ = video::make_camera_("TrackHat sensor");
- if (camera_ && camera_->start(args))
- return true;
- if (!name.isEmpty())
- return false;
- }
-
- camera_ = video::make_camera(name);
+ camera_ = video::make_camera(settings_.camera_name);
if (!camera_)
return false;
-#if 0
video::impl::camera::info args {};
if (res.width)
@@ -546,7 +523,6 @@ bool NeuralNetTracker::open_camera()
args.fps = fps;
args.use_mjpeg = settings_.use_mjpeg;
-#endif
if (!camera_->start(args))
{
@@ -624,8 +600,6 @@ void NeuralNetTracker::run()
std::chrono::duration_cast<std::chrono::milliseconds>(
clk.now() - t).count()*1.e-3);
}
-
- camera_ = nullptr;
}
@@ -670,23 +644,19 @@ void NeuralNetTracker::update_fps(double dt)
void NeuralNetTracker::data(double *data)
{
- auto tmp2 = [&]()
+ Affine tmp = [&]()
{
QMutexLocker lck(&mtx_);
return last_pose_affine_;
}();
- if (!tmp2)
- return;
- const auto& tmp = *tmp2;
-
const auto& mx = tmp.R.col(0);
const auto& my = tmp.R.col(1);
const auto& mz = -tmp.R.col(2);
const float yaw = std::atan2(mx(2), mx(0));
const float pitch = -std::atan2(-mx(1), std::sqrt(mx(2)*mx(2)+mx(0)*mx(0)));
- const float roll = -std::atan2(-my(2), mz(2));
+ const float roll = std::atan2(-my(2), mz(2));
{
constexpr double rad2deg = 180/M_PI;
data[Yaw] = rad2deg * yaw;
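
Note: last_pose_affine_ is now a plain Affine rather than an optional, so data() always reports the most recent pose. The immediately-invoked lambda copies it while holding mtx_ and releases the lock before the angle math runs. A simplified sketch of that snapshot pattern (the types are stand-ins, not the tracker's own):

    #include <mutex>

    struct Pose { double yaw, pitch, roll; };

    std::mutex mtx;
    Pose last_pose;

    Pose snapshot()
    {
        // Copy under the lock; compute after it is released.
        return []() {
            std::lock_guard<std::mutex> lck(mtx);
            return last_pose;
        }();
    }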
@@ -704,7 +674,7 @@ void NeuralNetTracker::data(double *data)
Affine NeuralNetTracker::pose()
{
QMutexLocker lck(&mtx_);
- return last_pose_affine_ ? *last_pose_affine_ : Affine{};
+ return last_pose_affine_;
}
std::tuple<cv::Size,double, double> NeuralNetTracker::stats() const
@@ -715,19 +685,16 @@ std::tuple<cv::Size,double, double> NeuralNetTracker::stats() const
void NeuralNetDialog::make_fps_combobox()
{
-#if 0
for (int k = 0; k < fps_MAX; k++)
{
const int hz = enum_to_fps(k);
const QString name = (hz == 0) ? tr("Default") : QString::number(hz);
ui_.cameraFPS->addItem(name, k);
}
-#endif
}
void NeuralNetDialog::make_resolution_combobox()
{
-#if 0
int k=0;
for (const auto [w, h] : resolution_choices)
{
@@ -736,7 +703,6 @@ void NeuralNetDialog::make_resolution_combobox()
: QString::number(w) + " x " + QString::number(h);
ui_.resolution->addItem(s, k++);
}
-#endif
}
@@ -748,44 +714,21 @@ NeuralNetDialog::NeuralNetDialog() :
make_fps_combobox();
make_resolution_combobox();
- ui_.cameraName->addItem(QString{});
for (const auto& str : video::camera_names())
ui_.cameraName->addItem(str);
tie_setting(settings_.camera_name, ui_.cameraName);
-#if 0
tie_setting(settings_.fov, ui_.cameraFOV);
-#endif
tie_setting(settings_.offset_fwd, ui_.tx_spin);
tie_setting(settings_.offset_up, ui_.ty_spin);
tie_setting(settings_.offset_right, ui_.tz_spin);
tie_setting(settings_.show_network_input, ui_.showNetworkInput);
tie_setting(settings_.roi_filter_alpha, ui_.roiFilterAlpha);
-#if 0
tie_setting(settings_.use_mjpeg, ui_.use_mjpeg);
-#endif
tie_setting(settings_.roi_zoom, ui_.roiZoom);
tie_setting(settings_.num_threads, ui_.threadCount);
-#if 0
tie_setting(settings_.resolution, ui_.resolution);
tie_setting(settings_.force_fps, ui_.cameraFPS);
-#endif
-
- {
- const struct {
- QString label;
- exposure_preset preset;
- } presets[] = {
- { QStringLiteral("Near (1-4ft)"), exposure_preset::near },
- { QStringLiteral("Far (4-8ft)"), exposure_preset::far },
- { QStringLiteral("Custom"), exposure_preset::ignored },
- };
-
- for (const auto& [label, preset] : presets)
- ui_.exposure_preset->addItem(label, int(preset));
-
- tie_setting(cs_.exposure, ui_.exposure_preset);
- }
connect(ui_.buttonBox, SIGNAL(accepted()), this, SLOT(doOK()));
connect(ui_.buttonBox, SIGNAL(rejected()), this, SLOT(doCancel()));
@@ -807,13 +750,11 @@ NeuralNetDialog::NeuralNetDialog() :
void NeuralNetDialog::save()
{
settings_.b->save();
- cs_.b->save();
}
void NeuralNetDialog::reload()
{
settings_.b->reload();
- cs_.b->reload();
}
void NeuralNetDialog::doOK()
diff --git a/tracker-neuralnet/lang/ru_RU.ts b/tracker-neuralnet/lang/ru_RU.ts
index 4c9cec04..b191e769 100644
--- a/tracker-neuralnet/lang/ru_RU.ts
+++ b/tracker-neuralnet/lang/ru_RU.ts
@@ -5,123 +5,150 @@
<name>Form</name>
<message>
<source>Tracker settings</source>
- <translation type="unfinished"></translation>
+ <translation>Настройки трекера</translation>
+ </message>
+ <message>
+ <source>Diagonal FOV</source>
+ <translation>Угол обзора</translation>
</message>
<message>
<source>Camera settings</source>
- <translation type="unfinished"></translation>
+ <translation>Настройки камеры</translation>
+ </message>
+ <message>
+ <source>Frames per second</source>
+ <translation>Кадры в секунду</translation>
+ </message>
+ <message>
+ <source>Camera name</source>
+ <translation>Камера</translation>
+ </message>
+ <message>
+ <source>Camera Configuration</source>
+ <translation>Конфигурация камеры</translation>
</message>
<message>
<source>Head Center Offset</source>
- <translation type="unfinished"></translation>
+ <translation>Смещение центра головы</translation>
</message>
<message>
<source> mm</source>
- <translation type="unfinished"></translation>
+ <translation> мм</translation>
</message>
<message>
<source>Use only yaw and pitch while calibrating.
Don&apos;t roll or change position.</source>
- <translation type="unfinished"></translation>
+ <translation>Поворачивайте голову влево-вправо и наклоняйте вверх-вниз.
+Не наклоняйте набок и не смещайте голову в сторону.</translation>
</message>
<message>
<source>Start calibration</source>
- <translation type="unfinished"></translation>
+ <translation>Начать калибровку</translation>
</message>
<message>
<source>Right</source>
- <translation type="unfinished"></translation>
+ <translation>Вправо</translation>
</message>
<message>
<source>Forward</source>
- <translation type="unfinished"></translation>
+ <translation>Вперед</translation>
</message>
<message>
<source>Up</source>
- <translation type="unfinished"></translation>
+ <translation>Вверх</translation>
</message>
<message>
- <source>Exposure preset</source>
- <translation type="unfinished"></translation>
+ <source>Show Network Input</source>
+ <translation>Показать входные данные</translation>
</message>
<message>
- <source>Camera Configuration</source>
- <translation type="unfinished"></translation>
+ <source>MJPEG</source>
+ <translation>Использовать MJPEG</translation>
</message>
<message>
<source>Tuning / Debug</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <source>Number of threads. Can be used to balance the CPU load between the game and the tracker.</source>
- <translation type="unfinished"></translation>
+ <translation>Тонкая настройка</translation>
</message>
<message>
<source>ROI Smoothing Alpha</source>
- <translation type="unfinished"></translation>
+ <translation>Сглаживание ROI</translation>
</message>
<message>
<source>ROI Zoom</source>
- <translation type="unfinished"></translation>
+ <translation>Масштабирование ROI</translation>
</message>
<message>
- <source>Show the image patch that the pose estimation model sees.</source>
- <translation type="unfinished"></translation>
+ <source>Thread Count</source>
+ <translation>Количество потоков</translation>
</message>
<message>
- <source>Show Network Input</source>
- <translation type="unfinished"></translation>
+ <source>Resolution</source>
+ <translation>Разрешение</translation>
</message>
<message>
- <source>Amount of smoothing of the face region coordinates. Can help stabilize the pose.</source>
- <translation type="unfinished"></translation>
+ <source>Field of view. Needed to transform the pose to world coordinates.</source>
+ <translation>Угол обзора камеры. Требуется для преобразования положения головы в глобальные координаты</translation>
</message>
<message>
- <source>Thread Count</source>
- <translation type="unfinished"></translation>
+ <source>Requested video frame rate. Actual setting may not be supported by the camera.</source>
+ <translation>Частота кадров. Реальные значения могут не поддерживаться камерой.</translation>
</message>
<message>
- <source>Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1.</source>
- <translation type="unfinished"></translation>
+ <source>The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution.</source>
+ <translation>Разрешение камеры, для тех случаев, когда быстродействие камеры максимально в определенном разрешении. Может быть масштабировано до внутреннего разрешения.</translation>
+ </message>
+ <message>
+ <source>Number of threads. Can be used to balance the CPU load between the game and the tracker.</source>
+ <translation>Количество потоков. Используется для балансировки нагрузки на процессор между игрой и трекером.</translation>
</message>
<message>
- <source>Camera override</source>
- <translation type="unfinished"></translation>
+ <source>Show the image patch that the pose estimation model sees.</source>
+ <translation>Показать изображение, используемое моделью определения позиции</translation>
+ </message>
+ <message>
+ <source>Amount of smoothing of the face region coordinates. Can help stabilize the pose.</source>
+ <translation>Сглаживание координат области лица. Может помочь стабилизировать позицию.</translation>
+ </message>
+ <message>
+ <source>Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1.</source>
+ <translation>Фактор масштабирования области лица. Применяется перед передачей кадра в модель определения позиции. Наилучшие результаты близки к 1</translation>
</message>
</context>
<context>
<name>neuralnet_tracker_ns::NeuralNetDialog</name>
<message>
<source>Default</source>
- <translation type="unfinished"></translation>
+ <translation>По умолчанию</translation>
</message>
<message>
<source>Tracker Offline</source>
- <translation type="unfinished"></translation>
+ <translation>Трекер выключен</translation>
</message>
<message>
<source>%1x%2 @ %3 FPS / Inference: %4 ms</source>
- <translation type="unfinished"></translation>
+ <translation>%1x%2 @ %3 FPS; Время оценки: %4 мс</translation>
</message>
<message>
<source>%1 yaw samples. Yaw more to %2 samples for stable calibration.</source>
- <translation type="unfinished"></translation>
+ <translation>Сэмплов поворота: %1.
+Поворачивайте голову в стороны до %2 сэмплов для стабильной калибрации.</translation>
</message>
<message>
<source>%1 pitch samples. Pitch more to %2 samples for stable calibration.</source>
- <translation type="unfinished"></translation>
+ <translation>Сэмплов наклона: %1.
+Наклоняйте голову вниз/вверх до %2 сэмплов для стабильной калибрации.</translation>
</message>
<message>
<source>%1 samples. Over %2, good!</source>
- <translation type="unfinished"></translation>
+ <translation>%1 сэмплов. Более %2, достаточно.</translation>
</message>
<message>
<source>Stop calibration</source>
- <translation type="unfinished"></translation>
+ <translation>Остановить калибровку</translation>
</message>
<message>
<source>Start calibration</source>
- <translation type="unfinished"></translation>
+ <translation>Начать калибровку</translation>
</message>
</context>
</TS>
diff --git a/tracker-neuralnet/model_adapters.cpp b/tracker-neuralnet/model_adapters.cpp
index af599321..a8580a89 100644
--- a/tracker-neuralnet/model_adapters.cpp
+++ b/tracker-neuralnet/model_adapters.cpp
@@ -8,7 +8,6 @@
#include <QDebug>
-
namespace neuralnet_tracker_ns
{
@@ -165,6 +164,24 @@ double Localizer::last_inference_time_millis() const
}
+std::string PoseEstimator::get_network_input_name(size_t i) const
+{
+#if ORT_API_VERSION >= 12
+ return std::string(&*session_.GetInputNameAllocated(i, allocator_));
+#else
+ return std::string(session_.GetInputName(i, allocator_));
+#endif
+}
+
+std::string PoseEstimator::get_network_output_name(size_t i) const
+{
+#if ORT_API_VERSION >= 12
+ return std::string(&*session_.GetOutputNameAllocated(i, allocator_));
+#else
+ return std::string(session_.GetOutputName(i, allocator_));
+#endif
+}
+
PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&session)
: model_version_{session.GetModelMetadata().GetVersion()}
, session_{std::move(session)}
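
Note: GetInputName()/GetOutputName() were deprecated in onnxruntime 1.12 (ORT_API_VERSION 12) in favor of the allocated variants, which return an owning Ort::AllocatedStringPtr instead of a raw pointer; hence the copy into std::string before the smart pointer goes out of scope. A sketch of the same split, assuming only the onnxruntime C++ API already used in this file:

    #include <onnxruntime_cxx_api.h>
    #include <string>

    std::string input_name(Ort::Session& session, Ort::Allocator& alloc, size_t i)
    {
    #if ORT_API_VERSION >= 12
        // The buffer lives only as long as the AllocatedStringPtr; copy it out.
        Ort::AllocatedStringPtr p = session.GetInputNameAllocated(i, alloc);
        return std::string(p.get());
    #else
        // Pre-1.12 API: the allocator owns the returned C string.
        return std::string(session.GetInputName(i, alloc));
    #endif
    }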
@@ -215,14 +232,16 @@ PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&ses
qDebug() << "Pose model inputs (" << session_.GetInputCount() << ")";
qDebug() << "Pose model outputs (" << session_.GetOutputCount() << "):";
+ output_names_.resize(session_.GetOutputCount());
+ output_c_names_.resize(session_.GetOutputCount());
for (size_t i=0; i<session_.GetOutputCount(); ++i)
{
- const char* name = session_.GetOutputName(i, allocator_);
+ std::string name = get_network_output_name(i);
const auto& output_info = session_.GetOutputTypeInfo(i);
const auto& onnx_tensor_spec = output_info.GetTensorTypeAndShapeInfo();
auto my_tensor_spec = understood_outputs.find(name);
- qDebug() << "\t" << name << " (" << onnx_tensor_spec.GetShape() << ") dtype: " << onnx_tensor_spec.GetElementType() << " " <<
+ qDebug() << "\t" << name.c_str() << " (" << onnx_tensor_spec.GetShape() << ") dtype: " << onnx_tensor_spec.GetElementType() << " " <<
(my_tensor_spec != understood_outputs.end() ? "ok" : "unknown");
if (my_tensor_spec != understood_outputs.end())
@@ -240,7 +259,8 @@ PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&ses
// Create tensor regardless and ignore output
output_val_.push_back(create_tensor(output_info, allocator_));
}
- output_names_.push_back(name);
+ output_names_[i] = name;
+ output_c_names_[i] = output_names_[i].c_str();
}
has_uncertainty_ = understood_outputs.at("rotaxis_scales_tril").available ||
@@ -270,9 +290,12 @@ PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&ses
// output_val_.push_back(create_tensor(output_info, allocator_));
// }
+ input_names_.resize(session_.GetInputCount());
+ input_c_names_.resize(session_.GetInputCount());
for (size_t i = 0; i < session_.GetInputCount(); ++i)
{
- input_names_.push_back(session_.GetInputName(i, allocator_));
+ input_names_[i] = get_network_input_name(i);
+ input_c_names_[i] = input_names_[i].c_str();
}
assert (input_names_.size() == input_val_.size());
@@ -312,11 +335,11 @@ std::optional<PoseEstimator::Face> PoseEstimator::run(
{
session_.Run(
Ort::RunOptions{ nullptr },
- input_names_.data(),
+ input_c_names_.data(),
input_val_.data(),
input_val_.size(),
- output_names_.data(),
- output_val_.data(),
+ output_c_names_.data(),
+ output_val_.data(),
output_val_.size());
}
catch (const Ort::Exception &e)
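
Note: Ort::Session::Run() takes const char* const* name arrays, which is why the patch keeps each name twice: a std::string owner (input_names_/output_names_) and a const char* view (input_c_names_/output_c_names_). The owner vectors are resized up front so the c_str() pointers are never invalidated by reallocation. A minimal sketch of that pattern:

    #include <string>
    #include <vector>

    // Build the C-string view Run() expects; the pointers stay valid while
    // `names` is alive and unmodified.
    std::vector<const char*> make_c_names(const std::vector<std::string>& names)
    {
        std::vector<const char*> c_names(names.size());
        for (size_t i = 0; i < names.size(); ++i)
            c_names[i] = names[i].c_str();
        return c_names;
    }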
@@ -430,4 +453,4 @@ double PoseEstimator::last_inference_time_millis() const
-} // namespace neuralnet_tracker_ns
\ No newline at end of file
+} // namespace neuralnet_tracker_ns
diff --git a/tracker-neuralnet/model_adapters.h b/tracker-neuralnet/model_adapters.h
index 3fbfb861..820330cf 100644
--- a/tracker-neuralnet/model_adapters.h
+++ b/tracker-neuralnet/model_adapters.h
@@ -3,6 +3,7 @@
#include <optional>
#include <array>
#include <vector>
+#include <string>
#include <onnxruntime_cxx_api.h>
#include <opencv2/core.hpp>
@@ -21,7 +22,7 @@ class Localizer
public:
Localizer(Ort::MemoryInfo &allocator_info,
Ort::Session &&session);
-
+
// Returns bounding wrt image coordinate of the input image
// The preceeding float is the score for being a face normalized to [0,1].
std::pair<float, cv::Rect2f> run(
@@ -68,13 +69,16 @@ class PoseEstimator
bool has_uncertainty() const { return has_uncertainty_; }
private:
+ std::string get_network_input_name(size_t i) const;
+ std::string get_network_output_name(size_t i) const;
int64_t model_version_ = 0; // Queried meta data from the ONNX file
Ort::Session session_{nullptr}; // ONNX's runtime context for running the model
Ort::Allocator allocator_; // Memory allocator for tensors
// Inputs
cv::Mat scaled_frame_{}, input_mat_{}; // Input. One is the original crop, the other is rescaled (?)
std::vector<Ort::Value> input_val_; // Tensors to put into the model
- std::vector<const char*> input_names_; // Refers to the names in the onnx model.
+ std::vector<std::string> input_names_; // Refers to the names in the onnx model.
+ std::vector<const char *> input_c_names_; // Refers to the C names in the onnx model.
// Outputs
cv::Vec<float, 3> output_coord_{}; // 2d Coordinate and head size output.
cv::Vec<float, 4> output_quat_{}; // Quaternion output
@@ -83,7 +87,8 @@ class PoseEstimator
cv::Vec<float, 2> output_eyes_{};
cv::Vec<float, 3> output_coord_scales_{};
std::vector<Ort::Value> output_val_; // Tensors to put the model outputs in.
- std::vector<const char*> output_names_; // Refers to the names in the onnx model.
+ std::vector<std::string> output_names_; // Refers to the names in the onnx model.
+ std::vector<const char *> output_c_names_; // Refers to the C names in the onnx model.
// More bookkeeping
size_t num_recurrent_states_ = 0;
double last_inference_time_ = 0;
@@ -99,4 +104,4 @@ int find_input_intensity_quantile(const cv::Mat& frame, float percentage);
void normalize_brightness(const cv::Mat& frame, cv::Mat& out);
-} // namespace neuralnet_tracker_ns
\ No newline at end of file
+} // namespace neuralnet_tracker_ns