summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorMichael Welter <michael@welter-4d.de>2022-05-12 19:54:57 +0200
committerMichael Welter <michael@welter-4d.de>2022-05-19 20:46:50 +0200
commit4d6e80a6c585f6929f6bf4bf319932732160ce84 (patch)
treeffbef0e4a2625afccd9b3b1c3da682f54d1c32a1
parent25af3db8ba560d86f78aa4bfd10039588b7bab82 (diff)
tracker/nn: Add support for recurrent model and further tweaks
Regarding tweaks: * EWA smoothing of head ROI. Smoothing strength is a UI setting. * Adjustable zoom into the detected face. The predicted ROI is scaled by a factor the user can set. There is a sweet spot somewhere near 1. * Adjustable number of threads. * The ROI is no longer taken directly from the model output. This was actually not needed, except perhaps as an auxiliary training objective for the network. The tracker implementation now just uses a square area around the head center according to the predicted head size. * Add comment and debug notification on face ROI model output.
-rw-r--r--tracker-neuralnet/ftnoir_tracker_neuralnet.cpp272
-rw-r--r--tracker-neuralnet/ftnoir_tracker_neuralnet.h27
-rw-r--r--tracker-neuralnet/lang/nl_NL.ts16
-rw-r--r--tracker-neuralnet/lang/ru_RU.ts16
-rw-r--r--tracker-neuralnet/lang/stub.ts16
-rw-r--r--tracker-neuralnet/lang/zh_CN.ts16
-rw-r--r--tracker-neuralnet/neuralnet-trackercontrols.ui146
7 files changed, 432 insertions, 77 deletions
diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp
index 06874e6c..00f3f281 100644
--- a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp
+++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp
@@ -16,6 +16,7 @@
#include <opencv2/imgcodecs.hpp>
#include "compat/timer.hpp"
#include <omp.h>
+#include <stdexcept>
#ifdef _MSC_VER
# pragma warning(disable : 4702)
@@ -29,6 +30,7 @@
#include <cmath>
#include <algorithm>
#include <chrono>
+#include <string>
// Some demo code for onnx
// https://github.com/microsoft/onnxruntime/blob/master/csharp/test/Microsoft.ML.OnnxRuntime.EndToEndTests.Capi/C_Api_Sample.cpp
@@ -43,9 +45,6 @@ using numeric_types::vec2;
using numeric_types::mat33;
using quat = std::array<numeric_types::f,4>;
-// Minimal difference if at all going from 1 to 2 threads.
-static constexpr int num_threads = 1;
-
#if _MSC_VER
std::wstring convert(const QString &s) { return s.toStdWString(); }
@@ -69,6 +68,41 @@ cv::Rect_<T> squarize(const cv::Rect_<T> &r)
}
+template<class T>
+cv::Point_<T> as_point(const cv::Size_<T>& s)
+{
+ return { s.width, s.height };
+}
+
+
+template<class T>
+cv::Size_<T> as_size(const cv::Point_<T>& p)
+{
+ return { p.x, p.y };
+}
+
+
+template<class T>
+cv::Rect_<T> expand(const cv::Rect_<T>& r, T factor)
+{
+ // xnew = l+.5*w - w*f*0.5 = l + .5*(w - new_w)
+ const cv::Size_<T> new_size = { r.width * factor, r.height * factor };
+ const cv::Point_<T> new_tl = r.tl() + (as_point(r.size()) - as_point(new_size)) / T(2);
+ return cv::Rect_<T>(new_tl, new_size);
+}
+
+
+template<class T>
+cv::Rect_<T> ewa_filter(const cv::Rect_<T>& last, const cv::Rect_<T>& current, T alpha)
+{
+ const auto last_center = T(0.5) * (last.tl() + last.br());
+ const auto cur_center = T(0.5) * (current.tl() + current.br());
+ const cv::Point_<T> new_size = as_point(last.size()) + alpha * (as_point(current.size()) - as_point(last.size()));
+ const cv::Point_<T> new_center = last_center + alpha * (cur_center - last_center);
+ return cv::Rect_<T>(new_center - T(0.5) * new_size, as_size(new_size));
+}
+
+
cv::Rect2f unnormalize(const cv::Rect2f &r, int h, int w)
{
auto unnorm = [](float x) -> float { return 0.5*(x+1); };
@@ -138,6 +172,30 @@ mat33 quaternion_to_mat33(const std::array<float,4> quat)
}
+vec3 rotate_vec(const quat& q, const vec3& p)
+{
+ const float qw = q[0];
+ const float qi = q[1];
+ const float qj = q[2];
+ const float qk = q[3];
+ const float pi = p[0];
+ const float pj = p[1];
+ const float pk = p[2];
+ const quat tmp{
+ - qi*pi - qj*pj - qk*pk,
+ qw*pi + qj*pk - qk*pj,
+ qw*pj - qi*pk + qk*pi,
+ qw*pk + qi*pj - qj*pi
+ };
+ const vec3 out {
+ -tmp[0]*qi + tmp[1]*qw - tmp[2]*qk + tmp[3]*qj,
+ -tmp[0]*qj + tmp[1]*qk + tmp[2]*qw - tmp[3]*qi,
+ -tmp[0]*qk - tmp[1]*qj + tmp[2]*qi + tmp[3]*qw
+ };
+ return out;
+}
+
+
vec3 image_to_world(float x, float y, float size, float reference_size_in_mm, const cv::Size2i& image_size, const CamIntrinsics& intrinsics)
{
// Compute the location the network outputs in 3d space.
@@ -186,8 +244,8 @@ T iou(const cv::Rect_<T> &a, const cv::Rect_<T> &b)
// have the shape B x C x H x W, where B=C=1.
cv::Size get_input_image_shape(const Ort::Session &session)
{
- if (session.GetInputCount() != 1)
- throw std::invalid_argument("Model must take exactly one input tensor");
+ if (session.GetInputCount() < 1)
+ throw std::invalid_argument("Model must take at least one input tensor");
const std::vector<std::int64_t> shape =
session.GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
if (shape.size() != 4)
@@ -196,6 +254,17 @@ cv::Size get_input_image_shape(const Ort::Session &session)
}
+Ort::Value create_tensor(const Ort::TypeInfo& info, Ort::Allocator& alloc)
+{
+ const auto shape = info.GetTensorTypeAndShapeInfo().GetShape();
+ auto t = Ort::Value::CreateTensor<float>(
+ alloc, shape.data(), shape.size());
+ memset(t.GetTensorMutableData<float>(), 0, sizeof(float)*info.GetTensorTypeAndShapeInfo().GetElementCount());
+ return t;
+}
+
+
+
int enum_to_fps(int value)
{
switch (value)
@@ -241,10 +310,7 @@ std::pair<float, cv::Rect2f> Localizer::run(
Timer t; t.start();
- const auto nt = omp_get_num_threads();
- omp_set_num_threads(num_threads);
session.Run(Ort::RunOptions{nullptr}, input_names, &input_val, 1, output_names, &output_val, 1);
- omp_set_num_threads(nt);
last_inference_time = t.elapsed_ms();
@@ -267,36 +333,85 @@ double Localizer::last_inference_time_millis() const
PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&_session)
- : session{std::move(_session)}
- , model_version(session.GetModelMetadata().GetVersion())
+ : model_version{_session.GetModelMetadata().GetVersion()}
+ , session{std::move(_session)}
+ , allocator{session, allocator_info}
{
+ using namespace std::literals::string_literals;
+
+ if (session.GetOutputCount() < 2)
+ throw std::runtime_error("Invalid Model: must have at least two outputs");
+
+ // WARNING
+ // If the model was saved without meta data, it seems the version field is uninitialized.
+ // In that case reading from it is UB. However, we will just get some arbitrary number
+ // which is hopefully different from the numbers used by models where the version is set.
+ if (model_version != 2)
+ model_version = 1;
+
const cv::Size input_image_shape = get_input_image_shape(session);
scaled_frame = cv::Mat(input_image_shape, CV_8U);
- input_mat = cv::Mat(input_image_shape, CV_32F);
+ input_mat = cv::Mat(input_image_shape, CV_32F);
{
const std::int64_t input_shape[4] = { 1, 1, input_image_shape.height, input_image_shape.width };
- input_val = Ort::Value::CreateTensor<float>(allocator_info, input_mat.ptr<float>(0), input_mat.total(), input_shape, 4);
+ input_val.push_back(
+ Ort::Value::CreateTensor<float>(allocator_info, input_mat.ptr<float>(0), input_mat.total(), input_shape, 4));
}
{
const std::int64_t output_shape[2] = { 1, 3 };
- output_val[0] = Ort::Value::CreateTensor<float>(
- allocator_info, &output_coord[0], output_coord.rows, output_shape, 2);
+ output_val.push_back(Ort::Value::CreateTensor<float>(
+ allocator_info, &output_coord[0], output_coord.rows, output_shape, 2));
}
{
const std::int64_t output_shape[2] = { 1, 4 };
- output_val[1] = Ort::Value::CreateTensor<float>(
- allocator_info, &output_quat[0], output_quat.rows, output_shape, 2);
+ output_val.push_back(Ort::Value::CreateTensor<float>(
+ allocator_info, &output_quat[0], output_quat.rows, output_shape, 2));
}
+ size_t num_regular_outputs = 2;
+
+ if (session.GetOutputCount() >= 3 && "box"s == session.GetOutputName(2, allocator))
{
const std::int64_t output_shape[2] = { 1, 4 };
- output_val[2] = Ort::Value::CreateTensor<float>(
- allocator_info, &output_box[0], output_box.rows, output_shape, 2);
+ output_val.push_back(Ort::Value::CreateTensor<float>(
+ allocator_info, &output_box[0], output_box.rows, output_shape, 2));
+ ++num_regular_outputs;
+ qDebug() << "Note: Legacy model output for face ROI is currently ignored";
+ }
+
+ num_recurrent_states = session.GetInputCount()-1;
+ if (session.GetOutputCount()-num_regular_outputs != num_recurrent_states)
+ throw std::runtime_error("Invalid Model: After regular inputs and outputs the model must have equal number of inputs and outputs for tensors holding hidden states of recurrent layers.");
+
+ // Create tensors for recurrent state
+ for (size_t i = 0; i < num_recurrent_states; ++i)
+ {
+ const auto& input_info = session.GetInputTypeInfo(1+i);
+ const auto& output_info = session.GetOutputTypeInfo(num_regular_outputs+i);
+ if (input_info.GetTensorTypeAndShapeInfo().GetShape() !=
+ output_info.GetTensorTypeAndShapeInfo().GetShape())
+ throw std::runtime_error("Invalid Model: Tensors for recurrent hidden states should have same shape on intput and output");
+ input_val.push_back(create_tensor(input_info, allocator));
+ output_val.push_back(create_tensor(output_info, allocator));
}
+
+ for (size_t i = 0; i < session.GetInputCount(); ++i)
+ {
+ input_names.push_back(session.GetInputName(i, allocator));
+ }
+ for (size_t i = 0; i < session.GetOutputCount(); ++i)
+ {
+ output_names.push_back(session.GetOutputName(i, allocator));
+ }
+
+ qDebug() << "Model inputs: " << session.GetInputCount() << ", outputs: " << session.GetOutputCount() << ", recurrent states: " << num_recurrent_states;
+
+ assert (input_names.size() == input_val.size());
+ assert (output_names.size() == output_val.size());
}
@@ -336,7 +451,7 @@ std::optional<PoseEstimator::Face> PoseEstimator::run(
};
cv::getRectSubPix(frame, {patch_size, patch_size}, patch_center, cropped);
- // Will get failure if patch_center is outside image boundaries.
+ // Will get failure if patch_center is outside image boundaries.
// Have to catch this case.
if (cropped.rows != patch_size || cropped.cols != patch_size)
return {};
@@ -355,26 +470,37 @@ std::optional<PoseEstimator::Face> PoseEstimator::run(
assert (input_mat.ptr(0) == p);
assert (!input_mat.empty() && input_mat.isContinuous());
- const char* input_names[] = {"x"};
- const char* output_names[] = {"pos_size", "quat", "box"};
Timer t; t.start();
- const auto nt = omp_get_num_threads();
- omp_set_num_threads(num_threads);
try
{
- session.Run(Ort::RunOptions{ nullptr }, input_names, &input_val, 1, output_names, output_val, 3);
+ session.Run(
+ Ort::RunOptions{ nullptr },
+ input_names.data(),
+ input_val.data(),
+ input_val.size(),
+ output_names.data(),
+ output_val.data(),
+ output_val.size());
}
catch (const Ort::Exception &e)
{
qDebug() << "Failed to run the model: " << e.what();
- omp_set_num_threads(nt);
return {};
}
- omp_set_num_threads(nt);
- // FIXME: Execution time fluctuates wildly. 19 to 26 ms. Why???
+ for (size_t i = 0; i<num_recurrent_states; ++i)
+ {
+ // Next step, the current output becomes the input.
+ // Thus we realize the recurrent connection.
+ // Only swaps the internal pointers. There is no copy of data.
+ std::swap(
+ output_val[output_val.size()-num_recurrent_states+i],
+ input_val[input_val.size()-num_recurrent_states+i]);
+ }
+
+ // FIXME: Execution time fluctuates wildly. 19 to 26 msec. Why?
// The instructions are always the same. Maybe a memory allocation
// issue. The ONNX api suggests that tensor are allocated in an
// arena. Does that matter? Maybe the issue is something else?
@@ -383,7 +509,7 @@ std::optional<PoseEstimator::Face> PoseEstimator::run(
// Perform coordinate transformation.
// From patch-local normalized in [-1,1] to
- // frame unnormalized pixel coordinates.
+ // frame unnormalized pixel coordinates.
const cv::Point2f center = patch_center +
(0.5f*patch_size)*cv::Point2f{output_coord[0], output_coord[1]};
@@ -462,7 +588,7 @@ bool neuralnet_tracker::detect()
auto face = poseestimator->run(grayscale, *last_roi);
last_inference_time += poseestimator->last_inference_time_millis();
-
+
if (!face)
{
last_roi.reset();
@@ -470,7 +596,23 @@ bool neuralnet_tracker::detect()
return false;
}
- last_roi = face->box;
+ {
+ // Here: compute ROI from head size estimate. This helps make the size prediction more
+ // invariant to mouth opening. The tracking can be lost more often at extreme
+ // rotations, depending on the implementation details. The code down here has
+ // been tweaked so that it works pretty well.
+ // In old behaviour ROI is taken from the model outputs
+ const vec3 offset = rotate_vec(face->rotation, vec3{0.f, 0.1f*face->size, face->size*0.3f});
+ const float halfsize = face->size/float(settings.roi_zoom);
+ face->box = cv::Rect2f(
+ face->center.x + offset[0] - halfsize,
+ face->center.y + offset[1] - halfsize,
+ halfsize*2.f,
+ halfsize*2.f
+ );
+ }
+
+ last_roi = ewa_filter(*last_roi, face->box, float(settings.roi_filter_alpha));
Affine pose = compute_pose(*face);
@@ -487,6 +629,8 @@ bool neuralnet_tracker::detect()
Affine neuralnet_tracker::compute_pose(const PoseEstimator::Face &face) const
{
+ // Compute the location the network outputs in 3d space.
+
const mat33 rot_correction = compute_rotation_correction(
normalize(face.center, frame.rows, frame.cols),
intrinsics.focal_length_w);
@@ -507,7 +651,6 @@ Affine neuralnet_tracker::compute_pose(const PoseEstimator::Face &face) const
------------------------
*/
- // Compute the location the network outputs in 3d space.
const vec3 face_world_pos = image_to_world(
face.center.x, face.center.y, face.size, head_size_mm,
frame.size(),
@@ -519,9 +662,9 @@ Affine neuralnet_tracker::compute_pose(const PoseEstimator::Face &face) const
const vec3 pos = face_world_pos
+ m * vec3{
- static_cast<float>(s.offset_fwd),
- static_cast<float>(s.offset_up),
- static_cast<float>(s.offset_right)};
+ static_cast<float>(settings.offset_fwd),
+ static_cast<float>(settings.offset_up),
+ static_cast<float>(settings.offset_right)};
return { m, pos };
}
@@ -566,7 +709,7 @@ void neuralnet_tracker::draw_gizmos(
cv::circle(frame, cv::Point(xy[0],xy[1]), 5, cv::Scalar(0,0,255), -1);
}
- if (s.show_network_input)
+ if (settings.show_network_input)
{
cv::Mat netinput = poseestimator->last_network_input();
if (!netinput.empty())
@@ -587,7 +730,6 @@ void neuralnet_tracker::draw_gizmos(
neuralnet_tracker::neuralnet_tracker()
{
opencv_init();
- cv::setNumThreads(num_threads);
}
@@ -609,6 +751,8 @@ module_status neuralnet_tracker::start_tracker(QFrame* videoframe)
layout->addWidget(&*videoWidget);
videoframe->setLayout(&*layout);
videoWidget->show();
+ num_threads = settings.num_threads;
+ cv::setNumThreads(num_threads);
start();
return status_ok();
}
@@ -633,7 +777,7 @@ bool neuralnet_tracker::load_and_initialize_model()
// openmp settings. Which is what we do. omp_set_num_threads directly
// before running the inference pass.
opts.SetIntraOpNumThreads(num_threads);
- opts.SetInterOpNumThreads(num_threads);
+ opts.SetInterOpNumThreads(1);
allocator_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
localizer.emplace(
@@ -656,11 +800,11 @@ bool neuralnet_tracker::load_and_initialize_model()
bool neuralnet_tracker::open_camera()
{
- int fps = enum_to_fps(s.force_fps);
+ int fps = enum_to_fps(settings.force_fps);
QMutexLocker l(&camera_mtx);
- camera = video::make_camera(s.camera_name);
+ camera = video::make_camera(settings.camera_name);
if (!camera)
return false;
@@ -673,7 +817,7 @@ bool neuralnet_tracker::open_camera()
if (fps)
args.fps = fps;
- args.use_mjpeg = s.use_mjpeg;
+ args.use_mjpeg = settings.use_mjpeg;
if (!camera->start(args))
{
@@ -687,7 +831,7 @@ bool neuralnet_tracker::open_camera()
void neuralnet_tracker::set_intrinsics()
{
const int w = grayscale.cols, h = grayscale.rows;
- const double diag_fov = s.fov * M_PI / 180.;
+ const double diag_fov = settings.fov * M_PI / 180.;
const double fov_w = 2.*atan(tan(diag_fov/2.)/sqrt(1. + h/(double)w * h/(double)w));
const double fov_h = 2.*atan(tan(diag_fov/2.)/sqrt(1. + w/(double)h * w/(double)h));
const double focal_length_w = 1. / tan(.5 * fov_w);
@@ -746,8 +890,13 @@ void neuralnet_tracker::run()
set_intrinsics();
+ const auto nt = omp_get_num_threads();
+ omp_set_num_threads(num_threads);
+
detect();
+ omp_set_num_threads(nt);
+
if (frame.rows > 0)
videoWidget->update_image(frame);
@@ -823,26 +972,29 @@ neuralnet_dialog::neuralnet_dialog() :
ui.setupUi(this);
make_fps_combobox();
- tie_setting(s.force_fps, ui.cameraFPS);
+ tie_setting(settings.force_fps, ui.cameraFPS);
for (const auto& str : video::camera_names())
ui.cameraName->addItem(str);
- tie_setting(s.camera_name, ui.cameraName);
- tie_setting(s.fov, ui.cameraFOV);
- tie_setting(s.offset_fwd, ui.tx_spin);
- tie_setting(s.offset_up, ui.ty_spin);
- tie_setting(s.offset_right, ui.tz_spin);
- tie_setting(s.show_network_input, ui.showNetworkInput);
- tie_setting(s.use_mjpeg, ui.use_mjpeg);
+ tie_setting(settings.camera_name, ui.cameraName);
+ tie_setting(settings.fov, ui.cameraFOV);
+ tie_setting(settings.offset_fwd, ui.tx_spin);
+ tie_setting(settings.offset_up, ui.ty_spin);
+ tie_setting(settings.offset_right, ui.tz_spin);
+ tie_setting(settings.show_network_input, ui.showNetworkInput);
+ tie_setting(settings.roi_filter_alpha, ui.roiFilterAlpha);
+ tie_setting(settings.use_mjpeg, ui.use_mjpeg);
+ tie_setting(settings.roi_zoom, ui.roiZoom);
+ tie_setting(settings.num_threads, ui.threadCount);
connect(ui.buttonBox, SIGNAL(accepted()), this, SLOT(doOK()));
connect(ui.buttonBox, SIGNAL(rejected()), this, SLOT(doCancel()));
connect(ui.camera_settings, SIGNAL(clicked()), this, SLOT(camera_settings()));
- connect(&s.camera_name, value_::value_changed<QString>(), this, &neuralnet_dialog::update_camera_settings_state);
+ connect(&settings.camera_name, value_::value_changed<QString>(), this, &neuralnet_dialog::update_camera_settings_state);
- update_camera_settings_state(s.camera_name);
+ update_camera_settings_state(settings.camera_name);
connect(&calib_timer, &QTimer::timeout, this, &neuralnet_dialog::trans_calib_step);
calib_timer.setInterval(35);
@@ -852,7 +1004,7 @@ neuralnet_dialog::neuralnet_dialog() :
void neuralnet_dialog::doOK()
{
- s.b->save();
+ settings.b->save();
close();
}
@@ -871,7 +1023,7 @@ void neuralnet_dialog::camera_settings()
(void)tracker->camera->show_dialog();
}
else
- (void)video::show_dialog(s.camera_name);
+ (void)video::show_dialog(settings.camera_name);
}
@@ -942,9 +1094,9 @@ void neuralnet_dialog::startstop_trans_calib(bool start)
trans_calib.reset();
ui.sample_count_display->setText(QString());
// Tracker must run with zero'ed offset for calibration.
- s.offset_fwd = 0;
- s.offset_up = 0;
- s.offset_right = 0;
+ settings.offset_fwd = 0;
+ settings.offset_up = 0;
+ settings.offset_right = 0;
}
else
{
@@ -952,9 +1104,9 @@ void neuralnet_dialog::startstop_trans_calib(bool start)
qDebug() << "pt: stopping translation calibration";
{
auto [tmp, nsamples] = trans_calib.get_estimate();
- s.offset_fwd = int(tmp[0]);
- s.offset_up = int(tmp[1]);
- s.offset_right = int(tmp[2]);
+ settings.offset_fwd = int(tmp[0]);
+ settings.offset_up = int(tmp[1]);
+ settings.offset_right = int(tmp[2]);
}
}
ui.tx_spin->setEnabled(!start);
@@ -968,7 +1120,7 @@ void neuralnet_dialog::startstop_trans_calib(bool start)
}
-settings::settings() : opts("neuralnet-tracker") {}
+Settings::Settings() : opts("neuralnet-tracker") {}
} // neuralnet_tracker_ns
diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.h b/tracker-neuralnet/ftnoir_tracker_neuralnet.h
index b6405d1a..25f1a0a2 100644
--- a/tracker-neuralnet/ftnoir_tracker_neuralnet.h
+++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.h
@@ -50,7 +50,7 @@ enum fps_choices
};
-struct settings : opts {
+struct Settings : opts {
value<int> offset_fwd { b, "offset-fwd", 200 }, // Millimeters
offset_up { b, "offset-up", 0 },
offset_right { b, "offset-right", 0 };
@@ -58,8 +58,11 @@ struct settings : opts {
value<int> fov { b, "field-of-view", 56 };
value<fps_choices> force_fps { b, "force-fps", fps_default };
value<bool> show_network_input { b, "show-network-input", false };
+ value<double> roi_filter_alpha{ b, "roi-filter-alpha", 1. };
+ value<double> roi_zoom{ b, "roi-zoom", 1. };
value<bool> use_mjpeg { b, "use-mjpeg", false };
- settings();
+ value<int> num_threads { b, "num-threads", 1 };
+ Settings();
};
@@ -118,23 +121,26 @@ class PoseEstimator
private:
// Operates on the private image data members
int find_input_intensity_90_pct_quantile() const;
+
+ int64_t model_version = 0;
Ort::Session session{nullptr};
+ Ort::Allocator allocator;
// Inputs
cv::Mat scaled_frame{}, input_mat{};
- Ort::Value input_val{nullptr};
+ std::vector<Ort::Value> input_val;
+ std::vector<const char*> input_names;
// Outputs
cv::Vec<float, 3> output_coord{};
cv::Vec<float, 4> output_quat{};
cv::Vec<float, 4> output_box{};
- Ort::Value output_val[3] = {
- Ort::Value{nullptr},
- Ort::Value{nullptr},
- Ort::Value{nullptr}};
+ std::vector<Ort::Value> output_val;
+ std::vector<const char*> output_names;
+ size_t num_recurrent_states = 0;
double last_inference_time = 0;
- int64_t model_version = 0;
};
+
class neuralnet_tracker : protected virtual QThread, public ITracker
{
Q_OBJECT
@@ -162,7 +168,7 @@ private:
Affine compute_pose(const PoseEstimator::Face &face) const;
- settings s;
+ Settings settings;
std::optional<Localizer> localizer;
std::optional<PoseEstimator> poseestimator;
Ort::Env env{nullptr};
@@ -177,6 +183,7 @@ private:
double fps = 0;
double last_inference_time = 0;
static constexpr double RC = .25;
+ int num_threads = 1;
QMutex mtx; // Protects the pose
Affine pose_;
@@ -197,7 +204,7 @@ private:
void make_fps_combobox();
Ui::Form ui;
- settings s;
+ Settings settings;
// Calibration code mostly taken from point tracker
QTimer calib_timer;
diff --git a/tracker-neuralnet/lang/nl_NL.ts b/tracker-neuralnet/lang/nl_NL.ts
index a2dcd958..95da8f4c 100644
--- a/tracker-neuralnet/lang/nl_NL.ts
+++ b/tracker-neuralnet/lang/nl_NL.ts
@@ -64,6 +64,22 @@ Don&apos;t roll or change position.</source>
<source>MJPEG</source>
<translation type="unfinished"></translation>
</message>
+ <message>
+ <source>Tuning / Debug</source>
+ <translation type="unfinished"></translation>
+ </message>
+ <message>
+ <source>ROI Smoothing Alpha</source>
+ <translation type="unfinished"></translation>
+ </message>
+ <message>
+ <source>ROI Zoom</source>
+ <translation type="unfinished"></translation>
+ </message>
+ <message>
+ <source>Thread Count</source>
+ <translation type="unfinished"></translation>
+ </message>
</context>
<context>
<name>neuralnet_tracker_ns::neuralnet_dialog</name>
diff --git a/tracker-neuralnet/lang/ru_RU.ts b/tracker-neuralnet/lang/ru_RU.ts
index 7e8a9c09..a8252299 100644
--- a/tracker-neuralnet/lang/ru_RU.ts
+++ b/tracker-neuralnet/lang/ru_RU.ts
@@ -64,6 +64,22 @@ Don&apos;t roll or change position.</source>
<source>MJPEG</source>
<translation type="unfinished"></translation>
</message>
+ <message>
+ <source>Tuning / Debug</source>
+ <translation type="unfinished"></translation>
+ </message>
+ <message>
+ <source>ROI Smoothing Alpha</source>
+ <translation type="unfinished"></translation>
+ </message>
+ <message>
+ <source>ROI Zoom</source>
+ <translation type="unfinished"></translation>
+ </message>
+ <message>
+ <source>Thread Count</source>
+ <translation type="unfinished"></translation>
+ </message>
</context>
<context>
<name>neuralnet_tracker_ns::neuralnet_dialog</name>
diff --git a/tracker-neuralnet/lang/stub.ts b/tracker-neuralnet/lang/stub.ts
index 66e1695c..80103fde 100644
--- a/tracker-neuralnet/lang/stub.ts
+++ b/tracker-neuralnet/lang/stub.ts
@@ -64,6 +64,22 @@ Don&apos;t roll or change position.</source>
<source>MJPEG</source>
<translation type="unfinished"></translation>
</message>
+ <message>
+ <source>Tuning / Debug</source>
+ <translation type="unfinished"></translation>
+ </message>
+ <message>
+ <source>ROI Smoothing Alpha</source>
+ <translation type="unfinished"></translation>
+ </message>
+ <message>
+ <source>ROI Zoom</source>
+ <translation type="unfinished"></translation>
+ </message>
+ <message>
+ <source>Thread Count</source>
+ <translation type="unfinished"></translation>
+ </message>
</context>
<context>
<name>neuralnet_tracker_ns::neuralnet_dialog</name>
diff --git a/tracker-neuralnet/lang/zh_CN.ts b/tracker-neuralnet/lang/zh_CN.ts
index 8e9513b3..f55c12fc 100644
--- a/tracker-neuralnet/lang/zh_CN.ts
+++ b/tracker-neuralnet/lang/zh_CN.ts
@@ -64,6 +64,22 @@ Don&apos;t roll or change position.</source>
<source>MJPEG</source>
<translation type="unfinished"></translation>
</message>
+ <message>
+ <source>Tuning / Debug</source>
+ <translation type="unfinished"></translation>
+ </message>
+ <message>
+ <source>ROI Smoothing Alpha</source>
+ <translation type="unfinished"></translation>
+ </message>
+ <message>
+ <source>ROI Zoom</source>
+ <translation type="unfinished"></translation>
+ </message>
+ <message>
+ <source>Thread Count</source>
+ <translation type="unfinished"></translation>
+ </message>
</context>
<context>
<name>neuralnet_tracker_ns::neuralnet_dialog</name>
diff --git a/tracker-neuralnet/neuralnet-trackercontrols.ui b/tracker-neuralnet/neuralnet-trackercontrols.ui
index 5f72a809..acbfe909 100644
--- a/tracker-neuralnet/neuralnet-trackercontrols.ui
+++ b/tracker-neuralnet/neuralnet-trackercontrols.ui
@@ -9,15 +9,15 @@
<rect>
<x>0</x>
<y>0</y>
- <width>727</width>
- <height>202</height>
+ <width>721</width>
+ <height>277</height>
</rect>
</property>
<property name="windowTitle">
<string>Tracker settings</string>
</property>
<layout class="QGridLayout" name="gridLayout">
- <item row="5" column="0">
+ <item row="9" column="0">
<widget class="QDialogButtonBox" name="buttonBox">
<property name="standardButtons">
<set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
@@ -364,11 +364,143 @@ Don't roll or change position.</string>
</layout>
</widget>
</item>
- <item row="4" column="0">
- <widget class="QCheckBox" name="showNetworkInput">
- <property name="text">
- <string>Show Network Input</string>
+ <item row="5" column="0">
+ <widget class="QGroupBox" name="tuningOptionsBox">
+ <property name="sizePolicy">
+ <sizepolicy hsizetype="Preferred" vsizetype="Preferred">
+ <horstretch>0</horstretch>
+ <verstretch>0</verstretch>
+ </sizepolicy>
+ </property>
+ <property name="minimumSize">
+ <size>
+ <width>0</width>
+ <height>0</height>
+ </size>
+ </property>
+ <property name="title">
+ <string>Tuning / Debug</string>
</property>
+ <layout class="QGridLayout" name="gridLayout_2">
+ <item row="0" column="7">
+ <widget class="Line" name="line_3">
+ <property name="orientation">
+ <enum>Qt::Vertical</enum>
+ </property>
+ </widget>
+ </item>
+ <item row="0" column="8">
+ <widget class="QLabel" name="threadCountLabel">
+ <property name="text">
+ <string>Thread Count</string>
+ </property>
+ </widget>
+ </item>
+ <item row="0" column="2">
+ <widget class="QLabel" name="roiFilterAlphaLabel">
+ <property name="sizePolicy">
+ <sizepolicy hsizetype="Minimum" vsizetype="Minimum">
+ <horstretch>0</horstretch>
+ <verstretch>0</verstretch>
+ </sizepolicy>
+ </property>
+ <property name="text">
+ <string>ROI Smoothing Alpha</string>
+ </property>
+ </widget>
+ </item>
+ <item row="0" column="1">
+ <widget class="Line" name="line">
+ <property name="orientation">
+ <enum>Qt::Vertical</enum>
+ </property>
+ </widget>
+ </item>
+ <item row="0" column="0">
+ <widget class="QCheckBox" name="showNetworkInput">
+ <property name="sizePolicy">
+ <sizepolicy hsizetype="Minimum" vsizetype="Fixed">
+ <horstretch>0</horstretch>
+ <verstretch>0</verstretch>
+ </sizepolicy>
+ </property>
+ <property name="text">
+ <string>Show Network Input</string>
+ </property>
+ </widget>
+ </item>
+ <item row="0" column="3">
+ <widget class="QDoubleSpinBox" name="roiFilterAlpha">
+ <property name="sizePolicy">
+ <sizepolicy hsizetype="Minimum" vsizetype="Fixed">
+ <horstretch>0</horstretch>
+ <verstretch>0</verstretch>
+ </sizepolicy>
+ </property>
+ <property name="maximumSize">
+ <size>
+ <width>150</width>
+ <height>16777215</height>
+ </size>
+ </property>
+ <property name="wrapping">
+ <bool>false</bool>
+ </property>
+ <property name="decimals">
+ <number>2</number>
+ </property>
+ <property name="maximum">
+ <double>1.000000000000000</double>
+ </property>
+ <property name="singleStep">
+ <double>0.010000000000000</double>
+ </property>
+ <property name="value">
+ <double>1.000000000000000</double>
+ </property>
+ </widget>
+ </item>
+ <item row="0" column="5">
+ <widget class="QLabel" name="roiZoomLabel">
+ <property name="text">
+ <string>ROI Zoom</string>
+ </property>
+ </widget>
+ </item>
+ <item row="0" column="4">
+ <widget class="Line" name="line_2">
+ <property name="orientation">
+ <enum>Qt::Vertical</enum>
+ </property>
+ </widget>
+ </item>
+ <item row="0" column="6">
+ <widget class="QDoubleSpinBox" name="roiZoom">
+ <property name="minimum">
+ <double>0.100000000000000</double>
+ </property>
+ <property name="maximum">
+ <double>2.000000000000000</double>
+ </property>
+ <property name="singleStep">
+ <double>0.010000000000000</double>
+ </property>
+ <property name="value">
+ <double>1.000000000000000</double>
+ </property>
+ </widget>
+ </item>
+ <item row="0" column="9">
+ <widget class="QSpinBox" name="threadCount">
+ <property name="minimum">
+ <number>1</number>
+ </property>
+ <property name="maximum">
+ <number>32</number>
+ </property>
+ </widget>
+ </item>
+ </layout>
</widget>
</item>
</layout>