diff options
Diffstat (limited to 'tracker-neuralnet')
-rw-r--r-- | tracker-neuralnet/ftnoir_tracker_neuralnet.cpp | 123 | ||||
-rw-r--r-- | tracker-neuralnet/ftnoir_tracker_neuralnet.h | 22 | ||||
-rw-r--r-- | tracker-neuralnet/lang/nl_NL.ts | 32 | ||||
-rw-r--r-- | tracker-neuralnet/lang/ru_RU.ts | 32 | ||||
-rw-r--r-- | tracker-neuralnet/lang/stub.ts | 32 | ||||
-rw-r--r-- | tracker-neuralnet/lang/zh_CN.ts | 32 | ||||
-rw-r--r-- | tracker-neuralnet/neuralnet-trackercontrols.ui | 169 |
7 files changed, 369 insertions, 73 deletions
diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp index 00f3f281..f849f4e1 100644 --- a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp +++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp @@ -16,7 +16,6 @@ #include <opencv2/imgcodecs.hpp> #include "compat/timer.hpp" #include <omp.h> -#include <stdexcept> #ifdef _MSC_VER # pragma warning(disable : 4702) @@ -31,6 +30,8 @@ #include <algorithm> #include <chrono> #include <string> +#include <stdexcept> + // Some demo code for onnx // https://github.com/microsoft/onnxruntime/blob/master/csharp/test/Microsoft.ML.OnnxRuntime.EndToEndTests.Capi/C_Api_Sample.cpp @@ -59,6 +60,35 @@ float sigmoid(float x) } +cv::Rect make_crop_rect_for_aspect(const cv::Size &size, int aspect_w, int aspect_h) +{ + auto [w, h] = size; + if ( w*aspect_h > aspect_w*h ) + { + // Image is too wide + const int new_w = (aspect_w*h)/aspect_h; + return cv::Rect((w - new_w)/2, 0, new_w, h); + } + else + { + const int new_h = (aspect_h*w)/aspect_w; + return cv::Rect(0, (h - new_h)/2, w, new_h); + } +} + +cv::Rect make_crop_rect_multiple_of(const cv::Size &size, int multiple) +{ + const int new_w = (size.width / multiple) * multiple; + const int new_h = (size.height / multiple) * multiple; + return cv::Rect( + (size.width-new_w)/2, + (size.height-new_h)/2, + new_w, + new_h + ); +} + + template<class T> cv::Rect_<T> squarize(const cv::Rect_<T> &r) { @@ -752,7 +782,6 @@ module_status neuralnet_tracker::start_tracker(QFrame* videoframe) videoframe->setLayout(&*layout); videoWidget->show(); num_threads = settings.num_threads; - cv::setNumThreads(num_threads); start(); return status_ok(); } @@ -774,8 +803,7 @@ bool neuralnet_tracker::load_and_initialize_model() auto opts = Ort::SessionOptions{}; // Do thread settings here do anything? // There is a warning which says to control number of threads via - // openmp settings. Which is what we do. omp_set_num_threads directly - // before running the inference pass. + // openmp settings. Which is what we do. opts.SetIntraOpNumThreads(num_threads); opts.SetInterOpNumThreads(1); allocator_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); @@ -800,6 +828,8 @@ bool neuralnet_tracker::load_and_initialize_model() bool neuralnet_tracker::open_camera() { + int rint = std::clamp(*settings.resolution, 0, (int)std::size(resolution_choices)-1); + resolution_tuple res = resolution_choices[rint]; int fps = enum_to_fps(settings.force_fps); QMutexLocker l(&camera_mtx); @@ -811,9 +841,11 @@ bool neuralnet_tracker::open_camera() video::impl::camera::info args {}; - args.width = 320; - args.height = 240; - + if (res.width) + { + args.width = res.width; + args.height = res.height; + } if (fps) args.fps = fps; @@ -844,8 +876,34 @@ void neuralnet_tracker::set_intrinsics() } +class GuardedThreadCountSwitch +{ + int old_num_threads_cv = 1; + int old_num_threads_omp = 1; + public: + GuardedThreadCountSwitch(int num_threads) + { + old_num_threads_cv = cv::getNumThreads(); + old_num_threads_omp = omp_get_num_threads(); + omp_set_num_threads(num_threads); + cv::setNumThreads(num_threads); + } + + ~GuardedThreadCountSwitch() + { + omp_set_num_threads(old_num_threads_omp); + cv::setNumThreads(old_num_threads_cv); + } + + GuardedThreadCountSwitch(const GuardedThreadCountSwitch&) = delete; + GuardedThreadCountSwitch& operator=(const GuardedThreadCountSwitch&) = delete; +}; + + void neuralnet_tracker::run() { + GuardedThreadCountSwitch switch_num_threads_to(num_threads); + if (!open_camera()) return; @@ -870,7 +928,8 @@ void neuralnet_tracker::run() continue; } - auto color = cv::Mat(img.height, img.width, CV_8UC(img.channels), (void*)img.data, img.stride); + auto color = prepare_input_image(img); + color.copyTo(frame); switch (img.channels) @@ -890,13 +949,8 @@ void neuralnet_tracker::run() set_intrinsics(); - const auto nt = omp_get_num_threads(); - omp_set_num_threads(num_threads); - detect(); - omp_set_num_threads(nt); - if (frame.rows > 0) videoWidget->update_image(frame); @@ -907,6 +961,33 @@ void neuralnet_tracker::run() } +cv::Mat neuralnet_tracker::prepare_input_image(const video::frame& frame) +{ + auto img = cv::Mat(frame.height, frame.width, CV_8UC(frame.channels), (void*)frame.data, frame.stride); + + // Crop if aspect ratio is not 4:3 + if (img.rows*4 != img.cols*3) + { + img = img(make_crop_rect_for_aspect(img.size(), 4, 3)); + } + + img = img(make_crop_rect_multiple_of(img.size(), 4)); + + if (img.cols > 640) + { + cv::pyrDown(img, downsized_original_images_[0]); + img = downsized_original_images_[0]; + } + if (img.cols > 640) + { + cv::pyrDown(img, downsized_original_images_[1]); + img = downsized_original_images_[1]; + } + + return img; +} + + void neuralnet_tracker::update_fps(double dt) { const double alpha = dt/(dt + RC); @@ -965,6 +1046,18 @@ void neuralnet_dialog::make_fps_combobox() } } +void neuralnet_dialog::make_resolution_combobox() +{ + int k=0; + for (const auto [w, h] : resolution_choices) + { + const QString s = (w == 0) + ? tr("Default") + : QString::number(w) + " x " + QString::number(h); + ui.resolution->addItem(s, k++); + } +} + neuralnet_dialog::neuralnet_dialog() : trans_calib(1, 2) @@ -972,7 +1065,7 @@ neuralnet_dialog::neuralnet_dialog() : ui.setupUi(this); make_fps_combobox(); - tie_setting(settings.force_fps, ui.cameraFPS); + make_resolution_combobox(); for (const auto& str : video::camera_names()) ui.cameraName->addItem(str); @@ -987,6 +1080,8 @@ neuralnet_dialog::neuralnet_dialog() : tie_setting(settings.use_mjpeg, ui.use_mjpeg); tie_setting(settings.roi_zoom, ui.roiZoom); tie_setting(settings.num_threads, ui.threadCount); + tie_setting(settings.resolution, ui.resolution); + tie_setting(settings.force_fps, ui.cameraFPS); connect(ui.buttonBox, SIGNAL(accepted()), this, SLOT(doOK())); connect(ui.buttonBox, SIGNAL(rejected()), this, SLOT(doCancel())); diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.h b/tracker-neuralnet/ftnoir_tracker_neuralnet.h index 25f1a0a2..ace16528 100644 --- a/tracker-neuralnet/ftnoir_tracker_neuralnet.h +++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.h @@ -25,6 +25,7 @@ #include <memory> #include <cinttypes> +#include <array> #include <onnxruntime_cxx_api.h> @@ -49,6 +50,23 @@ enum fps_choices fps_MAX = 3 }; +struct resolution_tuple +{ + int width; + int height; +}; + +static const std::array<resolution_tuple, 7> resolution_choices = +{{ + { 320, 240 }, + { 640, 480 }, + { 800, 600 }, + { 1024, 768 }, + { 1280, 720 }, + { 1920, 1080}, + { 0, 0 } +}}; + struct Settings : opts { value<int> offset_fwd { b, "offset-fwd", 200 }, // Millimeters @@ -62,6 +80,7 @@ struct Settings : opts { value<double> roi_zoom{ b, "roi-zoom", 1. }; value<bool> use_mjpeg { b, "use-mjpeg", false }; value<int> num_threads { b, "num-threads", 1 }; + value<int> resolution { b, "force-resolution", 0 }; Settings(); }; @@ -159,6 +178,7 @@ private: bool detect(); bool open_camera(); void set_intrinsics(); + cv::Mat prepare_input_image(const video::frame& frame); bool load_and_initialize_model(); void draw_gizmos( cv::Mat frame, @@ -176,6 +196,7 @@ private: CamIntrinsics intrinsics{}; cv::Mat frame, grayscale; + std::array<cv::Mat,2> downsized_original_images_ = {}; // Image pyramid std::optional<cv::Rect2f> last_localizer_roi; std::optional<cv::Rect2f> last_roi; static constexpr float head_size_mm = 200.f; @@ -202,6 +223,7 @@ public: void unregister_tracker() override; private: void make_fps_combobox(); + void make_resolution_combobox(); Ui::Form ui; Settings settings; diff --git a/tracker-neuralnet/lang/nl_NL.ts b/tracker-neuralnet/lang/nl_NL.ts index 95da8f4c..cb6d1da0 100644 --- a/tracker-neuralnet/lang/nl_NL.ts +++ b/tracker-neuralnet/lang/nl_NL.ts @@ -80,6 +80,38 @@ Don't roll or change position.</source> <source>Thread Count</source> <translation type="unfinished"></translation> </message> + <message> + <source>Resolution</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Field of view. Needed to transform the pose to world coordinates.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Requested video frame rate. Actual setting may not be supported by the camera.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Number of threads. Can be used to balance the CPU load between the game and the tracker.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Show the image patch that the pose estimation model sees.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Amount of smoothing of the face region coordinates. Can help stabilize the pose.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1.</source> + <translation type="unfinished"></translation> + </message> </context> <context> <name>neuralnet_tracker_ns::neuralnet_dialog</name> diff --git a/tracker-neuralnet/lang/ru_RU.ts b/tracker-neuralnet/lang/ru_RU.ts index a8252299..ed69e9a7 100644 --- a/tracker-neuralnet/lang/ru_RU.ts +++ b/tracker-neuralnet/lang/ru_RU.ts @@ -80,6 +80,38 @@ Don't roll or change position.</source> <source>Thread Count</source> <translation type="unfinished"></translation> </message> + <message> + <source>Resolution</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Field of view. Needed to transform the pose to world coordinates.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Requested video frame rate. Actual setting may not be supported by the camera.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Number of threads. Can be used to balance the CPU load between the game and the tracker.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Show the image patch that the pose estimation model sees.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Amount of smoothing of the face region coordinates. Can help stabilize the pose.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1.</source> + <translation type="unfinished"></translation> + </message> </context> <context> <name>neuralnet_tracker_ns::neuralnet_dialog</name> diff --git a/tracker-neuralnet/lang/stub.ts b/tracker-neuralnet/lang/stub.ts index 80103fde..db45f47a 100644 --- a/tracker-neuralnet/lang/stub.ts +++ b/tracker-neuralnet/lang/stub.ts @@ -80,6 +80,38 @@ Don't roll or change position.</source> <source>Thread Count</source> <translation type="unfinished"></translation> </message> + <message> + <source>Resolution</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Field of view. Needed to transform the pose to world coordinates.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Requested video frame rate. Actual setting may not be supported by the camera.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Number of threads. Can be used to balance the CPU load between the game and the tracker.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Show the image patch that the pose estimation model sees.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Amount of smoothing of the face region coordinates. Can help stabilize the pose.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1.</source> + <translation type="unfinished"></translation> + </message> </context> <context> <name>neuralnet_tracker_ns::neuralnet_dialog</name> diff --git a/tracker-neuralnet/lang/zh_CN.ts b/tracker-neuralnet/lang/zh_CN.ts index f55c12fc..d13219f0 100644 --- a/tracker-neuralnet/lang/zh_CN.ts +++ b/tracker-neuralnet/lang/zh_CN.ts @@ -80,6 +80,38 @@ Don't roll or change position.</source> <source>Thread Count</source> <translation type="unfinished"></translation> </message> + <message> + <source>Resolution</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Field of view. Needed to transform the pose to world coordinates.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Requested video frame rate. Actual setting may not be supported by the camera.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Number of threads. Can be used to balance the CPU load between the game and the tracker.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Show the image patch that the pose estimation model sees.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Amount of smoothing of the face region coordinates. Can help stabilize the pose.</source> + <translation type="unfinished"></translation> + </message> + <message> + <source>Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1.</source> + <translation type="unfinished"></translation> + </message> </context> <context> <name>neuralnet_tracker_ns::neuralnet_dialog</name> diff --git a/tracker-neuralnet/neuralnet-trackercontrols.ui b/tracker-neuralnet/neuralnet-trackercontrols.ui index acbfe909..43b316e9 100644 --- a/tracker-neuralnet/neuralnet-trackercontrols.ui +++ b/tracker-neuralnet/neuralnet-trackercontrols.ui @@ -9,8 +9,8 @@ <rect> <x>0</x> <y>0</y> - <width>721</width> - <height>277</height> + <width>647</width> + <height>305</height> </rect> </property> <property name="windowTitle"> @@ -60,13 +60,10 @@ <string>Camera Configuration</string> </property> <layout class="QGridLayout" name="gridLayout_4"> - <item row="3" column="1"> - <widget class="QComboBox" name="cameraName"> - <property name="sizePolicy"> - <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> - <horstretch>0</horstretch> - <verstretch>0</verstretch> - </sizepolicy> + <item row="4" column="0"> + <widget class="QLabel" name="resolution_label"> + <property name="text"> + <string>Resolution</string> </property> </widget> </item> @@ -78,6 +75,9 @@ <verstretch>0</verstretch> </sizepolicy> </property> + <property name="toolTip"> + <string>Field of view. Needed to transform the pose to world coordinates.</string> + </property> <property name="locale"> <locale language="English" country="UnitedStates"/> </property> @@ -96,20 +96,6 @@ </property> </widget> </item> - <item row="2" column="0"> - <widget class="QLabel" name="label_11"> - <property name="text"> - <string>MJPEG</string> - </property> - </widget> - </item> - <item row="3" column="0"> - <widget class="QLabel" name="label_10"> - <property name="text"> - <string>Camera name</string> - </property> - </widget> - </item> <item row="0" column="0"> <widget class="QLabel" name="label_9"> <property name="text"> @@ -125,31 +111,65 @@ <verstretch>0</verstretch> </sizepolicy> </property> + <property name="toolTip"> + <string>Requested video frame rate. Actual setting may not be supported by the camera.</string> + </property> </widget> </item> - <item row="2" column="1"> - <widget class="QCheckBox" name="use_mjpeg"> + <item row="7" column="1"> + <widget class="QPushButton" name="camera_settings"> <property name="sizePolicy"> - <sizepolicy hsizetype="Minimum" vsizetype="Maximum"> + <sizepolicy hsizetype="Preferred" vsizetype="Maximum"> <horstretch>0</horstretch> <verstretch>0</verstretch> </sizepolicy> </property> <property name="text"> - <string/> + <string>Camera settings</string> </property> </widget> </item> <item row="4" column="1"> - <widget class="QPushButton" name="camera_settings"> + <widget class="QComboBox" name="resolution"> + <property name="toolTip"> + <string>The requested resolution for cases where the camera delivers maximum frame rate only for a particular resolution. The image may still be downscaled to the internal resolution.</string> + </property> + </widget> + </item> + <item row="6" column="1"> + <widget class="QComboBox" name="cameraName"> <property name="sizePolicy"> - <sizepolicy hsizetype="Preferred" vsizetype="Maximum"> + <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> <horstretch>0</horstretch> <verstretch>0</verstretch> </sizepolicy> </property> + </widget> + </item> + <item row="6" column="0"> + <widget class="QLabel" name="label_10"> <property name="text"> - <string>Camera settings</string> + <string>Camera name</string> + </property> + </widget> + </item> + <item row="5" column="1"> + <widget class="QCheckBox" name="use_mjpeg"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Minimum" vsizetype="Maximum"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="text"> + <string/> + </property> + </widget> + </item> + <item row="5" column="0"> + <widget class="QLabel" name="label_11"> + <property name="text"> + <string>MJPEG</string> </property> </widget> </item> @@ -189,6 +209,12 @@ <enum>QFrame::Raised</enum> </property> <layout class="QGridLayout" name="gridLayout_11"> + <property name="sizeConstraint"> + <enum>QLayout::SetDefaultConstraint</enum> + </property> + <property name="verticalSpacing"> + <number>0</number> + </property> <item row="1" column="1"> <widget class="QSpinBox" name="ty_spin"> <property name="maximumSize"> @@ -382,21 +408,34 @@ Don't roll or change position.</string> <string>Tuning / Debug</string> </property> <layout class="QGridLayout" name="gridLayout_2"> - <item row="0" column="7"> - <widget class="Line" name="line_3"> + <item row="0" column="10"> + <widget class="Line" name="line_2"> <property name="orientation"> <enum>Qt::Vertical</enum> </property> </widget> </item> - <item row="0" column="8"> - <widget class="QLabel" name="threadCountLabel"> - <property name="text"> - <string>Thread Count</string> + <item row="0" column="1"> + <widget class="QSpinBox" name="threadCount"> + <property name="toolTip"> + <string>Number of threads. Can be used to balance the CPU load between the game and the tracker.</string> + </property> + <property name="minimum"> + <number>1</number> + </property> + <property name="maximum"> + <number>32</number> </property> </widget> </item> - <item row="0" column="2"> + <item row="0" column="4"> + <widget class="Line" name="line"> + <property name="orientation"> + <enum>Qt::Vertical</enum> + </property> + </widget> + </item> + <item row="0" column="8"> <widget class="QLabel" name="roiFilterAlphaLabel"> <property name="sizePolicy"> <sizepolicy hsizetype="Minimum" vsizetype="Minimum"> @@ -409,14 +448,21 @@ Don't roll or change position.</string> </property> </widget> </item> - <item row="0" column="1"> - <widget class="Line" name="line"> + <item row="0" column="11"> + <widget class="QLabel" name="roiZoomLabel"> + <property name="text"> + <string>ROI Zoom</string> + </property> + </widget> + </item> + <item row="0" column="2"> + <widget class="Line" name="line_3"> <property name="orientation"> <enum>Qt::Vertical</enum> </property> </widget> </item> - <item row="0" column="0"> + <item row="0" column="3"> <widget class="QCheckBox" name="showNetworkInput"> <property name="sizePolicy"> <sizepolicy hsizetype="Minimum" vsizetype="Fixed"> @@ -424,12 +470,15 @@ Don't roll or change position.</string> <verstretch>0</verstretch> </sizepolicy> </property> + <property name="toolTip"> + <string>Show the image patch that the pose estimation model sees.</string> + </property> <property name="text"> <string>Show Network Input</string> </property> </widget> </item> - <item row="0" column="3"> + <item row="0" column="9"> <widget class="QDoubleSpinBox" name="roiFilterAlpha"> <property name="sizePolicy"> <sizepolicy hsizetype="Minimum" vsizetype="Fixed"> @@ -443,6 +492,9 @@ Don't roll or change position.</string> <height>16777215</height> </size> </property> + <property name="toolTip"> + <string>Amount of smoothing of the face region coordinates. Can help stabilize the pose.</string> + </property> <property name="wrapping"> <bool>false</bool> </property> @@ -460,22 +512,18 @@ Don't roll or change position.</string> </property> </widget> </item> - <item row="0" column="5"> - <widget class="QLabel" name="roiZoomLabel"> + <item row="0" column="0"> + <widget class="QLabel" name="threadCountLabel"> <property name="text"> - <string>ROI Zoom</string> - </property> - </widget> - </item> - <item row="0" column="4"> - <widget class="Line" name="line_2"> - <property name="orientation"> - <enum>Qt::Vertical</enum> + <string>Thread Count</string> </property> </widget> </item> - <item row="0" column="6"> + <item row="0" column="12"> <widget class="QDoubleSpinBox" name="roiZoom"> + <property name="toolTip"> + <string>Zoom factor for the face region. Applied before the patch is fed into the pose estimation model. There is a sweet spot near 1.</string> + </property> <property name="minimum"> <double>0.100000000000000</double> </property> @@ -490,15 +538,18 @@ Don't roll or change position.</string> </property> </widget> </item> - <item row="0" column="9"> - <widget class="QSpinBox" name="threadCount"> - <property name="minimum"> - <number>1</number> + <item row="0" column="13"> + <spacer name="horizontalSpacer"> + <property name="orientation"> + <enum>Qt::Horizontal</enum> </property> - <property name="maximum"> - <number>32</number> + <property name="sizeHint" stdset="0"> + <size> + <width>40</width> + <height>20</height> + </size> </property> - </widget> + </spacer> </item> </layout> </widget> |