From 6e31ccd0d01b1d693673f6ef9fb46f46fd934685 Mon Sep 17 00:00:00 2001
From: Michael Welter
Date: Sat, 29 May 2021 17:43:26 +0200
Subject: TrackerNeuralnet: Display inference time and all available elements

That is, informative overlay elements such as the pose gizmo and the
bounding boxes.
---
 tracker-neuralnet/ftnoir_tracker_neuralnet.cpp | 73 ++++++++++++++++----------
 tracker-neuralnet/ftnoir_tracker_neuralnet.h   | 10 ++--
 2 files changed, 52 insertions(+), 31 deletions(-)

diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp
index 7468b2c1..ee6e7f05 100644
--- a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp
+++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp
@@ -207,14 +207,14 @@ std::pair<float, cv::Rect2f> Localizer::run(
     const char* input_names[] = {"x"};
     const char* output_names[] = {"logit_box"};
 
-    //Timer t_; t_.start();
+    Timer t; t.start();
 
     const auto nt = omp_get_num_threads();
     omp_set_num_threads(num_threads);
     session.Run(Ort::RunOptions{nullptr}, input_names, &input_val, 1, output_names, &output_val, 1);
     omp_set_num_threads(nt);
 
-    //qDebug() << "localizer: " << t_.elapsed_ms() << " ms\n";
+    last_inference_time = t.elapsed_ms();
 
     const cv::Rect2f roi = unnormalize(cv::Rect2f{
         results[1],
@@ -228,6 +228,12 @@ std::pair<float, cv::Rect2f> Localizer::run(
 }
 
 
+double Localizer::last_inference_time_millis() const
+{
+    return last_inference_time;
+}
+
+
 PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&session)
     : session{std::move(session)},
       scaled_frame(input_img_height, input_img_width, CV_8U),
@@ -317,7 +323,7 @@ std::optional<PoseEstimator::Face> PoseEstimator::run(
     const char* input_names[] = {"x"};
     const char* output_names[] = {"pos_size", "quat", "box"};
 
-    //Timer t_; t_.start();
+    Timer t; t.start();
 
     const auto nt = omp_get_num_threads();
     omp_set_num_threads(num_threads);
@@ -329,7 +335,7 @@ std::optional<PoseEstimator::Face> PoseEstimator::run(
     // issue. The ONNX api suggests that tensor are allocated in an
     // arena. Does that matter? Maybe the issue is something else?
 
-    //qDebug() << "pose net: " << t_.elapsed_ms() << " ms\n";
+    last_inference_time = t.elapsed_ms();
 
     // Perform coordinate transformation.
     // From patch-local normalized in [-1,1] to
@@ -372,6 +378,12 @@ cv::Mat PoseEstimator::last_network_input() const
 }
 
 
+double PoseEstimator::last_inference_time_millis() const
+{
+    return last_inference_time;
+}
+
+
 bool neuralnet_tracker::detect()
 {
     // Note: BGR colors!
@@ -379,6 +391,7 @@ bool neuralnet_tracker::detect()
         iou(*last_localizer_roi,*last_roi)<0.25)
     {
         auto [p, rect] = localizer->run(grayscale);
+        last_inference_time += localizer->last_inference_time_millis();
         if (p > 0.5 || rect.height < 5 || rect.width < 5)
         {
             last_localizer_roi = rect;
@@ -392,13 +405,18 @@ bool neuralnet_tracker::detect()
     }
 
     if (!last_roi)
+    {
+        draw_gizmos(frame, {}, {});
         return false;
+    }
 
     auto face = poseestimator->run(grayscale, *last_roi);
+    last_inference_time += poseestimator->last_inference_time_millis();
 
     if (!face)
     {
         last_roi.reset();
+        draw_gizmos(frame, face, {});
         return false;
     }
 
@@ -457,7 +475,7 @@ Affine neuralnet_tracker::compute_pose(const PoseEstimator::Face &face) const
 
 void neuralnet_tracker::draw_gizmos(
     cv::Mat frame,
-    const PoseEstimator::Face &face,
+    const std::optional<PoseEstimator::Face> &face,
     const Affine& pose) const
 {
     if (last_roi)
@@ -471,21 +489,28 @@ void neuralnet_tracker::draw_gizmos(
             cv::rectangle(frame, *last_localizer_roi, cv::Scalar(col, 0, 255-col), /*thickness=*/1);
     }
 
-    if (face.size>=1.f)
-        cv::circle(frame, static_cast<cv::Point>(face.center), int(face.size), cv::Scalar(255,255,255), 2);
-    cv::circle(frame, static_cast<cv::Point>(face.center), 3, cv::Scalar(255,255,255), -1);
-
-    auto draw_coord_line = [&](int i, const cv::Scalar& color)
+    if (face)
     {
-        const float vx = -pose.R(2,i);
-        const float vy = -pose.R(1,i);
-        static constexpr float len = 100.f;
-        cv::Point q = face.center + len*cv::Point2f{vx, vy};
-        cv::line(frame, static_cast<cv::Point>(face.center), static_cast<cv::Point>(q), color, 2);
-    };
-    draw_coord_line(0, {0, 0, 255});
-    draw_coord_line(1, {0, 255, 0});
-    draw_coord_line(2, {255, 0, 0});
+        if (face->size>=1.f)
+            cv::circle(frame, static_cast<cv::Point>(face->center), int(face->size), cv::Scalar(255,255,255), 2);
+        cv::circle(frame, static_cast<cv::Point>(face->center), 3, cv::Scalar(255,255,255), -1);
+
+        auto draw_coord_line = [&](int i, const cv::Scalar& color)
+        {
+            const float vx = -pose.R(2,i);
+            const float vy = -pose.R(1,i);
+            static constexpr float len = 100.f;
+            cv::Point q = face->center + len*cv::Point2f{vx, vy};
+            cv::line(frame, static_cast<cv::Point>(face->center), static_cast<cv::Point>(q), color, 2);
+        };
+        draw_coord_line(0, {0, 0, 255});
+        draw_coord_line(1, {0, 255, 0});
+        draw_coord_line(2, {255, 0, 0});
+
+        // Draw the computed joint position
+        auto xy = world_to_image(pose.t);
+        cv::circle(frame, cv::Point(xy[0],xy[1]), 5, cv::Scalar(0,0,255), -1);
+    }
 
     if (s.show_network_input)
     {
@@ -498,14 +523,9 @@ void neuralnet_tracker::draw_gizmos(
             netinput(roi).copyTo(frame(roi));
         }
     }
 
-    {
-        // Draw the computed joint position
-        auto xy = world_to_image(pose.t);
-        cv::circle(frame, cv::Point(xy[0],xy[1]), 5, cv::Scalar(0,0,255), -1);
-    }
 
     char buf[128];
-    ::snprintf(buf, sizeof(buf), "%d Hz, Max: %d ms", clamp(int(fps), 0, 9999), int(max_frame_time*1000.));
+    ::snprintf(buf, sizeof(buf), "%d Hz, pose inference: %d ms", clamp(int(fps), 0, 9999), int(last_inference_time));
     cv::putText(frame, buf, cv::Point(10, frame.rows-10), cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(0, 255, 0), 1);
 }
 
@@ -660,6 +680,7 @@ void neuralnet_tracker::run()
 
     while (!isInterruptionRequested())
     {
+        last_inference_time = 0;
         auto t = clk.now();
         {
             QMutexLocker l(&camera_mtx);
@@ -713,8 +734,6 @@ void neuralnet_tracker::update_fps(double dt)
         fps *= 1 - alpha;
         fps += alpha * 1./dt;
     }
-
-    max_frame_time = std::max(max_frame_time, dt);
 }
 
 
diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.h b/tracker-neuralnet/ftnoir_tracker_neuralnet.h
index e26689a4..b2eca6e3 100644
--- a/tracker-neuralnet/ftnoir_tracker_neuralnet.h
+++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.h
@@ -82,6 +82,7 @@ class Localizer
     std::pair<float, cv::Rect2f> run(
         const cv::Mat &frame);
 
+    double last_inference_time_millis() const;
 private:
     inline static constexpr int input_img_width = 288;
     inline static constexpr int input_img_height = 224;
@@ -90,6 +91,7 @@ class Localizer
     cv::Mat scaled_frame{}, input_mat{};
     Ort::Value input_val{nullptr}, output_val{nullptr};
     std::array<float, 5> results;
+    double last_inference_time = 0;
 };
 
 
@@ -111,11 +113,10 @@ class PoseEstimator
     std::optional<Face> run(const cv::Mat &frame, const cv::Rect &box);
     // Returns an image compatible with the 'frame' image for displaying.
     cv::Mat last_network_input() const;
-
+    double last_inference_time_millis() const;
 private:
     // Operates on the private image data members
    int find_input_intensity_90_pct_quantile() const;
-
     inline static constexpr int input_img_width = 129;
     inline static constexpr int input_img_height = 129;
     Ort::Session session{nullptr};
@@ -130,6 +131,7 @@ class PoseEstimator
         Ort::Value{nullptr},
         Ort::Value{nullptr},
         Ort::Value{nullptr}};
+    double last_inference_time = 0;
 };
 
 
@@ -154,7 +156,7 @@ private:
     bool load_and_initialize_model();
     void draw_gizmos(
         cv::Mat frame,
-        const PoseEstimator::Face &face,
+        const std::optional<PoseEstimator::Face> &face,
         const Affine& pose) const;
     void update_fps(double dt);
 
@@ -175,7 +177,7 @@ private:
     static constexpr float head_size_mm = 200.f;
 
     double fps = 0;
-    double max_frame_time = 0;
+    double last_inference_time = 0;
     static constexpr double RC = .25;
 
     QMutex mtx; // Protects the pose
-- 
cgit v1.2.3
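
Note on the measurement pattern this patch introduces: each network wraps its
session.Run() call in a timer and stores the result, and the tracker loop
resets a per-frame accumulator, then sums the times of whichever networks
actually ran that frame. The standalone sketch below illustrates that flow.
It is not part of the patch: Timer is reimplemented here with std::chrono
(opentrack's own Timer class is assumed to behave similarly), and Network is
a hypothetical stand-in for Localizer and PoseEstimator.

#include <chrono>
#include <cstdio>

// Stand-in for opentrack's Timer (assumed std::chrono-based).
struct Timer
{
    std::chrono::steady_clock::time_point t0;
    void start() { t0 = std::chrono::steady_clock::now(); }
    double elapsed_ms() const
    {
        return std::chrono::duration<double, std::milli>(
                   std::chrono::steady_clock::now() - t0).count();
    }
};

// Hypothetical stand-in for Localizer / PoseEstimator.
struct Network
{
    double last_inference_time = 0;

    void run()
    {
        Timer t; t.start();
        // session.Run(...) would execute here.
        last_inference_time = t.elapsed_ms();
    }

    double last_inference_time_millis() const { return last_inference_time; }
};

int main()
{
    Network localizer, poseestimator;
    double last_inference_time = 0;   // per-frame accumulator, as in neuralnet_tracker

    // One iteration of the tracker loop:
    last_inference_time = 0;          // reset at the top of the frame
    localizer.run();                  // in the tracker, only when no usable ROI exists
    last_inference_time += localizer.last_inference_time_millis();
    poseestimator.run();
    last_inference_time += poseestimator.last_inference_time_millis();

    std::printf("pose inference: %d ms\n", int(last_inference_time));
    return 0;
}

Resetting the accumulator at the top of the loop means that on frames where
the localizer is skipped (because the previous ROI is still good), the
on-screen "pose inference: %d ms" readout reports only the pose network's
time, which is the common steady-state case.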