From 6e31ccd0d01b1d693673f6ef9fb46f46fd934685 Mon Sep 17 00:00:00 2001
From: Michael Welter
Date: Sat, 29 May 2021 17:43:26 +0200
Subject: TrackerNeuralnet: Display inference time and all available elements

That is, informative overlay elements such as the pose gizmo and the
bounding boxes.
---
 tracker-neuralnet/ftnoir_tracker_neuralnet.cpp | 73 ++++++++++++++++----------
 tracker-neuralnet/ftnoir_tracker_neuralnet.h   | 10 ++--
 2 files changed, 52 insertions(+), 31 deletions(-)

diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp
index 7468b2c1..ee6e7f05 100644
--- a/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp
+++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.cpp
@@ -207,14 +207,14 @@ std::pair<float, cv::Rect2f> Localizer::run(
     const char* input_names[] = {"x"};
     const char* output_names[] = {"logit_box"};
 
-    //Timer t_; t_.start();
+    Timer t; t.start();
 
     const auto nt = omp_get_num_threads();
     omp_set_num_threads(num_threads);
     session.Run(Ort::RunOptions{nullptr}, input_names, &input_val, 1, output_names, &output_val, 1);
     omp_set_num_threads(nt);
 
-    //qDebug() << "localizer: " << t_.elapsed_ms() << " ms\n";
+    last_inference_time = t.elapsed_ms();
 
     const cv::Rect2f roi = unnormalize(cv::Rect2f{
         results[1],
@@ -228,6 +228,12 @@ std::pair<float, cv::Rect2f> Localizer::run(
 }
 
 
+double Localizer::last_inference_time_millis() const
+{
+    return last_inference_time;
+}
+
+
 PoseEstimator::PoseEstimator(Ort::MemoryInfo &allocator_info, Ort::Session &&session)
     : session{std::move(session)},
       scaled_frame(input_img_height, input_img_width, CV_8U),
@@ -317,7 +323,7 @@ std::optional<PoseEstimator::Face> PoseEstimator::run(
     const char* input_names[] = {"x"};
     const char* output_names[] = {"pos_size", "quat", "box"};
 
-    //Timer t_; t_.start();
+    Timer t; t.start();
 
     const auto nt = omp_get_num_threads();
     omp_set_num_threads(num_threads);
@@ -329,7 +335,7 @@ std::optional<PoseEstimator::Face> PoseEstimator::run(
     // issue. The ONNX api suggests that tensor are allocated in an
     // arena. Does that matter? Maybe the issue is something else?
 
-    //qDebug() << "pose net: " << t_.elapsed_ms() << " ms\n";
+    last_inference_time = t.elapsed_ms();
 
     // Perform coordinate transformation.
     // From patch-local normalized in [-1,1] to
@@ -372,6 +378,12 @@ cv::Mat PoseEstimator::last_network_input() const
 }
 
 
+double PoseEstimator::last_inference_time_millis() const
+{
+    return last_inference_time;
+}
+
+
 bool neuralnet_tracker::detect()
 {
     // Note: BGR colors!
@@ -379,6 +391,7 @@ bool neuralnet_tracker::detect()
         iou(*last_localizer_roi,*last_roi)<0.25)
     {
         auto [p, rect] = localizer->run(grayscale);
+        last_inference_time += localizer->last_inference_time_millis();
         if (p > 0.5 || rect.height < 5 || rect.width < 5)
         {
             last_localizer_roi = rect;
@@ -392,13 +405,18 @@ bool neuralnet_tracker::detect()
     }
 
     if (!last_roi)
+    {
+        draw_gizmos(frame, {}, {});
         return false;
+    }
 
     auto face = poseestimator->run(grayscale, *last_roi);
+    last_inference_time += poseestimator->last_inference_time_millis();
 
     if (!face)
     {
         last_roi.reset();
+        draw_gizmos(frame, face, {});
         return false;
     }
 
@@ -457,7 +475,7 @@ Affine neuralnet_tracker::compute_pose(const PoseEstimator::Face &face) const
 
 void neuralnet_tracker::draw_gizmos(
     cv::Mat frame,
-    const PoseEstimator::Face &face,
+    const std::optional<PoseEstimator::Face> &face,
     const Affine& pose) const
 {
     if (last_roi)
@@ -471,21 +489,28 @@ void neuralnet_tracker::draw_gizmos(
             cv::rectangle(frame, *last_localizer_roi, cv::Scalar(col, 0, 255-col), /*thickness=*/1);
     }
 
-    if (face.size>=1.f)
-        cv::circle(frame, static_cast<cv::Point>(face.center), int(face.size), cv::Scalar(255,255,255), 2);
-    cv::circle(frame, static_cast<cv::Point>(face.center), 3, cv::Scalar(255,255,255), -1);
-
-    auto draw_coord_line = [&](int i, const cv::Scalar& color)
+    if (face)
     {
-        const float vx = -pose.R(2,i);
-        const float vy = -pose.R(1,i);
-        static constexpr float len = 100.f;
-        cv::Point q = face.center + len*cv::Point2f{vx, vy};
-        cv::line(frame, static_cast<cv::Point>(face.center), static_cast<cv::Point>(q), color, 2);
-    };
-    draw_coord_line(0, {0, 0, 255});
-    draw_coord_line(1, {0, 255, 0});
-    draw_coord_line(2, {255, 0, 0});
+        if (face->size>=1.f)
+            cv::circle(frame, static_cast<cv::Point>(face->center), int(face->size), cv::Scalar(255,255,255), 2);
+        cv::circle(frame, static_cast<cv::Point>(face->center), 3, cv::Scalar(255,255,255), -1);
+
+        auto draw_coord_line = [&](int i, const cv::Scalar& color)
+        {
+            const float vx = -pose.R(2,i);
+            const float vy = -pose.R(1,i);
+            static constexpr float len = 100.f;
+            cv::Point q = face->center + len*cv::Point2f{vx, vy};
+            cv::line(frame, static_cast<cv::Point>(face->center), static_cast<cv::Point>(q), color, 2);
+        };
+        draw_coord_line(0, {0, 0, 255});
+        draw_coord_line(1, {0, 255, 0});
+        draw_coord_line(2, {255, 0, 0});
+
+        // Draw the computed joint position
+        auto xy = world_to_image(pose.t);
+        cv::circle(frame, cv::Point(xy[0],xy[1]), 5, cv::Scalar(0,0,255), -1);
+    }
 
     if (s.show_network_input)
     {
@@ -498,14 +523,9 @@ void neuralnet_tracker::draw_gizmos(
             netinput(roi).copyTo(frame(roi));
         }
     }
 
-    {
-        // Draw the computed joint position
-        auto xy = world_to_image(pose.t);
-        cv::circle(frame, cv::Point(xy[0],xy[1]), 5, cv::Scalar(0,0,255), -1);
-    }
 
     char buf[128];
-    ::snprintf(buf, sizeof(buf), "%d Hz, Max: %d ms", clamp(int(fps), 0, 9999), int(max_frame_time*1000.));
+    ::snprintf(buf, sizeof(buf), "%d Hz, pose inference: %d ms", clamp(int(fps), 0, 9999), int(last_inference_time));
     cv::putText(frame, buf, cv::Point(10, frame.rows-10), cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(0, 255, 0), 1);
 }
 
@@ -660,6 +680,7 @@ void neuralnet_tracker::run()
 
     while (!isInterruptionRequested())
     {
+        last_inference_time = 0;
         auto t = clk.now();
         {
             QMutexLocker l(&camera_mtx);
@@ -713,8 +734,6 @@ void neuralnet_tracker::update_fps(double dt)
         fps *= 1 - alpha;
         fps += alpha * 1./dt;
     }
-
-    max_frame_time = std::max(max_frame_time, dt);
 }
 
 
diff --git a/tracker-neuralnet/ftnoir_tracker_neuralnet.h b/tracker-neuralnet/ftnoir_tracker_neuralnet.h
index e26689a4..b2eca6e3 100644
--- a/tracker-neuralnet/ftnoir_tracker_neuralnet.h
+++ b/tracker-neuralnet/ftnoir_tracker_neuralnet.h
@@ -82,6 +82,7 @@ class Localizer
     std::pair<float, cv::Rect2f> run(
         const cv::Mat &frame);
 
+    double last_inference_time_millis() const;
 private:
     inline static constexpr int input_img_width = 288;
     inline static constexpr int input_img_height = 224;
@@ -90,6 +91,7 @@ class Localizer
     cv::Mat scaled_frame{}, input_mat{};
     Ort::Value input_val{nullptr}, output_val{nullptr};
     std::array<float, 5> results;
+    double last_inference_time = 0;
 };
 
 
@@ -111,11 +113,10 @@ class PoseEstimator
     std::optional<Face> run(const cv::Mat &frame, const cv::Rect &box);
     // Returns an image compatible with the 'frame' image for displaying.
     cv::Mat last_network_input() const;
-
+    double last_inference_time_millis() const;
 private:
     // Operates on the private image data members
    int find_input_intensity_90_pct_quantile() const;
-
     inline static constexpr int input_img_width = 129;
     inline static constexpr int input_img_height = 129;
     Ort::Session session{nullptr};
@@ -130,6 +131,7 @@ class PoseEstimator
         Ort::Value{nullptr},
         Ort::Value{nullptr},
         Ort::Value{nullptr}};
+    double last_inference_time = 0;
 };
 
 
@@ -154,7 +156,7 @@ private:
     bool load_and_initialize_model();
     void draw_gizmos(
         cv::Mat frame,
-        const PoseEstimator::Face &face,
+        const std::optional<PoseEstimator::Face> &face,
         const Affine& pose) const;
     void update_fps(double dt);
 
@@ -175,7 +177,7 @@ private:
     static constexpr float head_size_mm = 200.f;
 
     double fps = 0;
-    double max_frame_time = 0;
+    double last_inference_time = 0;
     static constexpr double RC = .25;
 
     QMutex mtx; // Protects the pose
-- 
cgit v1.2.3
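
Note on the measurement pattern this patch introduces: each network wraps its
session.Run() call in a timer and stores the result, and the tracker loop
resets a per-frame accumulator, then sums the times of whichever networks
actually ran that frame. The standalone sketch below illustrates that flow.
It is not part of the patch: Timer is reimplemented here with std::chrono
(opentrack's own Timer class is assumed to behave similarly), and Network is
a hypothetical stand-in for Localizer and PoseEstimator.

#include <chrono>
#include <cstdio>

// Stand-in for opentrack's Timer (assumed std::chrono-based).
struct Timer
{
    std::chrono::steady_clock::time_point t0;
    void start() { t0 = std::chrono::steady_clock::now(); }
    double elapsed_ms() const
    {
        return std::chrono::duration<double, std::milli>(
                   std::chrono::steady_clock::now() - t0).count();
    }
};

// Hypothetical stand-in for Localizer / PoseEstimator.
struct Network
{
    double last_inference_time = 0;

    void run()
    {
        Timer t; t.start();
        // session.Run(...) would execute here.
        last_inference_time = t.elapsed_ms();
    }

    double last_inference_time_millis() const { return last_inference_time; }
};

int main()
{
    Network localizer, poseestimator;
    double last_inference_time = 0;   // per-frame accumulator, as in neuralnet_tracker

    // One iteration of the tracker loop:
    last_inference_time = 0;          // reset at the top of the frame
    localizer.run();                  // in the tracker, only when no usable ROI exists
    last_inference_time += localizer.last_inference_time_millis();
    poseestimator.run();
    last_inference_time += poseestimator.last_inference_time_millis();

    std::printf("pose inference: %d ms\n", int(last_inference_time));
    return 0;
}

Resetting the accumulator at the top of the loop means that on frames where
the localizer is skipped (because the previous ROI is still good), the
on-screen "pose inference: %d ms" readout reports only the pose network's
time, which is the common steady-state case.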