path: root/tracker-neuralnet/model_adapters.h
author     Michael Welter <michael@welter-4d.de>    2022-09-11 20:55:26 +0200
committer  Stanislaw Halik <sthalik@misaki.pl>      2022-11-01 13:51:35 +0100
commit     08f1fcad1c74e25f97641a0ccbd229b267ec528c (patch)
tree       000b1b276bc7df4a74fd493dab05bcce68801de8 /tracker-neuralnet/model_adapters.h
parent     77d6abaf53dbe2ee6334bd59b112e25d694a2f65 (diff)
tracker/nn: Tweaks, refactoring, deadzone filtering, and support for uncertainty estimation

* Add a rudimentary test for two functions .. maybe more in future
* Fix the rotation correction from vertical translation
* Move the preview class to new files
* Move the neural network model adapters to new files
* Add utility functions for OpenCV
* Query the model inputs/outputs by name to see what is available
* Support outputs for the standard deviation of the data distribution
  - This is what you get if you let your model output the full parameters of a Gaussian distribution (depending on the inputs) and fit it with a negative log likelihood loss.
* Disable support for sequence models
* Add support for eye open/close classification. Scale the uncertainty estimate up if the eyes are closed
* Add a deadzone filter which activates if the model supports uncertainty quantification. The deadzone becomes larger the more uncertain the model/data are. This is mostly meant to suppress large estimation errors when the user blinks (see the sketch after this list)
* Fix distance being twice what it should have been
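A minimal sketch of the idea behind the uncertainty-scaled deadzone described above; this is not the tracker's actual filter code, and the names apply_deadzone, deadzone_gain and prev are hypothetical:

// Sketch only: deadzone whose radius grows with the model's reported
// standard deviation, so jitter from uncertain estimates (e.g. during
// blinks) is suppressed while confident, large motions pass through.
#include <cmath>

float apply_deadzone(float value, float stddev, float& prev, float deadzone_gain)
{
    const float radius = deadzone_gain * stddev;   // larger uncertainty -> larger deadzone
    const float delta = value - prev;
    if (std::abs(delta) > radius)
        prev = value - std::copysign(radius, delta); // follow the edge of the deadzone
    return prev;                                     // otherwise hold the previous output
}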
Diffstat (limited to 'tracker-neuralnet/model_adapters.h')
-rw-r--r--  tracker-neuralnet/model_adapters.h  102
1 file changed, 102 insertions(+), 0 deletions(-)
diff --git a/tracker-neuralnet/model_adapters.h b/tracker-neuralnet/model_adapters.h
new file mode 100644
index 00000000..3fbfb861
--- /dev/null
+++ b/tracker-neuralnet/model_adapters.h
@@ -0,0 +1,102 @@
+#pragma once
+
+#include <optional>
+#include <array>
+#include <vector>
+
+#include <onnxruntime_cxx_api.h>
+#include <opencv2/core.hpp>
+#include "opencv_contrib.h"
+
+
+namespace neuralnet_tracker_ns
+{
+
+// Generally useful sigmoid function
+float sigmoid(float x);
+
+
+class Localizer
+{
+ public:
+ Localizer(Ort::MemoryInfo &allocator_info,
+ Ort::Session &&session);
+
+ // Returns the bounding box wrt the image coordinates of the input image.
+ // The preceding float is the score for being a face, normalized to [0,1].
+ std::pair<float, cv::Rect2f> run(
+ const cv::Mat &frame);
+
+ double last_inference_time_millis() const;
+ private:
+ inline static constexpr int INPUT_IMG_WIDTH = 288;
+ inline static constexpr int INPUT_IMG_HEIGHT = 224;
+ Ort::Session session_{nullptr};
+ // Inputs / outputs
+ cv::Mat scaled_frame_{}, input_mat_{};
+ Ort::Value input_val_{nullptr}, output_val_{nullptr};
+ std::array<float, 5> results_;
+ double last_inference_time_ = 0;
+};
+
+
+class PoseEstimator
+{
+ public:
+ struct Face
+ {
+ cv::Quatf rotation;
+ cv::Matx33f rotaxis_cov_tril; // Lower triangular factor of Cholesky decomposition
+ cv::Rect2f box;
+ cv::Point2f center;
+ cv::Point2f center_stddev;
+ float size;
+ float size_stddev;
+ };
+
+ PoseEstimator(Ort::MemoryInfo &allocator_info,
+ Ort::Session &&session);
+ /** Inference
+ *
+ * Coordinates are defined wrt. the image space of the input `frame`.
+ * X goes right, Z (depth) points into the image, and Y points down (like pixel coordinates: values increase from top to bottom).
+ */
+ std::optional<Face> run(const cv::Mat &frame, const cv::Rect &box);
+ // Returns an image compatible with the 'frame' image for displaying.
+ cv::Mat last_network_input() const;
+ double last_inference_time_millis() const;
+ bool has_uncertainty() const { return has_uncertainty_; }
+
+ private:
+ int64_t model_version_ = 0; // Queried meta data from the ONNX file
+ Ort::Session session_{nullptr}; // ONNX's runtime context for running the model
+ Ort::Allocator allocator_; // Memory allocator for tensors
+ // Inputs
+ cv::Mat scaled_frame_{}, input_mat_{}; // Input. One is the original crop, the other is rescaled (?)
+ std::vector<Ort::Value> input_val_; // Tensors to put into the model
+ std::vector<const char*> input_names_; // Refers to the names in the onnx model.
+ // Outputs
+ cv::Vec<float, 3> output_coord_{}; // 2d Coordinate and head size output.
+ cv::Vec<float, 4> output_quat_{}; // Quaternion output
+ cv::Vec<float, 4> output_box_{}; // Bounding box output
+ cv::Matx33f output_rotaxis_scales_tril_{}; // Lower triangular matrix of LLT factorization of covariance of rotation vector as offset from output quaternion
+ cv::Vec<float, 2> output_eyes_{};
+ cv::Vec<float, 3> output_coord_scales_{};
+ std::vector<Ort::Value> output_val_; // Tensors to put the model outputs in.
+ std::vector<const char*> output_names_; // Refers to the names in the onnx model.
+ // More bookkeeping
+ size_t num_recurrent_states_ = 0;
+ double last_inference_time_ = 0;
+ bool has_uncertainty_ = false;
+ bool has_eye_closed_detection_ = false;
+};
+
+
+// Finds the intensity below which `percentage` percent of the pixels lie.
+int find_input_intensity_quantile(const cv::Mat& frame, float percentage);
+
+// Adjusts the brightness levels to the full range and scales the values to [-0.5, 0.5].
+void normalize_brightness(const cv::Mat& frame, cv::Mat& out);
+
+
+} // namespace neuralnet_tracker_ns
\ No newline at end of file
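
For orientation, here is a hypothetical usage sketch of the two adapters declared in this header. The model file names, the ONNX Runtime session setup, the input image, and the 0.5 score threshold are assumptions for illustration, not part of this commit:

// Sketch only: locate a face with Localizer, then estimate the head pose
// with PoseEstimator. Assumes localizer.onnx / pose.onnx and a grayscale frame.
#include <onnxruntime_cxx_api.h>
#include <opencv2/imgcodecs.hpp>
#include "model_adapters.h"

using namespace neuralnet_tracker_ns;

int main()
{
    Ort::Env env{ORT_LOGGING_LEVEL_WARNING, "neuralnet-tracker"};
    Ort::SessionOptions opts;
    Ort::MemoryInfo meminfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);

    Localizer localizer{meminfo, Ort::Session{env, "localizer.onnx", opts}};
    PoseEstimator pose_estimator{meminfo, Ort::Session{env, "pose.onnx", opts}};

    const cv::Mat frame = cv::imread("frame.png", cv::IMREAD_GRAYSCALE);

    const auto [score, box] = localizer.run(frame);
    if (score > 0.5f) // face found with reasonable confidence
    {
        if (const auto face = pose_estimator.run(frame, box))
        {
            // face->rotation / center / size describe the head pose; the
            // *_stddev and rotaxis_cov_tril members are meaningful only when
            // pose_estimator.has_uncertainty() returns true.
        }
    }
    return 0;
}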