From 08f1fcad1c74e25f97641a0ccbd229b267ec528c Mon Sep 17 00:00:00 2001 From: Michael Welter Date: Sun, 11 Sep 2022 20:55:26 +0200 Subject: tracker/nn: Tweaks, refactoring, a deadzone filtering and support for uncertainty estimation * Add rudimentary test for two functions .. maybe more in future * Fix the rotation correction from vertical translation * Move preview class to new files * Move neural network model adapters to new files * Add utility functions for opencv * Query the model inputs/outputs by name to see what is available * Supports outputs for standard deviation of the data distribution - What you get if you let your model output the full parameters of a gaussian distribution (depending on the inputs) and fit it with negative log likelihood loss. * Disabled support for sequence models * Add support for detection of eye open/close classification. Scale uncertainty estimate up if eyes closed * Add a deadzone filter which activates if the model supports uncertainty quantification. The deadzone scales becomes larger the more uncertain the model/data are. This is mostly supposed to be useful to suppress large estimate errors when the user blinks with the eyes * Fix distance being twice of what it should have been --- tracker-neuralnet/model_adapters.h | 102 +++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 tracker-neuralnet/model_adapters.h (limited to 'tracker-neuralnet/model_adapters.h') diff --git a/tracker-neuralnet/model_adapters.h b/tracker-neuralnet/model_adapters.h new file mode 100644 index 00000000..3fbfb861 --- /dev/null +++ b/tracker-neuralnet/model_adapters.h @@ -0,0 +1,102 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include "opencv_contrib.h" + + +namespace neuralnet_tracker_ns +{ + +// Generally useful sigmoid function +float sigmoid(float x); + + +class Localizer +{ + public: + Localizer(Ort::MemoryInfo &allocator_info, + Ort::Session &&session); + + // Returns bounding wrt image coordinate of the input image + // The preceeding float is the score for being a face normalized to [0,1]. + std::pair run( + const cv::Mat &frame); + + double last_inference_time_millis() const; + private: + inline static constexpr int INPUT_IMG_WIDTH = 288; + inline static constexpr int INPUT_IMG_HEIGHT = 224; + Ort::Session session_{nullptr}; + // Inputs / outputs + cv::Mat scaled_frame_{}, input_mat_{}; + Ort::Value input_val_{nullptr}, output_val_{nullptr}; + std::array results_; + double last_inference_time_ = 0; +}; + + +class PoseEstimator +{ + public: + struct Face + { + cv::Quatf rotation; + cv::Matx33f rotaxis_cov_tril; // Lower triangular factor of Cholesky decomposition + cv::Rect2f box; + cv::Point2f center; + cv::Point2f center_stddev; + float size; + float size_stddev; + }; + + PoseEstimator(Ort::MemoryInfo &allocator_info, + Ort::Session &&session); + /** Inference + * + * Coordinates are defined wrt. the image space of the input `frame`. + * X goes right, Z (depth) into the image, Y points down (like pixel coordinates values increase from top to bottom) + */ + std::optional run(const cv::Mat &frame, const cv::Rect &box); + // Returns an image compatible with the 'frame' image for displaying. + cv::Mat last_network_input() const; + double last_inference_time_millis() const; + bool has_uncertainty() const { return has_uncertainty_; } + + private: + int64_t model_version_ = 0; // Queried meta data from the ONNX file + Ort::Session session_{nullptr}; // ONNX's runtime context for running the model + Ort::Allocator allocator_; // Memory allocator for tensors + // Inputs + cv::Mat scaled_frame_{}, input_mat_{}; // Input. One is the original crop, the other is rescaled (?) + std::vector input_val_; // Tensors to put into the model + std::vector input_names_; // Refers to the names in the onnx model. + // Outputs + cv::Vec output_coord_{}; // 2d Coordinate and head size output. + cv::Vec output_quat_{}; // Quaternion output + cv::Vec output_box_{}; // Bounding box output + cv::Matx33f output_rotaxis_scales_tril_{}; // Lower triangular matrix of LLT factorization of covariance of rotation vector as offset from output quaternion + cv::Vec output_eyes_{}; + cv::Vec output_coord_scales_{}; + std::vector output_val_; // Tensors to put the model outputs in. + std::vector output_names_; // Refers to the names in the onnx model. + // More bookkeeping + size_t num_recurrent_states_ = 0; + double last_inference_time_ = 0; + bool has_uncertainty_ = false; + bool has_eye_closed_detection_ = false; +}; + + +// Finds the intensity where x percent of pixels have less intensity than that. +int find_input_intensity_quantile(const cv::Mat& frame, float percentage); + +// Adjust brightness levels to full range and scales the value range to [-0.5, 0.5] +void normalize_brightness(const cv::Mat& frame, cv::Mat& out); + + +} // namespace neuralnet_tracker_ns \ No newline at end of file -- cgit v1.2.3