1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
|
#pragma once
#include <optional>
#include <array>
#include <vector>
#include <onnxruntime_cxx_api.h>
#include <opencv2/core.hpp>
#include "opencv_contrib.h"
namespace neuralnet_tracker_ns
{
// Generally useful sigmoid function
float sigmoid(float x);
class Localizer
{
public:
Localizer(Ort::MemoryInfo &allocator_info,
Ort::Session &&session);
// Returns bounding wrt image coordinate of the input image
// The preceeding float is the score for being a face normalized to [0,1].
std::pair<float, cv::Rect2f> run(
const cv::Mat &frame);
double last_inference_time_millis() const;
private:
inline static constexpr int INPUT_IMG_WIDTH = 288;
inline static constexpr int INPUT_IMG_HEIGHT = 224;
Ort::Session session_{nullptr};
// Inputs / outputs
cv::Mat scaled_frame_{}, input_mat_{};
Ort::Value input_val_{nullptr}, output_val_{nullptr};
std::array<float, 5> results_;
double last_inference_time_ = 0;
};
class PoseEstimator
{
public:
struct Face
{
cv::Quatf rotation;
cv::Matx33f rotaxis_cov_tril; // Lower triangular factor of Cholesky decomposition
cv::Rect2f box;
cv::Point2f center;
cv::Point2f center_stddev;
float size;
float size_stddev;
};
PoseEstimator(Ort::MemoryInfo &allocator_info,
Ort::Session &&session);
/** Inference
*
* Coordinates are defined wrt. the image space of the input `frame`.
* X goes right, Z (depth) into the image, Y points down (like pixel coordinates values increase from top to bottom)
*/
std::optional<Face> run(const cv::Mat &frame, const cv::Rect &box);
// Returns an image compatible with the 'frame' image for displaying.
cv::Mat last_network_input() const;
double last_inference_time_millis() const;
bool has_uncertainty() const { return has_uncertainty_; }
private:
int64_t model_version_ = 0; // Queried meta data from the ONNX file
Ort::Session session_{nullptr}; // ONNX's runtime context for running the model
Ort::Allocator allocator_; // Memory allocator for tensors
// Inputs
cv::Mat scaled_frame_{}, input_mat_{}; // Input. One is the original crop, the other is rescaled (?)
std::vector<Ort::Value> input_val_; // Tensors to put into the model
std::vector<const char*> input_names_; // Refers to the names in the onnx model.
// Outputs
cv::Vec<float, 3> output_coord_{}; // 2d Coordinate and head size output.
cv::Vec<float, 4> output_quat_{}; // Quaternion output
cv::Vec<float, 4> output_box_{}; // Bounding box output
cv::Matx33f output_rotaxis_scales_tril_{}; // Lower triangular matrix of LLT factorization of covariance of rotation vector as offset from output quaternion
cv::Vec<float, 2> output_eyes_{};
cv::Vec<float, 3> output_coord_scales_{};
std::vector<Ort::Value> output_val_; // Tensors to put the model outputs in.
std::vector<const char*> output_names_; // Refers to the names in the onnx model.
// More bookkeeping
size_t num_recurrent_states_ = 0;
double last_inference_time_ = 0;
bool has_uncertainty_ = false;
bool has_eye_closed_detection_ = false;
};
// Finds the intensity where x percent of pixels have less intensity than that.
int find_input_intensity_quantile(const cv::Mat& frame, float percentage);
// Adjust brightness levels to full range and scales the value range to [-0.5, 0.5]
void normalize_brightness(const cv::Mat& frame, cv::Mat& out);
} // namespace neuralnet_tracker_ns
|