summaryrefslogtreecommitdiffhomepage
path: root/tracker-neuralnet/deadzone_filter.cpp
blob: b41afdba62e70f7652e713faf4b47c4bae9df363 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#include "deadzone_filter.h"
#include "model_adapters.h"
#include "opencv_contrib.h"
#include "unscented_trafo.h"

#include <algorithm>
#include <cmath>

#include <opencv2/core/base.hpp>
#include <opencv2/core/matx.hpp>
#include <opencv2/core/quaternion.hpp>

namespace neuralnet_tracker_ns
{

using namespace cvcontrib;

using StateVec = cv::Vec<float,6>;
using StateCov = cv::Matx<float,6,6>;

static constexpr int num_sigmas = ukf_cv::MerweScaledSigmaPoints<6>::num_sigmas;
static constexpr float img_scale = 200.f;
static constexpr float world_scale = 1000.f; // mm

// Builds a lower-triangular factor of the measurement uncertainty in the 6d
// tangent space [x, y, size, rotation-vector]. The pixel-space standard
// deviations are normalized by `img_scale`; the rotational part is taken
// directly from the network's covariance factor.
StateCov make_tangent_space_uncertainty_tril(const PoseEstimator::Face &face)
{
    StateCov factor = StateCov::eye();
    factor(0, 0) = face.center_stddev.x / img_scale;
    factor(1, 1) = face.center_stddev.y / img_scale;
    factor(2, 2) = face.size_stddev / img_scale;
    set_minor<3,3>(factor, 3, 3, face.rotaxis_cov_tril);
    return factor;
}


// Applies a 6d tangent-space offset (translation + rotation vector) to a
// pose: the translation part is added, the rotational part is converted to a
// quaternion and composed on the right.
QuatPose apply_offset(const QuatPose& pose, const StateVec& offset)
{
    const cv::Vec3f delta_pos{ offset[0], offset[1], offset[2] };
    const cv::Quatf delta_rot = cv::Quatf::createFromRvec(cv::Vec3f{ offset[3], offset[4], offset[5] });
    return { pose.rot * delta_rot, pose.pos + delta_pos };
}


// Applies a 6d tangent-space offset to an image-space face measurement.
// Translation and size components are in normalized units and scaled back to
// pixels; the rotation components form a rotation vector composed onto the
// measured orientation. Fields the offset does not touch are left
// value-initialized.
PoseEstimator::Face apply_offset(const PoseEstimator::Face& face, const StateVec& offset)
{
    const cv::Quatf delta_rot = cv::Quatf::createFromRvec(cv::Vec3f{ offset[3], offset[4], offset[5] });
    const auto rotation = face.rotation * delta_rot;

    const cv::Point2f center = {
        face.center.x + offset[0]*img_scale,
        face.center.y + offset[1]*img_scale
    };

    // Guard against sigma points built with a stddev so large that
    // "mean - constant*stddev" yields a negative head size. Clamping to a
    // fraction of the measured size keeps the value sane; the unscented
    // transform this feeds into is designed to cope with non-linearities
    // like this clamp.
    const float size = std::max(0.1f*face.size, face.size + offset[2]*img_scale);

    return PoseEstimator::Face{
        rotation,
        {},
        {},
        center,
        {},
        size,
        {}
    };
}


// Expresses `pose` in the tangent space at `reference`: positional delta plus
// the rotation vector of the relative rotation reference^-1 * pose.
StateVec relative_to(const QuatPose& reference, const QuatPose& pose)
{
    const auto delta_pos = pose.pos - reference.pos;
    const auto delta_rvec = toRotVec(reference.rot.conjugate()*pose.rot);
    return StateVec{ delta_pos[0], delta_pos[1], delta_pos[2], delta_rvec[0], delta_rvec[1], delta_rvec[2] };
}


// Maps every sigma-point pose into the tangent space at `pose`.
ukf_cv::SigmaPoints<6> relative_to(const QuatPose& pose, const std::array<QuatPose,num_sigmas>& sigmas)
{
    ukf_cv::SigmaPoints<6> result;
    for (int i = 0; i < num_sigmas; ++i)
        result[i] = relative_to(pose, sigmas[i]);
    return result;
}


// Converts each tangent-space sigma point into a world pose: perturb the
// measured face by the sigma point, lift the result through the face->world
// transform, and normalize the translation by `world_scale`.
std::array<QuatPose,num_sigmas> compute_world_pose_from_sigma_point(const PoseEstimator::Face& face, const ukf_cv::SigmaPoints<6>& sigmas, Face2WorldFunction face2world)
{
    std::array<QuatPose,num_sigmas> poses;
    for (int i = 0; i < num_sigmas; ++i)
    {
        // Unpack the state vector by perturbing the measurement in image space ...
        const auto perturbed_face = apply_offset(face, sigmas[i]);
        // ... then transform it into a world-space pose.
        QuatPose pose = face2world(perturbed_face.rotation, perturbed_face.center, perturbed_face.size);
        pose.pos /= world_scale;
        poses[i] = pose;
    }
    return poses;
}


// Attenuates the measured pose offset to realize a soft deadzone.
//
// `offset` and `offset_cov` represent a multivariate normal N(mu, Cov): the
// probability of the new pose measured w.r.t. the previous one,
//     Prob(x) ~ exp(-(x-mu)^T Cov^-1 (x-mu)).
// The deadzone size is scaled by the covariance projected onto the offset
// direction: with x = mu - mu/|mu| * alpha,
//     p(alpha) ~ exp(-alpha^2/|mu|^2 * mu^T Cov^-1 mu) = exp(-alpha^2/sigma^2),
// i.e. a 1d normal with sigma^2 = |mu|^2 / (mu^T Cov^-1 mu). The offset
// magnitude measured in units of that sigma is compared to the configured
// deadzone size, and a sigmoid gives a smooth pass-through transition.
//
// The unnamed float parameter is the time step `dt`, currently unused but
// kept for interface stability.
StateVec apply_filter_to_offset(const StateVec& offset, const StateCov& offset_cov, float /*dt*/, const FiltParams& params)
{
    bool inversion_ok = true;

    // mu^T Cov^-1 mu: the squared offset length in units of its own stddev.
    const float len_div_sigma_sqr = offset.dot(offset_cov.inv(cv::DECOMP_CHOLESKY, &inversion_ok) * offset);

    // If the Cholesky inversion fails (covariance not positive definite),
    // pass the offset through unfiltered rather than suppressing it.
    const float attenuation = inversion_ok
        ? sigmoid((std::sqrt(len_div_sigma_sqr) - params.deadzone_size)*params.deadzone_hardness)
        : 1.f;

    return offset*attenuation;
}


// Deadzone filter entry point. Builds the measurement uncertainty around the
// observed `face`, propagates it through `face2world` with an unscented
// transform, computes the resulting pose offset relative to `previous_pose_`,
// attenuates that offset (the actual deadzone) and applies it to the previous
// pose to obtain the filtered output.
QuatPose apply_filter(const PoseEstimator::Face &face, const QuatPose& previous_pose_, float dt, Face2WorldFunction face2world, const FiltParams& params)
{
    ukf_cv::MerweScaledSigmaPoints<6> sigma_point_gen;

    // Work with translations normalized by `world_scale` throughout.
    QuatPose prev_pose = previous_pose_;
    prev_pose.pos /= world_scale;

    // Lower-triangular covariance factor for the offset from the observed
    // values in `face`.
    const auto cov_tril = make_tangent_space_uncertainty_tril(face);

    // Sigma points of a zero-mean distribution with that covariance ...
    const ukf_cv::SigmaPoints<6> tangent_sigmas = sigma_point_gen.compute_sigmas(to_vec(StateVec::zeros()), cov_tril, true);

    // ... pushed one by one through the (non-linear) face->world transform;
    // this is where the callback comes into play.
    const std::array<QuatPose,num_sigmas> world_sigmas = compute_world_pose_from_sigma_point(face, tangent_sigmas, std::move(face2world));

    // Offsets of the transformed sigma points w.r.t. the previous pose.
    const ukf_cv::SigmaPoints<6> delta_sigmas = relative_to(prev_pose, world_sigmas);

    // Mean and covariance of the offset distribution.
    const auto [offset, offset_cov] = sigma_point_gen.compute_statistics(delta_sigmas);

    // The deadzone is applied to the offset, and the previous pose is then
    // transformed by the filtered offset to arrive at the final output.
    const StateVec filtered_offset = apply_filter_to_offset(offset, offset_cov, dt, params);

    QuatPose new_pose = apply_offset(prev_pose, filtered_offset);
    new_pose.pos *= world_scale;

    return new_pose;
}


} // namespace neuralnet_tracker_ns