summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorStanislaw Halik <sthalik@misaki.pl>2017-06-30 11:43:52 +0200
committerStanislaw Halik <sthalik@misaki.pl>2017-06-30 11:48:45 +0200
commit800b9c7a2914c59b1b42192708f6c555eb180b43 (patch)
treea1a20e4f373f8e32f4ac1b434cc6f866c78029af
parent2ed75f0586897405fd26215fcfe7b9d17841affc (diff)
pose-widget: optimize CPU usage slightly
CPU usage of an empty tracker loop fell reliably from 1% to .9%.
-rw-r--r--pose-widget/pose-widget.cpp92
-rw-r--r--pose-widget/pose-widget.hpp17
2 files changed, 71 insertions, 38 deletions
diff --git a/pose-widget/pose-widget.cpp b/pose-widget/pose-widget.cpp
index 8646df30..aaa6bd36 100644
--- a/pose-widget/pose-widget.cpp
+++ b/pose-widget/pose-widget.cpp
@@ -136,14 +136,6 @@ void pose_transform::rotate_sync(double xAngle, double yAngle, double zAngle, do
}, xAngle, yAngle, zAngle, x, y, z);
}
-class Triangle {
- num dot00, dot01, dot11, invDenom;
- vec2 v0, v1, origin;
-public:
- Triangle(const vec2& p1, const vec2& p2, const vec2& p3);
- bool barycentric_coords(const vec2& px, vec2& uv, int& i) const;
-};
-
Triangle::Triangle(const vec2& p1, const vec2& p2, const vec2& p3)
{
origin = p1;
@@ -183,6 +175,9 @@ bool Triangle::barycentric_coords(const vec2& px, vec2& uv, int& i) const
return u >= 0 && v >= 0 && u + v <= 1;
}
+#if defined __GNUG__
+__attribute__((optimize("unroll-loops")))
+#endif
void pose_transform::project_quad_texture()
{
image.fill(Qt::transparent);
@@ -233,55 +228,66 @@ void pose_transform::project_quad_texture()
}
const QImage& tex = dir < 0 ? back : front;
- const int ow = tex.width(), oh = tex.height();
-
- vec2 origs[2][3] =
- {
- {
- vec2(0, 0),
- vec2(ow-1, 0),
- vec2(0, oh-1)
- },
- {
- vec2(ow-1, oh-1),
- vec2(0, oh-1) - vec2(ow-1, oh-1),
- vec2(ow-1, 0) - vec2(ow-1, oh-1),
- }
- };
Triangle t(projected[0], projected[1], projected[2]);
const unsigned orig_pitch = tex.bytesPerLine();
const unsigned dest_pitch = image.bytesPerLine();
- const unsigned char* orig = tex.bits();
- unsigned char* dest = image.bits() + offset*dest_pitch;
+ const unsigned char* restrict orig = tex.bits();
+ unsigned char* restrict dest = image.bits() + offset*dest_pitch;
const int orig_depth = tex.depth() / 8;
const int dest_depth = image.depth() / 8;
- /* image breakage? */
- if (orig_depth != 4)
+ if (unlikely(orig_depth != 4 || dest_depth != 4))
{
qDebug() << "pose-widget: octopus must be saved as .png with 32 bits pixel";
+ qDebug() << "pose-widget: target texture must be ARGB32";
return;
}
- if (dest_depth != 4)
+ static constexpr unsigned xmax = w, ymax = h;
+
+ if (uv_vec.size() < xmax * ymax)
+ uv_vec.resize(xmax * ymax);
+
+ for (unsigned y = 0; y < ymax; y++)
+ for (unsigned x = 0; x < xmax; x++)
+ {
+ uv_& restrict_ref uv = uv_vec[y * xmax + x];
+ if (!t.barycentric_coords(vec2(x, y), uv.coords, uv.i))
+ uv.i = -1;
+ }
+
+ const int ow = tex.width(), oh = tex.height();
+
+ vec2 const origs[2][3] =
{
- qDebug() << "pose-widget: target texture must be ARGB32";
- return;
- }
+ {
+ { 0, 0 },
+ { ow, 0 },
+ { 0, oh },
+ },
+ {
+ { ow, oh },
+ vec2(0, oh) - vec2(ow, oh),
+ vec2(ow, 0) - vec2(ow, oh),
+ }
+ };
- for (int y = 0; y < sy; y++)
- for (int x = 0; x < sx; x++)
+ for (unsigned y = 0; y < ymax; y++)
+ for (unsigned x = 0; x < xmax; x++)
{
- vec2 pos(x, y);
- vec2 uv;
- int i;
+ uv_ const& restrict_ref uv__ = uv_vec[y * xmax + x];
- if (t.barycentric_coords(pos, uv, i))
+ if (uv__.i != -1)
{
+ using uc = unsigned char;
+
+ vec2 const& uv = uv__.coords;
+ int const i = uv__.i;
+
const float fx = origs[i][0].x()
+ uv.x() * origs[i][2].x()
+ uv.y() * origs[i][1].x();
@@ -289,17 +295,21 @@ void pose_transform::project_quad_texture()
+ uv.x() * origs[i][2].y()
+ uv.y() * origs[i][1].y();
- using uc = unsigned char;
+#define BILINEAR_FILTER
+#if defined BILINEAR_FILTER
const unsigned px_ = fx + 1;
const unsigned py_ = fy + 1;
+#endif
const unsigned px = fx;
const unsigned py = fy;
const unsigned orig_pos = py * orig_pitch + px * orig_depth;
+#if defined BILINEAR_FILTER
const unsigned orig_pos_ = py_ * orig_pitch + px_ * orig_depth;
const unsigned orig_pos__ = py * orig_pitch + px_ * orig_depth;
const unsigned orig_pos___ = py_ * orig_pitch + px * orig_depth;
+#endif
// 1, 0 -- ax_, ay
// 0, 1 -- ax, ay_
@@ -307,21 +317,27 @@ void pose_transform::project_quad_texture()
// 0, 0 -- ax, ay
//const uc alpha = (a1 * ax + a3 * ax_) * ay + (a4 * ax + a2 * ax_) * ay_;
+#if defined BILINEAR_FILTER
const float ax_ = fx - unsigned(fx);
const float ay_ = fy - unsigned(fy);
const float ax = 1 - ax_;
const float ay = 1 - ay_;
+#endif
const unsigned pos = y * dest_pitch + (x+offset) * dest_depth;
for (int k = 0; k < 4; k++)
{
+#if defined BILINEAR_FILTER
const uc i = orig[orig_pos + k];
const uc i_ = orig[orig_pos_ + k];
const uc i__ = orig[orig_pos__ + k];
const uc i___ = orig[orig_pos___ + k];
dest[pos + k] = uc((i * ax + i__ * ax_) * ay + (i___ * ax + i_ * ax_) * ay_);
+#else
+ dest[pos + k] = orig[orig_pos + k];
+#endif
}
}
}
diff --git a/pose-widget/pose-widget.hpp b/pose-widget/pose-widget.hpp
index fc606e13..7bc66cd2 100644
--- a/pose-widget/pose-widget.hpp
+++ b/pose-widget/pose-widget.hpp
@@ -16,6 +16,7 @@
#include <mutex>
#include <atomic>
+#include <vector>
#ifdef BUILD_POSE_WIDGET
# define POSE_WIDGET_EXPORT Q_DECL_EXPORT
@@ -37,6 +38,14 @@ using lock_guard = std::unique_lock<std::mutex>;
class pose_widget;
+class Triangle {
+ num dot00, dot01, dot11, invDenom;
+ vec2 v0, v1, origin;
+public:
+ Triangle(const vec2& p1, const vec2& p2, const vec2& p3);
+ bool barycentric_coords(const vec2& px, vec2& uv, int& i) const;
+};
+
struct pose_transform final : private QThread
{
pose_transform(QWidget* dst);
@@ -67,6 +76,14 @@ struct pose_transform final : private QThread
QImage front, back;
QImage image, image2;
+ struct uv_
+ {
+ vec2 coords;
+ int i;
+ };
+
+ std::vector<uv_> uv_vec;
+
std::atomic<bool> fresh;
static constexpr int w = 320, h = 240;