summaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
author: Stanislaw Halik <sthalik@misaki.pl>, 2024-04-11 08:55:21 +0200
committer: Stanislaw Halik <sthalik@misaki.pl>, 2024-04-11 21:40:00 +0200
commit: c362121f04ffe72eed17126affc9129bbb354c49 (patch)
tree: 3a0918ca5f11bb961d465211a49bd4ecdf3659ac /src
parent: 9157adfe80dff953687ec364cc612cf45f2b9609 (diff)
rewrite bitmask impl
It's slower than the previous version but fixes a bug when stride isn't a multiple of 8. It can be improved further by setting `auto& byte` all at once instead of setting individual bits of multiple bytes in the innermost loop.

------------------------------------------------------------
New version                Time             CPU   Iterations
------------------------------------------------------------
Bitmask_mean            1911 us         1893 us            5
Bitmask_median          1911 us         1885 us            5
Bitmask_stddev          2.00 us         18.7 us            5
Bitmask_cv              0.10 %          0.99 %             5
------------------------------------------------------------

------------------------------------------------------------
Buggy version              Time             CPU   Iterations
------------------------------------------------------------
Bitmask_mean             841 us          841 us            5
Bitmask_median           839 us          837 us            5
Bitmask_stddev          3.29 us         7.80 us            5
Bitmask_cv              0.39 %          0.93 %             5
------------------------------------------------------------

------------------------------------------------------------
Naive version              Time             CPU   Iterations
------------------------------------------------------------
Bitmask_mean            4006 us         3997 us           10
Bitmask_median          4006 us         3997 us           10
Bitmask_stddev          2.29 us        0.000 us           10
Bitmask_cv              0.06 %          0.00 %            10
------------------------------------------------------------
Diffstat (limited to 'src')
-rw-r--r--  src/anim-atlas.cpp |  32
-rw-r--r--  src/bitmask.cpp    | 101
2 files changed, 103 insertions, 30 deletions
diff --git a/src/anim-atlas.cpp b/src/anim-atlas.cpp
index 3386de06..355ddeac 100644
--- a/src/anim-atlas.cpp
+++ b/src/anim-atlas.cpp
@@ -1,7 +1,6 @@
#include "anim-atlas.hpp"
#include "compat/assert.hpp"
#include "shaders/shader.hpp"
-#include "tile-defs.hpp"
#include "compat/exception.hpp"
#include <Corrade/Containers/BitArrayView.h>
#include <Corrade/Containers/StridedArrayView.h>
@@ -16,29 +15,6 @@ static constexpr inline auto rot_count = size_t(rotation_COUNT);
static_assert(std::size(name_array) == rot_count);
static_assert(rot_count == 8);
-namespace {
-
-constexpr uint8_t amin = 32;
-
-CORRADE_ALWAYS_INLINE void make_bitmask_impl(const ImageView2D& tex, BitArray& array)
-{
- array.resetAll(); // slow
- const auto pixels = tex.pixels();
- fm_soft_assert(tex.pixelSize() == 4);
- fm_soft_assert(pixels.stride()[1] == 4);
-
- const auto* const src = (const unsigned char*)pixels.data();
- const auto stride = (size_t)pixels.stride()[0];
- const auto size = pixels.size();
- const auto width = size[1], height = size[0];
-
- for (auto j = 0u; j < height; j++)
- for (auto i = 0u; i < width; i++)
- array.set((height - j - 1)*width + i, src[(j*stride + i*4)+3] >= amin);
-}
-
-} // namespace
-
uint8_t anim_atlas::rotation_to_index(StringView name)
{
for (uint8_t i = 0; i < rot_count; i++)
@@ -156,18 +132,14 @@ auto anim_atlas::frame_quad(const Vector3& center, rotation r, size_t i) const n
}};
}
-void anim_atlas::make_bitmask_(const ImageView2D& tex, BitArray& array)
-{
- return make_bitmask_impl(tex, array);
-}
-
BitArray anim_atlas::make_bitmask(const ImageView2D& tex)
{
if (tex.pixelSize() == 3)
return {};
const auto size = tex.pixels().size();
- auto array = BitArray{NoInit, size[0]*size[1]};
+ auto width = (size[0]+7)&~7uz;
+ auto array = BitArray{NoInit, width*size[1]};
make_bitmask_(tex, array);
return array;
}
diff --git a/src/bitmask.cpp b/src/bitmask.cpp
new file mode 100644
index 00000000..282c7a3a
--- /dev/null
+++ b/src/bitmask.cpp
@@ -0,0 +1,101 @@
+#include "compat/defs.hpp"
+#include "compat/exception.hpp"
+#include "anim-atlas.hpp"
+#include <cstring>
+#include <cr/BitArray.h>
+#include <cr/StridedArrayView.h>
+#include <mg/ImageView.h>
+
+namespace floormat {
+
+constexpr uint8_t amin = 32;
+
+#if 1
+using u8 = uint8_t;
+using u32 = uint32_t;
+
+namespace {
+
+template<u32 Count>
+CORRADE_ALWAYS_INLINE
+void bm_loop(const u8* __restrict src, u8* __restrict dest, u32 W, u32 H, u32 S, u32 i, u32 j)
+{
+ auto bitʹ = (H - j - 1)*W + i;
+ for (auto k = 0u; k < Count; k++)
+ {
+ bool value = src[(j * S + (i + k) * 4) + 3] >= amin;
+ auto bit = bitʹ + k;
+ auto& byte = dest[bit >> 3];
+ byte |= u8{ value } << (bit & 7);
+ }
+}
+
+template<int N>
+CORRADE_ALWAYS_INLINE
+void bm_loop_body(const u8* __restrict src, u8* __restrict dest, u32 width, u32 height, u32 stride)
+{
+ for (auto j = 0u; j < height; j++)
+ {
+ auto i = 0u;
+ while (i < (width & ~7u))
+ {
+ bm_loop<8>(src, dest, width, height, stride, i, j);
+ i += 8;
+ }
+ if constexpr(N > 0)
+ {
+ bm_loop<N>(src, dest, width, height, stride, i, j);
+ i += N;
+ }
+ }
+}
+
+} // namespace
+
+void anim_atlas::make_bitmask_(const ImageView2D& tex, BitArray& bitmask)
+{
+ const auto pixels = tex.pixels();
+ fm_soft_assert(tex.pixelSize() == 4);
+
+ const auto* src = (const u8*)pixels.data();
+ auto* const dest = (u8*)bitmask.data();
+ const auto stride = (u32)pixels.stride()[0];
+ const auto size = pixels.size();
+ const auto width = (u32)size[1];
+ const auto height = (u32)size[0];
+
+ fm_debug_assert(bitmask.size() % 8 == 0);
+ std::memset(bitmask.data(), 0, bitmask.size()/8);
+
+ switch (width & 7)
+ {
+ default: std::unreachable();
+ case 7: bm_loop_body<7>(src, dest, width, height, stride); break;
+ case 6: bm_loop_body<6>(src, dest, width, height, stride); break;
+ case 5: bm_loop_body<5>(src, dest, width, height, stride); break;
+ case 4: bm_loop_body<4>(src, dest, width, height, stride); break;
+ case 3: bm_loop_body<3>(src, dest, width, height, stride); break;
+ case 2: bm_loop_body<2>(src, dest, width, height, stride); break;
+ case 1: bm_loop_body<1>(src, dest, width, height, stride); break;
+ case 0: bm_loop_body<0>(src, dest, width, height, stride); break;
+ }
+}
+#else
+void anim_atlas::make_bitmask_(const ImageView2D& tex, BitArray& bitmask)
+{
+ const auto pixels = tex.pixels();
+ fm_soft_assert(tex.pixelSize() == 4);
+ bitmask.resetAll();
+
+ const auto* const src = (const unsigned char*)pixels.data();
+ const auto stride = (size_t)pixels.stride()[0];
+ const auto size = pixels.size();
+ const auto width = size[1], height = size[0];
+
+ for (auto j = 0u; j < height; j++)
+ for (auto i = 0u; i < width; i++)
+ bitmask.set((height - j - 1)*width + i, src[(j*stride + i*4)+3] >= amin);
+}
+#endif
+
+} // namespace floormat