diff options
author | Stanislaw Halik <sthalik@misaki.pl> | 2024-04-11 08:55:21 +0200 |
---|---|---|
committer | Stanislaw Halik <sthalik@misaki.pl> | 2024-04-11 21:40:00 +0200 |
commit | c362121f04ffe72eed17126affc9129bbb354c49 (patch) | |
tree | 3a0918ca5f11bb961d465211a49bd4ecdf3659ac /src | |
parent | 9157adfe80dff953687ec364cc612cf45f2b9609 (diff) |
rewrite bitmask impl
It's slower than the previous version but fixes a bug when stride isn't
a multiple of 8.
It can be improved further by setting `auto& byte` all at once instead
of setting individual bits of multiple bytes in the innermost loop.
------------------------------------------------------------
New version Time CPU Iterations
------------------------------------------------------------
Bitmask_mean 1911 us 1893 us 5
Bitmask_median 1911 us 1885 us 5
Bitmask_stddev 2.00 us 18.7 us 5
Bitmask_cv 0.10 % 0.99 % 5
-------------------------------------------------------------
------------------------------------------------------------
Buggy version Time CPU Iterations
------------------------------------------------------------
Bitmask_mean 841 us 841 us 5
Bitmask_median 839 us 837 us 5
Bitmask_stddev 3.29 us 7.80 us 5
Bitmask_cv 0.39 % 0.93 % 5
------------------------------------------------------------
-----------------------------------------------------------
Naive version Time CPU Iterations
------------------------------------------------------------
Bitmask_mean 4006 us 3997 us 10
Bitmask_median 4006 us 3997 us 10
Bitmask_stddev 2.29 us 0.000 us 10
Bitmask_cv 0.06 % 0.00 % 10
------------------------------------------------------------
Diffstat (limited to 'src')
-rw-r--r-- | src/anim-atlas.cpp | 32 | ||||
-rw-r--r-- | src/bitmask.cpp | 101 |
2 files changed, 103 insertions, 30 deletions
diff --git a/src/anim-atlas.cpp b/src/anim-atlas.cpp index 3386de06..355ddeac 100644 --- a/src/anim-atlas.cpp +++ b/src/anim-atlas.cpp @@ -1,7 +1,6 @@ #include "anim-atlas.hpp" #include "compat/assert.hpp" #include "shaders/shader.hpp" -#include "tile-defs.hpp" #include "compat/exception.hpp" #include <Corrade/Containers/BitArrayView.h> #include <Corrade/Containers/StridedArrayView.h> @@ -16,29 +15,6 @@ static constexpr inline auto rot_count = size_t(rotation_COUNT); static_assert(std::size(name_array) == rot_count); static_assert(rot_count == 8); -namespace { - -constexpr uint8_t amin = 32; - -CORRADE_ALWAYS_INLINE void make_bitmask_impl(const ImageView2D& tex, BitArray& array) -{ - array.resetAll(); // slow - const auto pixels = tex.pixels(); - fm_soft_assert(tex.pixelSize() == 4); - fm_soft_assert(pixels.stride()[1] == 4); - - const auto* const src = (const unsigned char*)pixels.data(); - const auto stride = (size_t)pixels.stride()[0]; - const auto size = pixels.size(); - const auto width = size[1], height = size[0]; - - for (auto j = 0u; j < height; j++) - for (auto i = 0u; i < width; i++) - array.set((height - j - 1)*width + i, src[(j*stride + i*4)+3] >= amin); -} - -} // namespace - uint8_t anim_atlas::rotation_to_index(StringView name) { for (uint8_t i = 0; i < rot_count; i++) @@ -156,18 +132,14 @@ auto anim_atlas::frame_quad(const Vector3& center, rotation r, size_t i) const n }}; } -void anim_atlas::make_bitmask_(const ImageView2D& tex, BitArray& array) -{ - return make_bitmask_impl(tex, array); -} - BitArray anim_atlas::make_bitmask(const ImageView2D& tex) { if (tex.pixelSize() == 3) return {}; const auto size = tex.pixels().size(); - auto array = BitArray{NoInit, size[0]*size[1]}; + auto width = (size[0]+7)&~7uz; + auto array = BitArray{NoInit, width*size[1]}; make_bitmask_(tex, array); return array; } diff --git a/src/bitmask.cpp b/src/bitmask.cpp new file mode 100644 index 00000000..282c7a3a --- /dev/null +++ b/src/bitmask.cpp @@ -0,0 +1,101 @@ +#include "compat/defs.hpp" +#include "compat/exception.hpp" +#include "anim-atlas.hpp" +#include <cstring> +#include <cr/BitArray.h> +#include <cr/StridedArrayView.h> +#include <mg/ImageView.h> + +namespace floormat { + +constexpr uint8_t amin = 32; + +#if 1 +using u8 = uint8_t; +using u32 = uint32_t; + +namespace { + +template<u32 Count> +CORRADE_ALWAYS_INLINE +void bm_loop(const u8* __restrict src, u8* __restrict dest, u32 W, u32 H, u32 S, u32 i, u32 j) +{ + auto bitʹ = (H - j - 1)*W + i; + for (auto k = 0u; k < Count; k++) + { + bool value = src[(j * S + (i + k) * 4) + 3] >= amin; + auto bit = bitʹ + k; + auto& byte = dest[bit >> 3]; + byte |= u8{ value } << (bit & 7); + } +} + +template<int N> +CORRADE_ALWAYS_INLINE +void bm_loop_body(const u8* __restrict src, u8* __restrict dest, u32 width, u32 height, u32 stride) +{ + for (auto j = 0u; j < height; j++) + { + auto i = 0u; + while (i < (width & ~7u)) + { + bm_loop<8>(src, dest, width, height, stride, i, j); + i += 8; + } + if constexpr(N > 0) + { + bm_loop<N>(src, dest, width, height, stride, i, j); + i += N; + } + } +} + +} // namespace + +void anim_atlas::make_bitmask_(const ImageView2D& tex, BitArray& bitmask) +{ + const auto pixels = tex.pixels(); + fm_soft_assert(tex.pixelSize() == 4); + + const auto* src = (const u8*)pixels.data(); + auto* const dest = (u8*)bitmask.data(); + const auto stride = (u32)pixels.stride()[0]; + const auto size = pixels.size(); + const auto width = (u32)size[1]; + const auto height = (u32)size[0]; + + fm_debug_assert(bitmask.size() % 8 == 0); + std::memset(bitmask.data(), 0, bitmask.size()/8); + + switch (width & 7) + { + default: std::unreachable(); + case 7: bm_loop_body<7>(src, dest, width, height, stride); break; + case 6: bm_loop_body<6>(src, dest, width, height, stride); break; + case 5: bm_loop_body<5>(src, dest, width, height, stride); break; + case 4: bm_loop_body<4>(src, dest, width, height, stride); break; + case 3: bm_loop_body<3>(src, dest, width, height, stride); break; + case 2: bm_loop_body<2>(src, dest, width, height, stride); break; + case 1: bm_loop_body<1>(src, dest, width, height, stride); break; + case 0: bm_loop_body<0>(src, dest, width, height, stride); break; + } +} +#else +void anim_atlas::make_bitmask_(const ImageView2D& tex, BitArray& bitmask) +{ + const auto pixels = tex.pixels(); + fm_soft_assert(tex.pixelSize() == 4); + bitmask.resetAll(); + + const auto* const src = (const unsigned char*)pixels.data(); + const auto stride = (size_t)pixels.stride()[0]; + const auto size = pixels.size(); + const auto width = size[1], height = size[0]; + + for (auto j = 0u; j < height; j++) + for (auto i = 0u; i < width; i++) + bitmask.set((height - j - 1)*width + i, src[(j*stride + i*4)+3] >= amin); +} +#endif + +} // namespace floormat |