Bitmask: add bitmask tx_size and block_size table
Add look up tables for different tx_size and block_size with
minimum processing unit as 4x4.
Change-Id: I6d37345814067ffdf72126cd67f04c40d0708615
diff --git a/av1/common/av1_loopfilter.c b/av1/common/av1_loopfilter.c
index 8aef897..28a04a5 100644
--- a/av1/common/av1_loopfilter.c
+++ b/av1/common/av1_loopfilter.c
@@ -275,6 +275,257 @@
1, 1, 1, 1, 1, 1, 0, 1 // INTER_COMPOUND_MODES (GLOBAL_GLOBALMV == 0)
};
+#if LOOP_FILTER_BITMASK
+// 256 bit masks (64x64 / 4x4) for left transform size for Y plane.
+// We use 4 uint64_t to represent the 256 bit.
+// Each 1 represents a position where we should apply a loop filter
+// across the left border of an 4x4 block boundary.
+//
+// In the case of TX_8x8-> ( in low order byte first we end up with
+// a mask that looks like this (-- and | are used for better view)
+//
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+// -----------------
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+//
+// A loopfilter should be applied to every other 4x4 horizontally.
+// TODO(chengchen): make these tables static
+const FilterMaskY left_txform_mask[TX_SIZES] = {
+ { { 0xffffffffffffffffULL, // TX_4X4,
+ 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL } },
+
+ { { 0x5555555555555555ULL, // TX_8X8,
+ 0x5555555555555555ULL, 0x5555555555555555ULL, 0x5555555555555555ULL } },
+
+ { { 0x1111111111111111ULL, // TX_16X16,
+ 0x1111111111111111ULL, 0x1111111111111111ULL, 0x1111111111111111ULL } },
+
+ { { 0x0101010101010101ULL, // TX_32X32,
+ 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0101010101010101ULL } },
+
+ { { 0x0001000100010001ULL, // TX_64X64,
+ 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL } },
+};
+
+// 256 bit masks (64x64 / 4x4) for above transform size for Y plane.
+// We use 4 uint64_t to represent the 256 bit.
+// Each 1 represents a position where we should apply a loop filter
+// across the top border of an 4x4 block boundary.
+//
+// In the case of TX_8x8-> ( in low order byte first we end up with
+// a mask that looks like this
+//
+// 11111111|11111111
+// 00000000|00000000
+// 11111111|11111111
+// 00000000|00000000
+// 11111111|11111111
+// 00000000|00000000
+// 11111111|11111111
+// 00000000|00000000
+// -----------------
+// 11111111|11111111
+// 00000000|00000000
+// 11111111|11111111
+// 00000000|00000000
+// 11111111|11111111
+// 00000000|00000000
+// 11111111|11111111
+// 00000000|00000000
+//
+// A loopfilter should be applied to every other 4x4 horizontally.
+const FilterMaskY above_txform_mask[TX_SIZES] = {
+ { { 0xffffffffffffffffULL, // TX_4X4
+ 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL } },
+
+ { { 0x0000ffff0000ffffULL, // TX_8X8
+ 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL } },
+
+ { { 0x000000000000ffffULL, // TX_16X16
+ 0x000000000000ffffULL, 0x000000000000ffffULL, 0x000000000000ffffULL } },
+
+ { { 0x000000000000ffffULL, // TX_32X32
+ 0x0000000000000000ULL, 0x000000000000ffffULL, 0x0000000000000000ULL } },
+
+ { { 0x000000000000ffffULL, // TX_64X64
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
+};
+
+// 64 bit mask to shift and set for each prediction size. A bit is set for
+// each 4x4 block that would be in the top left most block of the given block
+// size in the 64x64 block.
+const FilterMaskY size_mask_y[BLOCK_SIZES_ALL] = {
+ { { 0x0000000000000001ULL, // BLOCK_4X4
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
+
+ { { 0x0000000000010001ULL, // BLOCK_4X8
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
+
+ { { 0x0000000000000003ULL, // BLOCK_8X4
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
+
+ { { 0x0000000000030003ULL, // BLOCK_8X8
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
+
+ { { 0x0003000300030003ULL, // BLOCK_8X16
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
+
+ { { 0x00000000000f000fULL, // BLOCK_16X8
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
+
+ { { 0x000f000f000f000fULL, // BLOCK_16X16
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
+
+ { { 0x000f000f000f000fULL, // BLOCK_16X32
+ 0x000f000f000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
+
+ { { 0x00ff00ff00ff00ffULL, // BLOCK_32X16
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
+
+ { { 0x00ff00ff00ff00ffULL, // BLOCK_32X32
+ 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
+
+ { { 0x00ff00ff00ff00ffULL, // BLOCK_32X64
+ 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL } },
+
+ { { 0xffffffffffffffffULL, // BLOCK_64X32
+ 0xffffffffffffffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
+
+ { { 0xffffffffffffffffULL, // BLOCK_64X64
+ 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL } },
+
+#if CONFIG_EXT_PARTITION
+ // Y plane max coding block size is 128x128, but the codec divides it
+ // into 4 64x64 blocks.
+ // BLOCK_64X128
+ { { 0x0ULL, 0x0ULL, 0x0ULL, 0x0ULL } },
+ // BLOCK_128X64
+ { { 0x0ULL, 0x0ULL, 0x0ULL, 0x0ULL } },
+ // BLOCK_128X128
+ { { 0x0ULL, 0x0ULL, 0x0ULL, 0x0ULL } },
+#endif
+
+ { { 0x0001000100010001ULL, // BLOCK_4X16
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
+
+ { { 0x000000000000000fULL, // BLOCK_16X4
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
+
+ { { 0x0003000300030003ULL, // BLOCK_8X32
+ 0x0003000300030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
+
+ { { 0x0000000000ff00ffULL, // BLOCK_32X8
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
+
+ { { 0x000f000f000f000fULL, // BLOCK_16X64
+ 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x000f000f000f000fULL } },
+
+ { { 0xffffffffffffffffULL, // BLOCK_64X16
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
+#if CONFIG_EXT_PARTITION
+ // BLOCK_32X128
+ { { 0x0ULL, 0x0ULL, 0x0ULL, 0x0ULL } },
+ // BLOCK_128X32
+ { { 0x0ULL, 0x0ULL, 0x0ULL, 0x0ULL } },
+#endif
+};
+
+// U/V plane max transform size is 32x32 (format 420).
+// 64 bit masks (32x32 / 4x4) for left transform size for U/V plane.
+// We use one uint64_t to represent the 64 bit.
+// Each 1 represents a position where we should apply a loop filter
+// across the left border of an 4x4 block boundary.
+//
+// In the case of TX_8x8-> ( in low order byte first we end up with
+// a mask that looks like this
+//
+// 10101010
+// 10101010
+// 10101010
+// 10101010
+// 10101010
+// 10101010
+// 10101010
+// 10101010
+const FilterMaskUV left_txform_mask_uv[TX_SIZES - 1] = {
+ 0xffffffffffffffffULL, // TX_4X4
+ 0x5555555555555555ULL, // TX_8X8
+ 0x1111111111111111ULL, // TX_16X16
+ 0x0101010101010101ULL, // TX_32X32
+};
+
+// 64 bit masks (32x32 / 4x4) for above transform size for U/V plane.
+// We use one uint64_t to represent the 64 bit.
+// Each 1 represents a position where we should apply a loop filter
+// across the top border of an 4x4 block boundary.
+//
+// In the case of TX_8x8-> ( in low order byte first we end up with
+// a mask that looks like this
+//
+// 11111111
+// 00000000
+// 11111111
+// 00000000
+// 11111111
+// 00000000
+// 11111111
+// 00000000
+const FilterMaskUV above_txform_mask_uv[TX_SIZES - 1] = {
+ 0xffffffffffffffffULL, // TX_4X4
+ 0x00ff00ff00ff00ffULL, // TX_8X8
+ 0x000000ff000000ffULL, // TX_16X16
+ 0x00000000000000ffULL, // TX_32X32
+};
+
+// Y plane max coding block size is 128x128, but the codec divides it
+// into 4 64x64 blocks. U/V plane follows the pattern and size is
+// halved accordingly (format 420).
+const FilterMaskUV size_mask_u_v[BLOCK_SIZES_ALL] = {
+ 0x0000000000000001ULL, // BLOCK_4X4
+ 0x0000000000000101ULL, // BLOCK_4X8
+ 0x0000000000000003ULL, // BLOCK_8X4
+ 0x0000000000000303ULL, // BLOCK_8X8
+ 0x0000000003030303ULL, // BLOCK_8X16,
+ 0x0000000000000f0fULL, // BLOCK_16X8
+ 0x000000000f0f0f0fULL, // BLOCK_16X16
+ 0x0f0f0f0f0f0f0f0fULL, // BLOCK_16X32,
+ 0x00000000ffffffffULL, // BLOCK_32X16,
+ 0xffffffffffffffffULL, // BLOCK_32X32,
+ 0xffffffffffffffffULL, // BLOCK_32X64,
+ 0xffffffffffffffffULL, // BLOCK_64X32,
+ 0xffffffffffffffffULL, // BLOCK_64X64,
+#if CONFIG_EXT_PARTITION
+ 0xffffffffffffffffULL, // BLOCK_64X128,
+ 0xffffffffffffffffULL, // BLOCK_128X64,
+ 0xffffffffffffffffULL, // BLOCK_128X128,
+#endif
+ 0x0000000001010101ULL, // BLOCK_4X16,
+ 0x000000000000000fULL, // BLOCK_16X4,
+ 0x0303030303030303ULL, // BLOCK_8X32,
+ 0x000000000000ffffULL, // BLOCK_32X8,
+ 0x0f0f0f0f0f0f0f0fULL, // BLOCK_16X64,
+ 0x00000000ffffffffULL, // BLOCK_64X16
+#if CONFIG_EXT_PARTITION
+ 0xffffffffffffffffULL, // BLOCK_32X128,
+ 0xffffffffffffffffULL, // BLOCK_128X32,
+#endif
+};
+#endif // LOOP_FILTER_BITMASK
+
static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
int lvl;