Bitmask patch 0: Store info for bitmask at decoder
Purpose: build bitmask for loop filtering to speed up the decoder.
Result: make decoder 6% faster, on single thread.
The flag LOOP_FILTER_BITMASK controls whether the new feature is used.
Now it is turned off; I will turn it on after all of the following CLs are
checked in. Please expect a few follow-up CLs.
This CL:
Store information that determines whether the filter is applied or not at the decoder.
Then build the bitmask for the whole frame, after all tiles are decoded.
Change-Id: I3902ed825b674cff8131d0e9cbc14df3f9a566f6
diff --git a/av1/common/av1_loopfilter.c b/av1/common/av1_loopfilter.c
index df6d4f8..0b6020a 100644
--- a/av1/common/av1_loopfilter.c
+++ b/av1/common/av1_loopfilter.c
@@ -68,23 +68,6 @@
// 10101010|10101010
//
// A loopfilter should be applied to every other 4x4 horizontally.
-// TODO(chengchen): make these tables static
-const FilterMask left_txform_mask[TX_SIZES] = {
- { { 0xffffffffffffffffULL, // TX_4X4,
- 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL } },
-
- { { 0x5555555555555555ULL, // TX_8X8,
- 0x5555555555555555ULL, 0x5555555555555555ULL, 0x5555555555555555ULL } },
-
- { { 0x1111111111111111ULL, // TX_16X16,
- 0x1111111111111111ULL, 0x1111111111111111ULL, 0x1111111111111111ULL } },
-
- { { 0x0101010101010101ULL, // TX_32X32,
- 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0101010101010101ULL } },
-
- { { 0x0001000100010001ULL, // TX_64X64,
- 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL } },
-};
// 256 bit masks (64x64 / 4x4) for above transform size for Y plane.
// We use 4 uint64_t to represent the 256 bit.
@@ -113,92 +96,6 @@
// 00000000|00000000
//
// A loopfilter should be applied to every other 4x4 horizontally.
-const FilterMask above_txform_mask[TX_SIZES] = {
- { { 0xffffffffffffffffULL, // TX_4X4
- 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL } },
-
- { { 0x0000ffff0000ffffULL, // TX_8X8
- 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL } },
-
- { { 0x000000000000ffffULL, // TX_16X16
- 0x000000000000ffffULL, 0x000000000000ffffULL, 0x000000000000ffffULL } },
-
- { { 0x000000000000ffffULL, // TX_32X32
- 0x0000000000000000ULL, 0x000000000000ffffULL, 0x0000000000000000ULL } },
-
- { { 0x000000000000ffffULL, // TX_64X64
- 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-};
-
-// 64 bit mask to shift and set for each prediction size. A bit is set for
-// each 4x4 block that would be in the top left most block of the given block
-// size in the 64x64 block.
-const FilterMask size_mask_y[BLOCK_SIZES_ALL] = {
- { { 0x0000000000000001ULL, // BLOCK_4X4
- 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
- { { 0x0000000000010001ULL, // BLOCK_4X8
- 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
- { { 0x0000000000000003ULL, // BLOCK_8X4
- 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
- { { 0x0000000000030003ULL, // BLOCK_8X8
- 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
- { { 0x0003000300030003ULL, // BLOCK_8X16
- 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
- { { 0x00000000000f000fULL, // BLOCK_16X8
- 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
- { { 0x000f000f000f000fULL, // BLOCK_16X16
- 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
- { { 0x000f000f000f000fULL, // BLOCK_16X32
- 0x000f000f000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
- { { 0x00ff00ff00ff00ffULL, // BLOCK_32X16
- 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
- { { 0x00ff00ff00ff00ffULL, // BLOCK_32X32
- 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
- { { 0x00ff00ff00ff00ffULL, // BLOCK_32X64
- 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL } },
-
- { { 0xffffffffffffffffULL, // BLOCK_64X32
- 0xffffffffffffffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
- { { 0xffffffffffffffffULL, // BLOCK_64X64
- 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL } },
- // Y plane max coding block size is 128x128, but the codec divides it
- // into 4 64x64 blocks.
- // BLOCK_64X128
- { { 0x0ULL, 0x0ULL, 0x0ULL, 0x0ULL } },
- // BLOCK_128X64
- { { 0x0ULL, 0x0ULL, 0x0ULL, 0x0ULL } },
- // BLOCK_128X128
- { { 0x0ULL, 0x0ULL, 0x0ULL, 0x0ULL } },
-
- { { 0x0001000100010001ULL, // BLOCK_4X16
- 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
- { { 0x000000000000000fULL, // BLOCK_16X4
- 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
- { { 0x0003000300030003ULL, // BLOCK_8X32
- 0x0003000300030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
- { { 0x0000000000ff00ffULL, // BLOCK_32X8
- 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
- { { 0x000f000f000f000fULL, // BLOCK_16X64
- 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x000f000f000f000fULL } },
-
- { { 0xffffffffffffffffULL, // BLOCK_64X16
- 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } }
-};
LoopFilterMask *get_loop_filter_mask(const AV1_COMMON *const cm, int mi_row,
int mi_col) {
@@ -248,10 +145,10 @@
SIMD_WIDTH);
}
}
-static uint8_t get_filter_level(const AV1_COMMON *cm,
- const loop_filter_info_n *lfi_n,
- const int dir_idx, int plane,
- const MB_MODE_INFO *mbmi) {
+
+uint8_t get_filter_level(const AV1_COMMON *cm, const loop_filter_info_n *lfi_n,
+ const int dir_idx, int plane,
+ const MB_MODE_INFO *mbmi) {
const int segment_id = mbmi->segment_id;
if (cm->delta_lf_present_flag) {
int delta_lf;
@@ -389,7 +286,7 @@
// After locating which uint64_t, mi_row % 4 is the
// row offset, and each row has 16 = 1 << stride_log2 4x4 units.
// Therefore, shift = (row << stride_log2) + mi_col;
-static int get_index_shift(int mi_col, int mi_row, int *index) {
+int get_index_shift(int mi_col, int mi_row, int *index) {
// *index = mi_row >> 2;
// rows = mi_row % 4;
// stride_log2 = 4;
@@ -599,11 +496,12 @@
const TX_SIZE prev_tx_size =
plane ? av1_get_max_uv_txsize(mbmi_prev->sb_type, ssx, ssy)
: mbmi_prev->tx_size;
- const TX_SIZE min_tx_size =
- (dir == VERT_EDGE) ? AOMMIN(txsize_horz_map[tx_size],
- txsize_horz_map[prev_tx_size])
- : AOMMIN(txsize_vert_map[tx_size],
- txsize_vert_map[prev_tx_size]);
+ TX_SIZE min_tx_size = (dir == VERT_EDGE)
+ ? AOMMIN(txsize_horz_map[tx_size],
+ txsize_horz_map[prev_tx_size])
+ : AOMMIN(txsize_vert_map[tx_size],
+ txsize_vert_map[prev_tx_size]);
+ min_tx_size = AOMMIN(min_tx_size, TX_16X16);
assert(min_tx_size < TX_SIZES);
const int row = r % MI_SIZE_64X64;
const int col = c % MI_SIZE_64X64;
diff --git a/av1/common/av1_loopfilter.h b/av1/common/av1_loopfilter.h
index 1229c0d..7f4ade7 100644
--- a/av1/common/av1_loopfilter.h
+++ b/av1/common/av1_loopfilter.h
@@ -67,6 +67,14 @@
// V plane vertical edge and horizontal edge filter level
uint8_t lfl_v_hor[MI_SIZE_64X64][MI_SIZE_64X64];
uint8_t lfl_v_ver[MI_SIZE_64X64][MI_SIZE_64X64];
+
+ // other info
+ FilterMask skip;
+ FilterMask is_vert_border;
+ FilterMask is_horz_border;
+ // Y or UV planes, 5 tx sizes: 4x4, 8x8, 16x16, 32x32, 64x64
+ FilterMask tx_size_ver[2][5];
+ FilterMask tx_size_hor[2][5];
} LoopFilterMask;
#endif // LOOP_FILTER_BITMASK
@@ -119,9 +127,15 @@
void av1_loop_filter_frame_init(struct AV1Common *cm, int plane_start,
int plane_end);
+#if LOOP_FILTER_BITMASK
void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
struct macroblockd *mbd, int plane_start,
int plane_end, int partial_frame);
+#else
+void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
+ struct macroblockd *mbd, int plane_start,
+ int plane_end, int partial_frame);
+#endif
void av1_filter_block_plane_vert(const struct AV1Common *const cm,
const MACROBLOCKD *const xd, const int plane,
@@ -142,6 +156,9 @@
MACROBLOCKD *xd;
} LFWorkerData;
+uint8_t get_filter_level(const struct AV1Common *cm,
+ const loop_filter_info_n *lfi_n, const int dir_idx,
+ int plane, const MB_MODE_INFO *mbmi);
#if LOOP_FILTER_BITMASK
void av1_setup_bitmask(struct AV1Common *const cm, int mi_row, int mi_col,
int plane, int subsampling_x, int subsampling_y,
@@ -154,6 +171,43 @@
void av1_filter_block_plane_hor(struct AV1Common *const cm,
struct macroblockd_plane *const plane, int pl,
int mi_row, int mi_col);
+LoopFilterMask *get_loop_filter_mask(const struct AV1Common *const cm,
+ int mi_row, int mi_col);
+int get_index_shift(int mi_col, int mi_row, int *index);
+
+static const FilterMask left_txform_mask[TX_SIZES] = {
+ { { 0x0000000000000001ULL, // TX_4X4,
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
+
+ { { 0x0000000000010001ULL, // TX_8X8,
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
+
+ { { 0x0001000100010001ULL, // TX_16X16,
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
+
+ { { 0x0001000100010001ULL, // TX_32X32,
+ 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
+
+ { { 0x0001000100010001ULL, // TX_64X64,
+ 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL } },
+};
+
+static const uint64_t above_txform_mask[2][TX_SIZES] = {
+ {
+ 0x0000000000000001ULL, // TX_4X4
+ 0x0000000000000003ULL, // TX_8X8
+ 0x000000000000000fULL, // TX_16X16
+ 0x00000000000000ffULL, // TX_32X32
+ 0x000000000000ffffULL, // TX_64X64
+ },
+ {
+ 0x0000000000000001ULL, // TX_4X4
+ 0x0000000000000005ULL, // TX_8X8
+ 0x0000000000000055ULL, // TX_16X16
+ 0x0000000000005555ULL, // TX_32X32
+ 0x0000000055555555ULL, // TX_64X64
+ },
+};
#endif
#ifdef __cplusplus
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 03cedef..2cfaf37 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -1329,6 +1329,261 @@
}
}
+#if LOOP_FILTER_BITMASK
+static void store_bitmask_info(AV1_COMMON *cm, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, MB_MODE_INFO *mbmi, int type) {
+ if (type == 0) {
+ // TODO(chengchen): optimize step
+ LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
+ // vertical direction
+ for (int r = mi_row; r < mi_row + mi_size_high[bsize]; ++r) {
+ for (int c = mi_col; c < mi_col + mi_size_wide[bsize];) {
+ int index = 0;
+ const int row = r % MI_SIZE_64X64;
+ const int col = c % MI_SIZE_64X64;
+ const int shift = get_index_shift(col, row, &index);
+ const uint64_t mask = ((uint64_t)1 << shift);
+ // Y plane
+ const int blk_row = r & (mi_size_high[bsize] - 1);
+ const int blk_col = c & (mi_size_wide[bsize] - 1);
+ const TX_SIZE mb_tx_size = mbmi->inter_tx_size[av1_get_txb_size_index(
+ bsize, blk_row, blk_col)];
+ const TX_SIZE tx_size = txsize_horz_map[mb_tx_size];
+ lfm->tx_size_ver[0][tx_size].bits[index] |= mask;
+ // U/V plane
+ const TX_SIZE tx_size_uv = txsize_horz_map[av1_get_max_uv_txsize(
+ mbmi->sb_type, cm->seq_params.subsampling_x,
+ cm->seq_params.subsampling_y)];
+ lfm->tx_size_ver[1][tx_size_uv].bits[index] |= mask;
+
+ c += tx_size_wide_unit[tx_size];
+ }
+ }
+ // horizontal direction
+ for (int c = mi_col; c < mi_col + mi_size_wide[bsize]; ++c) {
+ for (int r = mi_row; r < mi_row + mi_size_high[bsize];) {
+ int index = 0;
+ const int row = r % MI_SIZE_64X64;
+ const int col = c % MI_SIZE_64X64;
+ const int shift = get_index_shift(col, row, &index);
+ const uint64_t mask = ((uint64_t)1 << shift);
+ // Y plane
+ const int blk_row = r & (mi_size_high[bsize] - 1);
+ const int blk_col = c & (mi_size_wide[bsize] - 1);
+ const TX_SIZE mb_tx_size = mbmi->inter_tx_size[av1_get_txb_size_index(
+ bsize, blk_row, blk_col)];
+ const TX_SIZE tx_size = txsize_vert_map[mb_tx_size];
+ lfm->tx_size_hor[0][tx_size].bits[index] |= mask;
+ // U/V plane
+ const TX_SIZE tx_size_uv = txsize_vert_map[av1_get_max_uv_txsize(
+ mbmi->sb_type, cm->seq_params.subsampling_x,
+ cm->seq_params.subsampling_y)];
+ lfm->tx_size_hor[1][tx_size_uv].bits[index] |= mask;
+
+ r += tx_size_high_unit[tx_size];
+ }
+ }
+ // store other info
+ for (int r = mi_row; r < mi_row + mi_size_high[bsize]; ++r) {
+ for (int c = mi_col; c < mi_col + mi_size_wide[bsize];) {
+ int index = 0;
+ const int row = r % MI_SIZE_64X64;
+ const int col = c % MI_SIZE_64X64;
+ const int shift = get_index_shift(col, row, &index);
+ const uint64_t mask = ((uint64_t)1 << shift);
+ if (mbmi->skip && is_inter_block(mbmi)) lfm->skip.bits[index] |= mask;
+ if (r == mi_row) lfm->is_horz_border.bits[index] |= mask;
+ if (c == mi_col) lfm->is_vert_border.bits[index] |= mask;
+ const int blk_row = r & (mi_size_high[bsize] - 1);
+ const int blk_col = c & (mi_size_wide[bsize] - 1);
+ const TX_SIZE mb_tx_size = mbmi->inter_tx_size[av1_get_txb_size_index(
+ bsize, blk_row, blk_col)];
+ c += tx_size_wide_unit[mb_tx_size];
+ }
+ }
+ for (int c = mi_col; c < mi_col + mi_size_wide[bsize]; ++c) {
+ for (int r = mi_row; r < mi_row + mi_size_high[bsize];) {
+ int index = 0;
+ const int row = r % MI_SIZE_64X64;
+ const int col = c % MI_SIZE_64X64;
+ const int shift = get_index_shift(col, row, &index);
+ const uint64_t mask = ((uint64_t)1 << shift);
+ if (mbmi->skip && is_inter_block(mbmi)) lfm->skip.bits[index] |= mask;
+ if (r == mi_row) lfm->is_horz_border.bits[index] |= mask;
+ if (c == mi_col) lfm->is_vert_border.bits[index] |= mask;
+ const int blk_row = r & (mi_size_high[bsize] - 1);
+ const int blk_col = c & (mi_size_wide[bsize] - 1);
+ const TX_SIZE mb_tx_size = mbmi->inter_tx_size[av1_get_txb_size_index(
+ bsize, blk_row, blk_col)];
+ r += tx_size_high_unit[mb_tx_size];
+ }
+ }
+ const uint8_t level_vert_y = get_filter_level(cm, &cm->lf_info, 0, 0, mbmi);
+ const uint8_t level_vert_u = get_filter_level(cm, &cm->lf_info, 0, 1, mbmi);
+ const uint8_t level_vert_v = get_filter_level(cm, &cm->lf_info, 0, 2, mbmi);
+ const uint8_t level_horz_y = get_filter_level(cm, &cm->lf_info, 1, 0, mbmi);
+ const uint8_t level_horz_u = get_filter_level(cm, &cm->lf_info, 1, 1, mbmi);
+ const uint8_t level_horz_v = get_filter_level(cm, &cm->lf_info, 1, 2, mbmi);
+ const int col_start = mi_col % MI_SIZE_64X64;
+ for (int r = mi_row; r < mi_row + mi_size_high[bsize]; r++) {
+ const int row = r % MI_SIZE_64X64;
+ memset(&lfm->lfl_y_ver[row][col_start], level_vert_y,
+ sizeof(uint8_t) * mi_size_wide[bsize]);
+ memset(&lfm->lfl_u_ver[row][col_start], level_vert_u,
+ sizeof(uint8_t) * mi_size_wide[bsize]);
+ memset(&lfm->lfl_v_ver[row][col_start], level_vert_v,
+ sizeof(uint8_t) * mi_size_wide[bsize]);
+ memset(&lfm->lfl_y_hor[row][col_start], level_horz_y,
+ sizeof(uint8_t) * mi_size_wide[bsize]);
+ memset(&lfm->lfl_u_hor[row][col_start], level_horz_u,
+ sizeof(uint8_t) * mi_size_wide[bsize]);
+ memset(&lfm->lfl_v_hor[row][col_start], level_horz_v,
+ sizeof(uint8_t) * mi_size_wide[bsize]);
+ }
+ } else {
+ // TODO(chengchen): optimize step
+ LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
+ // vertical direction
+ const TX_SIZE tx_size_y_vert = txsize_vert_map[mbmi->tx_size];
+ const TX_SIZE tx_size_y_horz = txsize_horz_map[mbmi->tx_size];
+ const TX_SIZE tx_size_uv_vert = txsize_vert_map[av1_get_max_uv_txsize(
+ mbmi->sb_type, cm->seq_params.subsampling_x,
+ cm->seq_params.subsampling_y)];
+ const TX_SIZE tx_size_uv_horz = txsize_horz_map[av1_get_max_uv_txsize(
+ mbmi->sb_type, cm->seq_params.subsampling_x,
+ cm->seq_params.subsampling_y)];
+ for (int r = mi_row; r < mi_row + mi_size_high[bsize];
+ r += tx_size_high_unit[mbmi->tx_size]) {
+ for (int c = mi_col; c < mi_col + mi_size_wide[bsize];
+ c += tx_size_wide_unit[mbmi->tx_size]) {
+ int index = 0;
+ const int row = r % MI_SIZE_64X64;
+ const int col = c % MI_SIZE_64X64;
+ const int shift = get_index_shift(col, row, &index);
+ if (tx_size_y_vert <= TX_8X8) {
+ lfm->tx_size_ver[0][tx_size_y_horz].bits[index] |=
+ (left_txform_mask[tx_size_y_vert].bits[0] << shift);
+ } else if (tx_size_y_vert == TX_16X16) {
+ lfm->tx_size_ver[0][tx_size_y_horz].bits[index] |=
+ (left_txform_mask[tx_size_y_vert].bits[0] << col);
+ } else if (tx_size_y_vert == TX_32X32) {
+ for (int i = 0; i < 2; ++i) {
+ lfm->tx_size_ver[0][tx_size_y_horz].bits[i + index] |=
+ (left_txform_mask[tx_size_y_vert].bits[i] << col);
+ }
+ } else {
+ for (int i = 0; i < 4; ++i) {
+ lfm->tx_size_ver[0][tx_size_y_horz].bits[i + index] |=
+ (left_txform_mask[tx_size_y_vert].bits[i] << col);
+ }
+ }
+ }
+ }
+ for (int r = mi_row; r < mi_row + mi_size_high[bsize];
+ r += tx_size_high_unit[tx_size_uv_vert]) {
+ for (int c = mi_col; c < mi_col + mi_size_wide[bsize];
+ c += tx_size_wide_unit[tx_size_uv_horz]) {
+ int index = 0;
+ const int row = r % MI_SIZE_64X64;
+ const int col = c % MI_SIZE_64X64;
+ const int shift = get_index_shift(col, row, &index);
+ if (tx_size_uv_vert <= TX_8X8) {
+ lfm->tx_size_ver[1][tx_size_uv_horz].bits[index] |=
+ (left_txform_mask[tx_size_uv_vert].bits[0] << shift);
+ } else if (tx_size_uv_vert == TX_16X16) {
+ lfm->tx_size_ver[1][tx_size_uv_horz].bits[index] |=
+ (left_txform_mask[tx_size_uv_vert].bits[0] << col);
+ } else if (tx_size_uv_vert == TX_32X32) {
+ for (int i = 0; i < 2; ++i) {
+ lfm->tx_size_ver[1][tx_size_uv_horz].bits[i + index] |=
+ (left_txform_mask[tx_size_uv_vert].bits[i] << col);
+ }
+ } else {
+ for (int i = 0; i < 4; ++i) {
+ lfm->tx_size_ver[1][tx_size_uv_horz].bits[i + index] |=
+ (left_txform_mask[tx_size_uv_vert].bits[i] << col);
+ }
+ }
+ }
+ }
+ // horizontal direction
+ for (int c = mi_col; c < mi_col + mi_size_wide[bsize];
+ c += tx_size_wide_unit[mbmi->tx_size]) {
+ for (int r = mi_row; r < mi_row + mi_size_high[bsize];
+ r += tx_size_high_unit[mbmi->tx_size]) {
+ int index = 0;
+ const int row = r % MI_SIZE_64X64;
+ const int col = c % MI_SIZE_64X64;
+ const int shift = get_index_shift(col, row, &index);
+ lfm->tx_size_hor[0][tx_size_y_vert].bits[index] |=
+ (above_txform_mask[0][tx_size_y_horz] << shift);
+ }
+ }
+ for (int c = mi_col; c < mi_col + mi_size_wide[bsize];
+ c += tx_size_wide_unit[tx_size_uv_horz]) {
+ for (int r = mi_row; r < mi_row + mi_size_high[bsize];
+ r += tx_size_high_unit[tx_size_uv_vert]) {
+ int index = 0;
+ const int row = r % MI_SIZE_64X64;
+ const int col = c % MI_SIZE_64X64;
+ const int shift = get_index_shift(col, row, &index);
+ lfm->tx_size_hor[1][tx_size_uv_vert].bits[index] |=
+ (above_txform_mask[0][tx_size_uv_horz] << shift);
+ }
+ }
+ // store other info
+ for (int r = mi_row; r < mi_row + mi_size_high[bsize]; ++r) {
+ for (int c = mi_col; c < mi_col + mi_size_wide[bsize];
+ c += tx_size_wide_unit[mbmi->tx_size]) {
+ int index = 0;
+ const int row = r % MI_SIZE_64X64;
+ const int col = c % MI_SIZE_64X64;
+ const int shift = get_index_shift(col, row, &index);
+ const uint64_t mask = ((uint64_t)1 << shift);
+ if (mbmi->skip && is_inter_block(mbmi)) lfm->skip.bits[index] |= mask;
+ if (r == mi_row) lfm->is_horz_border.bits[index] |= mask;
+ if (c == mi_col) lfm->is_vert_border.bits[index] |= mask;
+ }
+ }
+ for (int c = mi_col; c < mi_col + mi_size_wide[bsize]; ++c) {
+ for (int r = mi_row; r < mi_row + mi_size_high[bsize];
+ r += tx_size_high_unit[mbmi->tx_size]) {
+ int index = 0;
+ const int row = r % MI_SIZE_64X64;
+ const int col = c % MI_SIZE_64X64;
+ const int shift = get_index_shift(col, row, &index);
+ const uint64_t mask = ((uint64_t)1 << shift);
+ if (mbmi->skip && is_inter_block(mbmi)) lfm->skip.bits[index] |= mask;
+ if (r == mi_row) lfm->is_horz_border.bits[index] |= mask;
+ if (c == mi_col) lfm->is_vert_border.bits[index] |= mask;
+ }
+ }
+ const uint8_t level_vert_y = get_filter_level(cm, &cm->lf_info, 0, 0, mbmi);
+ const uint8_t level_vert_u = get_filter_level(cm, &cm->lf_info, 0, 1, mbmi);
+ const uint8_t level_vert_v = get_filter_level(cm, &cm->lf_info, 0, 2, mbmi);
+ const uint8_t level_horz_y = get_filter_level(cm, &cm->lf_info, 1, 0, mbmi);
+ const uint8_t level_horz_u = get_filter_level(cm, &cm->lf_info, 1, 1, mbmi);
+ const uint8_t level_horz_v = get_filter_level(cm, &cm->lf_info, 1, 2, mbmi);
+ const int col_start = mi_col % MI_SIZE_64X64;
+ for (int r = mi_row; r < mi_row + mi_size_high[bsize]; r++) {
+ const int row = r % MI_SIZE_64X64;
+ memset(&lfm->lfl_y_ver[row][col_start], level_vert_y,
+ sizeof(uint8_t) * mi_size_wide[bsize]);
+ memset(&lfm->lfl_u_ver[row][col_start], level_vert_u,
+ sizeof(uint8_t) * mi_size_wide[bsize]);
+ memset(&lfm->lfl_v_ver[row][col_start], level_vert_v,
+ sizeof(uint8_t) * mi_size_wide[bsize]);
+ memset(&lfm->lfl_y_hor[row][col_start], level_horz_y,
+ sizeof(uint8_t) * mi_size_wide[bsize]);
+ memset(&lfm->lfl_u_hor[row][col_start], level_horz_u,
+ sizeof(uint8_t) * mi_size_wide[bsize]);
+ memset(&lfm->lfl_v_hor[row][col_start], level_horz_v,
+ sizeof(uint8_t) * mi_size_wide[bsize]);
+ }
+ }
+}
+#endif
+
static void parse_decode_block(AV1Decoder *const pbi, ThreadData *const td,
int mi_row, int mi_col, aom_reader *r,
PARTITION_TYPE partition, BLOCK_SIZE bsize) {
@@ -1353,12 +1608,18 @@
for (int idy = 0; idy < height; idy += bh)
for (int idx = 0; idx < width; idx += bw)
read_tx_size_vartx(xd, mbmi, max_tx_size, 0, idy, idx, r);
+#if LOOP_FILTER_BITMASK
+ store_bitmask_info(cm, mi_row, mi_col, bsize, mbmi, 0);
+#endif
} else {
mbmi->tx_size = read_tx_size(cm, xd, inter_block_tx, !mbmi->skip, r);
if (inter_block_tx)
memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));
set_txfm_ctxs(mbmi->tx_size, xd->n4_w, xd->n4_h,
mbmi->skip && is_inter_block(mbmi), xd);
+#if LOOP_FILTER_BITMASK
+ store_bitmask_info(cm, mi_row, mi_col, bsize, mbmi, 1);
+#endif
}
if (cm->delta_q_present_flag) {
@@ -5199,6 +5460,11 @@
const int tile_count_tg = end_tile - start_tile + 1;
if (initialize_flag) setup_frame_info(pbi);
+ const int num_planes = av1_num_planes(cm);
+#if LOOP_FILTER_BITMASK
+ av1_loop_filter_frame_init(cm, 0, num_planes);
+ av1_zero_array(cm->lf.lfm, cm->lf.lfm_num);
+#endif
if (pbi->max_threads > 1 && !(cm->large_scale_tile && !pbi->ext_tile_debug) &&
pbi->row_mt)
@@ -5210,7 +5476,6 @@
else
*p_data_end = decode_tiles(pbi, data, data_end, start_tile, end_tile);
- const int num_planes = av1_num_planes(cm);
// If the bit stream is monochrome, set the U and V buffers to a constant.
if (num_planes < 3) {
set_planes_to_neutral_grey(&cm->seq_params, xd->cur_buf, 1);