Reduce blk_skip array size
Reduce the blk_skip array size to the maximum number of transform
blocks (txbs) in a coding block. This makes speed 1 encoding about
5% faster for lower-resolution clips.
Change-Id: I1812dc4d403e08f0566ead8a14c6046e505bb849
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 646edb5..d438c98 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -247,8 +247,8 @@
// from extending outside the UMV borders
MvLimits mv_limits;
- uint8_t blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
- uint8_t blk_skip_drl[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
+ uint8_t blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE];
+ uint8_t blk_skip_drl[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE];
int skip;
int skip_chroma_rd;
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index b789456..92894fd 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2595,7 +2595,7 @@
#if CONFIG_TXK_SEL
TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
#endif // CONFIG_TXK_SEL
- uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
+ uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
const int n4 = bsize_to_num_blk(bs);
const int tx_select = cm->tx_mode == TX_MODE_SELECT;
@@ -3290,7 +3290,7 @@
#else
TX_TYPE *best_txk_type = NULL;
#endif
- uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
+ uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;
@@ -4950,7 +4950,7 @@
TX_SIZE best_tx_size[INTER_TX_SIZE_BUF_LEN] = { 0 };
TX_SIZE best_tx = max_txsize_rect_lookup[1][bsize];
TX_SIZE best_min_tx_size = TX_SIZES_ALL;
- uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
+ uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
TX_TYPE txk_start = DCT_DCT;
#if CONFIG_TXK_SEL
TX_TYPE txk_end = DCT_DCT + 1;
@@ -7666,7 +7666,7 @@
int rate2_nocoeff = 0, best_xskip, best_disable_skip = 0;
RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
MB_MODE_INFO base_mbmi, best_mbmi;
- uint8_t best_blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
+ uint8_t best_blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE];
int interintra_allowed =
cm->allow_interintra_compound && is_interintra_allowed(mbmi);
#if CONFIG_EXT_WARPED_MOTION
@@ -8111,7 +8111,7 @@
if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
for (int i = 0; i < num_planes; ++i)
memcpy(best_blk_skip[i], x->blk_skip[i],
- sizeof(best_blk_skip[i][0]) * xd->n8_h * xd->n8_w * 4);
+ sizeof(best_blk_skip[i][0]) * xd->n8_h * xd->n8_w);
best_xskip = x->skip;
best_disable_skip = *disable_skip;
}
@@ -8128,7 +8128,7 @@
if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
for (int i = 0; i < num_planes; ++i)
memcpy(x->blk_skip[i], best_blk_skip[i],
- sizeof(x->blk_skip[i][0]) * xd->n8_h * xd->n8_w * 4);
+ sizeof(x->blk_skip[i][0]) * xd->n8_h * xd->n8_w);
x->skip = best_xskip;
*disable_skip = best_disable_skip;
@@ -8258,7 +8258,7 @@
int64_t best_rd = INT64_MAX;
int best_compound_idx = 1;
int64_t best_ret_val = INT64_MAX;
- uint8_t best_blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
+ uint8_t best_blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE];
const MB_MODE_INFO backup_mbmi = *mbmi;
MB_MODE_INFO best_mbmi = *mbmi;
int64_t early_terminate = 0;
@@ -8740,7 +8740,7 @@
best_mbmi = *mbmi;
for (i = 0; i < num_planes; ++i)
memcpy(best_blk_skip[i], x->blk_skip[i],
- sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
+ sizeof(uint8_t) * xd->n8_h * xd->n8_w);
}
}
}
@@ -8754,7 +8754,7 @@
*mbmi = best_mbmi;
for (i = 0; i < num_planes; ++i)
memcpy(x->blk_skip[i], best_blk_skip[i],
- sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
+ sizeof(uint8_t) * xd->n8_h * xd->n8_w);
}
if (early_terminate == INT64_MAX) return INT64_MAX;
#endif // CONFIG_JNT_COMP
@@ -8822,7 +8822,7 @@
RD_STATS best_rdcost = *rd_cost;
int best_skip = x->skip;
- uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8] = { 0 };
+ uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
dir < IBC_MOTION_DIRECTIONS; ++dir) {
const MvLimits tmp_mv_limits = x->mv_limits;
@@ -8937,7 +8937,7 @@
best_skip = x->skip;
best_rdcost = rdc_noskip;
memcpy(best_blk_skip, x->blk_skip[0],
- sizeof(x->blk_skip[0][0]) * xd->n8_h * xd->n8_w * 4);
+ sizeof(x->blk_skip[0][0]) * xd->n8_h * xd->n8_w);
}
if (!xd->lossless[mbmi->segment_id]) {
@@ -8954,7 +8954,7 @@
best_skip = x->skip;
best_rdcost = rdc_skip;
memcpy(best_blk_skip, x->blk_skip[0],
- sizeof(x->blk_skip[0][0]) * xd->n8_h * xd->n8_w * 4);
+ sizeof(x->blk_skip[0][0]) * xd->n8_h * xd->n8_w);
}
}
}
@@ -8962,7 +8962,7 @@
*rd_cost = best_rdcost;
x->skip = best_skip;
memcpy(x->blk_skip[0], best_blk_skip,
- sizeof(x->blk_skip[0][0]) * xd->n8_h * xd->n8_w * 4);
+ sizeof(x->blk_skip[0][0]) * xd->n8_h * xd->n8_w);
return best_rd;
}
#endif // CONFIG_INTRABC
@@ -9880,7 +9880,7 @@
skippable = rd_stats_y.skip;
#if CONFIG_FILTER_INTRA
- uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
+ uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
memcpy(best_blk_skip, x->blk_skip[0],
sizeof(best_blk_skip[0]) * ctx->num_4x4_blk);
@@ -10525,7 +10525,7 @@
x->palette_buffer->best_palette_color_map;
uint8_t *const color_map = xd->plane[0].color_index_map;
MB_MODE_INFO best_mbmi_palette = *mbmi;
- uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
+ uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
mbmi->mode = DC_PRED;
mbmi->uv_mode = UV_DC_PRED;