Add tpl_bsize_1d to TplParams
Add tpl_bsize_1d to TplParams and make the block size used in tpl
motion estimation configurable. Replace every use of the hard-coded
16x16 tpl block size. This CL does not cause any bitstream change.
Change-Id: I80521783c81820b4aaf72ce1c83356fc62546fe5
diff --git a/aom_scale/yv12config.h b/aom_scale/yv12config.h
index dfd4e51..ea92c92 100644
--- a/aom_scale/yv12config.h
+++ b/aom_scale/yv12config.h
@@ -29,7 +29,6 @@
#define AOM_INTERP_EXTEND 4
#define AOM_BORDER_IN_PIXELS 288
#define AOM_ENC_NO_SCALE_BORDER 160
-#define AOM_ENC_TPL_FRAME_BORDER 32
#define AOM_DEC_BORDER_IN_PIXELS 64
/*!\endcond */
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 7283058..76fbaac 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -31,12 +31,10 @@
extern "C" {
#endif
-//! Linear dimension of a tpl block
-#define MC_FLOW_BSIZE_1D 16
-//! Number of pixels in a tpl block
-#define MC_FLOW_NUM_PELS (MC_FLOW_BSIZE_1D * MC_FLOW_BSIZE_1D)
-//! Number of tpl block in a super block
-#define MAX_MC_FLOW_BLK_IN_SB (MAX_SB_SIZE / MC_FLOW_BSIZE_1D)
+//! Minimum linear dimension of a tpl block
+#define MIN_TPL_BSIZE_1D 16
+//! Maximum number of tpl blocks along one dimension of a super block
+#define MAX_TPL_BLK_IN_SB (MAX_SB_SIZE / MIN_TPL_BSIZE_1D)
//! Number of intra winner modes kept
#define MAX_WINNER_MODE_COUNT_INTRA 3
//! Number of inter winner modes kept
@@ -61,19 +59,18 @@
/*****************************************************************************
* \name TPL Info
*
- * Information gathered from tpl_model at MC_FLOW_BSIZE_1D precision for the
+ * Information gathered from tpl_model at tpl block precision for the
* superblock to speed up the encoding process..
****************************************************************************/
/**@{*/
//! Number of TPL blocks in this superblock.
int tpl_data_count;
//! TPL's estimate of inter cost for each tpl block.
- int64_t tpl_inter_cost[MAX_MC_FLOW_BLK_IN_SB * MAX_MC_FLOW_BLK_IN_SB];
+ int64_t tpl_inter_cost[MAX_TPL_BLK_IN_SB * MAX_TPL_BLK_IN_SB];
//! TPL's estimate of tpl cost for each tpl block.
- int64_t tpl_intra_cost[MAX_MC_FLOW_BLK_IN_SB * MAX_MC_FLOW_BLK_IN_SB];
+ int64_t tpl_intra_cost[MAX_TPL_BLK_IN_SB * MAX_TPL_BLK_IN_SB];
//! Motion vectors found by TPL model for each tpl block.
- int_mv tpl_mv[MAX_MC_FLOW_BLK_IN_SB * MAX_MC_FLOW_BLK_IN_SB]
- [INTER_REFS_PER_FRAME];
+ int_mv tpl_mv[MAX_TPL_BLK_IN_SB * MAX_TPL_BLK_IN_SB][INTER_REFS_PER_FRAME];
//! TPL's stride for the arrays in this struct.
int tpl_stride;
/**@}*/
@@ -1139,16 +1136,6 @@
#undef SINGLE_REF_MODES
/*!\cond */
-
-static INLINE int tpl_blocks_in_sb(BLOCK_SIZE bsize) {
- switch (bsize) {
- case BLOCK_64X64: return 16;
- case BLOCK_128X128: return 64;
- default: assert(0);
- }
- return -1;
-}
-
static INLINE int is_rect_tx_allowed_bsize(BLOCK_SIZE bsize) {
static const char LUT[BLOCK_SIZES_ALL] = {
0, // BLOCK_4X4
diff --git a/av1/encoder/encodeframe_utils.c b/av1/encoder/encodeframe_utils.c
index 695084f..e367245 100644
--- a/av1/encoder/encodeframe_utils.c
+++ b/av1/encoder/encodeframe_utils.c
@@ -818,7 +818,7 @@
// TPL store unit size is not the same as the motion estimation unit size.
// Here always use motion estimation size to avoid getting repetitive inter/
// intra cost.
- const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(MC_FLOW_BSIZE_1D);
+ const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_data->tpl_bsize_1d);
const int step = mi_size_wide[tpl_bsize];
assert(mi_size_wide[tpl_bsize] == mi_size_high[tpl_bsize]);
diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h
index 18b8c7b..4713b8b 100644
--- a/av1/encoder/encoder_alloc.h
+++ b/av1/encoder/encoder_alloc.h
@@ -100,20 +100,26 @@
}
static AOM_INLINE void set_tpl_stats_block_size(int width, int height,
- uint8_t *block_mis_log2) {
+ uint8_t *block_mis_log2,
+ uint8_t *tpl_bsize_1d) {
const int is_720p_or_larger = AOMMIN(width, height) >= 720;
// 0: 4x4, 1: 8x8, 2: 16x16
*block_mis_log2 = is_720p_or_larger ? 2 : 1;
+ // Block size used in tpl motion estimation
+ *tpl_bsize_1d = 16;
+ assert(*tpl_bsize_1d >= 16);
}
static AOM_INLINE void setup_tpl_buffers(AV1_COMMON *const cm,
TplParams *const tpl_data) {
CommonModeInfoParams *const mi_params = &cm->mi_params;
set_tpl_stats_block_size(cm->width, cm->height,
- &tpl_data->tpl_stats_block_mis_log2);
+ &tpl_data->tpl_stats_block_mis_log2,
+ &tpl_data->tpl_bsize_1d);
const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
- tpl_data->border_in_pixels = AOM_ENC_TPL_FRAME_BORDER;
+ tpl_data->border_in_pixels =
+ ALIGN_POWER_OF_TWO(tpl_data->tpl_bsize_1d + 2 * AOM_INTERP_EXTEND, 5);
for (int frame = 0; frame < MAX_LENGTH_TPL_FRAME_STATS; ++frame) {
const int mi_cols =
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 4eaa0ad..9209159 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -1176,7 +1176,7 @@
MACROBLOCK *x = &thread_data->td->mb;
MACROBLOCKD *xd = &x->e_mbd;
CommonModeInfoParams *mi_params = &cm->mi_params;
- BLOCK_SIZE bsize = convert_length_to_bsize(MC_FLOW_BSIZE_1D);
+ BLOCK_SIZE bsize = convert_length_to_bsize(cpi->tpl_data.tpl_bsize_1d);
TX_SIZE tx_size = max_txsize_lookup[bsize];
int mi_height = mi_size_high[bsize];
int num_active_workers = cpi->tpl_data.tpl_mt_sync.num_threads_working;
diff --git a/av1/encoder/motion_search_facade.c b/av1/encoder/motion_search_facade.c
index 9dbca7e..dcb12e5 100644
--- a/av1/encoder/motion_search_facade.c
+++ b/av1/encoder/motion_search_facade.c
@@ -96,9 +96,8 @@
start_mv = get_fullmv_from_mv(&ref_mv);
// cand stores start_mv and all possible MVs in a SB.
- cand_mv_t cand[MAX_MC_FLOW_BLK_IN_SB * MAX_MC_FLOW_BLK_IN_SB + 1] = {
- { { 0, 0 }, 0 }
- };
+ cand_mv_t cand[MAX_TPL_BLK_IN_SB * MAX_TPL_BLK_IN_SB + 1] = { { { 0, 0 },
+ 0 } };
cand[0].fmv = start_mv;
int cnt = 1;
int total_weight = 0;
@@ -107,7 +106,8 @@
mbmi->motion_mode == SIMPLE_TRANSLATION) {
SuperBlockEnc *sb_enc = &x->sb_enc;
if (sb_enc->tpl_data_count) {
- const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(MC_FLOW_BSIZE_1D);
+ const BLOCK_SIZE tpl_bsize =
+ convert_length_to_bsize(cpi->tpl_data.tpl_bsize_1d);
const int tplw = mi_size_wide[tpl_bsize];
const int tplh = mi_size_high[tpl_bsize];
const int nw = mi_size_wide[bsize] / tplw;
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index ebe8a44..909bd00 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -4983,18 +4983,21 @@
(AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1;
if (do_pruning && sf->intra_sf.skip_intra_in_interframe) {
// Only consider full SB.
- int len = tpl_blocks_in_sb(cm->seq_params.sb_size);
+ const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
+ const int tpl_bsize_1d = cpi->tpl_data.tpl_bsize_1d;
+ const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
+ (block_size_high[sb_size] / tpl_bsize_1d);
SuperBlockEnc *sb_enc = &x->sb_enc;
if (sb_enc->tpl_data_count == len) {
- const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(MC_FLOW_BSIZE_1D);
+ const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
const int tpl_stride = sb_enc->tpl_stride;
const int tplw = mi_size_wide[tpl_bsize];
const int tplh = mi_size_high[tpl_bsize];
const int nw = mi_size_wide[bsize] / tplw;
const int nh = mi_size_high[bsize] / tplh;
if (nw >= 1 && nh >= 1) {
- const int of_h = mi_row % mi_size_high[cm->seq_params.sb_size];
- const int of_w = mi_col % mi_size_wide[cm->seq_params.sb_size];
+ const int of_h = mi_row % mi_size_high[sb_size];
+ const int of_w = mi_col % mi_size_wide[sb_size];
const int start = of_h / tplh * tpl_stride + of_w / tplw;
for (int k = 0; k < nh; k++) {
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 4890f37..77612d6 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -254,13 +254,16 @@
uint8_t *dst_buffer = tpl_frame->rec_picture->y_buffer + dst_mb_offset;
const int dst_buffer_stride = tpl_frame->rec_picture->y_stride;
- // Temporaray buffers
- DECLARE_ALIGNED(32, uint8_t, predictor8[MC_FLOW_NUM_PELS * 2]);
- DECLARE_ALIGNED(32, int16_t, src_diff[MC_FLOW_NUM_PELS]);
- DECLARE_ALIGNED(32, tran_low_t, coeff[MC_FLOW_NUM_PELS]);
- DECLARE_ALIGNED(32, tran_low_t, qcoeff[MC_FLOW_NUM_PELS]);
- DECLARE_ALIGNED(32, tran_low_t, dqcoeff[MC_FLOW_NUM_PELS]);
- DECLARE_ALIGNED(32, tran_low_t, best_coeff[MC_FLOW_NUM_PELS]);
+ // Number of pixels in a tpl block
+ const int tpl_block_pels = tpl_data->tpl_bsize_1d * tpl_data->tpl_bsize_1d;
+ // Allocate temporary buffers used in motion estimation.
+ uint8_t *predictor8 = aom_memalign(32, tpl_block_pels * 2 * sizeof(uint8_t));
+ int16_t *src_diff = aom_memalign(32, tpl_block_pels * sizeof(int16_t));
+ tran_low_t *coeff = aom_memalign(32, tpl_block_pels * sizeof(tran_low_t));
+ tran_low_t *qcoeff = aom_memalign(32, tpl_block_pels * sizeof(tran_low_t));
+ tran_low_t *dqcoeff = aom_memalign(32, tpl_block_pels * sizeof(tran_low_t));
+ tran_low_t *best_coeff =
+ aom_memalign(32, tpl_block_pels * sizeof(tran_low_t));
uint8_t *predictor =
is_cur_buf_hbd(xd) ? CONVERT_TO_BYTEPTR(predictor8) : predictor8;
int64_t recon_error = 1, sse = 1;
@@ -444,7 +447,7 @@
tpl_stats->pred_error[rf_idx] = AOMMAX(1, inter_cost);
if (inter_cost < best_inter_cost) {
- memcpy(best_coeff, coeff, sizeof(best_coeff));
+ memcpy(best_coeff, coeff, tpl_block_pels * sizeof(best_coeff[0]));
best_rf_idx = rf_idx;
best_inter_cost = inter_cost;
@@ -526,6 +529,14 @@
}
}
}
+
+ // Free temporary buffers.
+ aom_free(predictor8);
+ aom_free(src_diff);
+ aom_free(coeff);
+ aom_free(qcoeff);
+ aom_free(dqcoeff);
+ aom_free(best_coeff);
}
static int round_floor(int ref_pos, int bsize_pix) {
@@ -679,13 +690,13 @@
const int mi_height = mi_size_high[bsize];
const int mi_width = mi_size_wide[bsize];
const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
- const BLOCK_SIZE tpl_block_size =
+ const BLOCK_SIZE tpl_stats_block_size =
convert_length_to_bsize(MI_SIZE << tpl_data->tpl_stats_block_mis_log2);
for (int idy = 0; idy < mi_height; idy += step) {
for (int idx = 0; idx < mi_width; idx += step) {
- tpl_model_update_b(tpl_data, mi_row + idy, mi_col + idx, tpl_block_size,
- frame_idx);
+ tpl_model_update_b(tpl_data, mi_row + idy, mi_col + idx,
+ tpl_stats_block_size, frame_idx);
}
}
}
@@ -855,12 +866,15 @@
TplParams *const tpl_data = &cpi->tpl_data;
TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_data->frame_idx];
MACROBLOCKD *xd = &x->e_mbd;
- const int mb_cols_in_tile = mi_params->mb_cols;
- const int mb_row = (mi_row + 2) >> 2;
- for (int mi_col = 0, mb_col_in_tile = 0; mi_col < mi_params->mi_cols;
- mi_col += mi_width, mb_col_in_tile++) {
- (*tpl_row_mt->sync_read_ptr)(&tpl_data->tpl_mt_sync, mb_row,
- mb_col_in_tile);
+
+ const int tplb_cols_in_tile =
+ ROUND_POWER_OF_TWO(mi_params->mi_cols, mi_size_wide_log2[bsize]);
+ const int tplb_row = ROUND_POWER_OF_TWO(mi_row, mi_size_high_log2[bsize]);
+
+ for (int mi_col = 0, tplb_col_in_tile = 0; mi_col < mi_params->mi_cols;
+ mi_col += mi_width, tplb_col_in_tile++) {
+ (*tpl_row_mt->sync_read_ptr)(&tpl_data->tpl_mt_sync, tplb_row,
+ tplb_col_in_tile);
TplDepStats tpl_stats;
// Motion estimation column boundary
@@ -875,8 +889,8 @@
tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize,
tpl_frame->stride, &tpl_stats,
tpl_data->tpl_stats_block_mis_log2);
- (*tpl_row_mt->sync_write_ptr)(&tpl_data->tpl_mt_sync, mb_row,
- mb_col_in_tile, mb_cols_in_tile);
+ (*tpl_row_mt->sync_write_ptr)(&tpl_data->tpl_mt_sync, tplb_row,
+ tplb_col_in_tile, tplb_cols_in_tile);
}
}
@@ -886,7 +900,7 @@
ThreadData *td = &cpi->td;
MACROBLOCK *x = &td->mb;
MACROBLOCKD *xd = &x->e_mbd;
- const BLOCK_SIZE bsize = convert_length_to_bsize(MC_FLOW_BSIZE_1D);
+ const BLOCK_SIZE bsize = convert_length_to_bsize(cpi->tpl_data.tpl_bsize_1d);
const TX_SIZE tx_size = max_txsize_lookup[bsize];
const int mi_height = mi_size_high[bsize];
for (int mi_row = 0; mi_row < mi_params->mi_rows; mi_row += mi_height) {
@@ -908,7 +922,7 @@
TplParams *const tpl_data = &cpi->tpl_data;
- const BLOCK_SIZE bsize = convert_length_to_bsize(MC_FLOW_BSIZE_1D);
+ const BLOCK_SIZE bsize = convert_length_to_bsize(tpl_data->tpl_bsize_1d);
const int mi_height = mi_size_high[bsize];
const int mi_width = mi_size_wide[bsize];
diff --git a/av1/encoder/tpl_model.h b/av1/encoder/tpl_model.h
index 1557c50..b4d3db2 100644
--- a/av1/encoder/tpl_model.h
+++ b/av1/encoder/tpl_model.h
@@ -124,6 +124,11 @@
uint8_t tpl_stats_block_mis_log2;
/*!
+ * Linear dimension, in pixels, of the block used in tpl motion estimation.
+ * Must be >= 16.
+ */
+ uint8_t tpl_bsize_1d;
+
+ /*!
* Buffer to store the frame level tpl information for each frame in a gf
* group. tpl_stats_buffer[i] stores the tpl information of ith frame in a gf
* group