SUPERRES_AUTO: Enable superres + TPL combo.
This includes TPL-based modulation of both q index and rdmult.
BDRate baseline vs SUPERRES_AUTO mode for hdres2:
(1) VBR mode:
- before = +0.159 (per clip: -0.2 to +0.8)
- after = -0.057 (per clip: -1.7 to +0.5)
(2) Q mode:
- before = +0.112 (per clip: -0.40 to +0.70)
- after = +0.008 (per clip: -0.45 to +0.39)
BUG=aomedia:2844
Change-Id: I5ee1b7630e6765a701dfd3073e20c8ec7f7fadca
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index fb041d4..4aeca5d 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -339,7 +339,6 @@
if (tpl_frame->is_valid == 0) return;
if (!is_frame_tpl_eligible(gf_group, gf_group->index)) return;
if (frame_idx >= MAX_TPL_FRAME_IDX) return;
- if (cpi->superres_mode != AOM_SUPERRES_NONE) return;
if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return;
const int is_overlay = cpi->gf_group.update_type[frame_idx] == OVERLAY_UPDATE;
@@ -353,13 +352,21 @@
int64_t inter_cost[INTER_REFS_PER_FRAME] = { 0 };
const int step = 1 << block_mis_log2;
const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
+
const int mi_row_end =
AOMMIN(mi_size_high[sb_size] + mi_row, mi_params->mi_rows);
- const int mi_col_end =
- AOMMIN(mi_size_wide[sb_size] + mi_col, mi_params->mi_cols);
-
- for (int row = mi_row; row < mi_row_end; row += step) {
- for (int col = mi_col; col < mi_col_end; col += step) {
+ const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
+ const int mi_col_sr =
+ coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
+ const int mi_col_end_sr =
+ AOMMIN(coded_to_superres_mi(mi_col + mi_size_wide[sb_size],
+ cm->superres_scale_denominator),
+ mi_cols_sr);
+ const int row_step = step;
+ const int col_step_sr =
+ coded_to_superres_mi(step, cm->superres_scale_denominator);
+ for (int row = mi_row; row < mi_row_end; row += row_step) {
+ for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) {
const TplDepStats *this_stats =
&tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
int64_t tpl_pred_error[INTER_REFS_PER_FRAME] = { 0 };
diff --git a/av1/encoder/encodeframe_utils.c b/av1/encoder/encodeframe_utils.c
index 3e96bee..1ac5f0e 100644
--- a/av1/encoder/encodeframe_utils.c
+++ b/av1/encoder/encodeframe_utils.c
@@ -62,6 +62,26 @@
aom_clear_system_state();
}
+// Return the end column for the current superblock, in units of TPL blocks.
+static int get_superblock_tpl_column_end(const AV1_COMMON *const cm, int mi_col,
+ int num_mi_w) {
+ // Start column (in mi units) of the superblock that contains mi_col.
+ const int sb_mi_col_start = (mi_col >> cm->seq_params.mib_size_log2)
+ << cm->seq_params.mib_size_log2;
+ // Same start column, mapped to the superres-upscaled dimension.
+ const int sb_mi_col_start_sr =
+ coded_to_superres_mi(sb_mi_col_start, cm->superres_scale_denominator);
+ // Width of this superblock in mi units.
+ const int sb_mi_width = mi_size_wide[cm->seq_params.sb_size];
+ // Same width, mapped to the superres-upscaled dimension.
+ const int sb_mi_width_sr =
+ coded_to_superres_mi(sb_mi_width, cm->superres_scale_denominator);
+ // Superblock end column in superres-upscaled mi units.
+ const int sb_mi_end = sb_mi_col_start_sr + sb_mi_width_sr;
+ // Round up to whole TPL blocks, each num_mi_w mi units wide.
+ return (sb_mi_end + num_mi_w - 1) / num_mi_w;
+}
+
int av1_get_hier_tpl_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
const BLOCK_SIZE bsize, const int mi_row,
const int mi_col, int orig_rdmult) {
@@ -75,24 +95,34 @@
if (tpl_frame->is_valid == 0) return deltaq_rdmult;
if (!is_frame_tpl_eligible(gf_group, gf_group->index)) return deltaq_rdmult;
if (tpl_idx >= MAX_TPL_FRAME_IDX) return deltaq_rdmult;
- if (cpi->superres_mode != AOM_SUPERRES_NONE) return deltaq_rdmult;
if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return deltaq_rdmult;
+ const int mi_col_sr =
+ coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
+ const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
+ const int block_mi_width_sr =
+ coded_to_superres_mi(mi_size_wide[bsize], cm->superres_scale_denominator);
+
const int bsize_base = BLOCK_16X16;
const int num_mi_w = mi_size_wide[bsize_base];
const int num_mi_h = mi_size_high[bsize_base];
- const int num_cols = (cm->mi_params.mi_cols + num_mi_w - 1) / num_mi_w;
+ const int num_cols = (mi_cols_sr + num_mi_w - 1) / num_mi_w;
const int num_rows = (cm->mi_params.mi_rows + num_mi_h - 1) / num_mi_h;
- const int num_bcols = (mi_size_wide[bsize] + num_mi_w - 1) / num_mi_w;
+ const int num_bcols = (block_mi_width_sr + num_mi_w - 1) / num_mi_w;
const int num_brows = (mi_size_high[bsize] + num_mi_h - 1) / num_mi_h;
+ // This is required because the end col of superblock may be off by 1 in case
+ // of superres.
+ const int sb_bcol_end = get_superblock_tpl_column_end(cm, mi_col, num_mi_w);
int row, col;
double base_block_count = 0.0;
double geom_mean_of_scale = 0.0;
aom_clear_system_state();
for (row = mi_row / num_mi_w;
row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) {
- for (col = mi_col / num_mi_h;
- col < num_cols && col < mi_col / num_mi_h + num_bcols; ++col) {
+ for (col = mi_col_sr / num_mi_h;
+ col < num_cols && col < mi_col_sr / num_mi_h + num_bcols &&
+ col < sb_bcol_end;
+ ++col) {
const int index = row * num_cols + col;
geom_mean_of_scale += log(cpi->tpl_sb_rdmult_scaling_factors[index]);
base_block_count += 1.0;
@@ -673,8 +703,11 @@
coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
const int step = 1 << block_mis_log2;
- for (int row = mi_row; row < mi_row + mi_high; row += step) {
- for (int col = mi_col_sr; col < mi_col_end_sr; col += step) {
+ const int row_step = step;
+ const int col_step_sr =
+ coded_to_superres_mi(step, cm->superres_scale_denominator);
+ for (int row = mi_row; row < mi_row + mi_high; row += row_step) {
+ for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) {
if (row >= cm->mi_params.mi_rows || col >= mi_cols_sr) continue;
TplDepStats *this_stats =
&tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
@@ -686,6 +719,7 @@
mi_count++;
}
}
+ assert(mi_count <= MAX_TPL_BLK_IN_SB * MAX_TPL_BLK_IN_SB);
aom_clear_system_state();
@@ -773,7 +807,6 @@
sb_enc->tpl_data_count = 0;
if (!cpi->oxcf.algo_cfg.enable_tpl_model) return;
- if (cpi->superres_mode != AOM_SUPERRES_NONE) return;
if (cpi->common.current_frame.frame_type == KEY_FRAME) return;
const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
if (update_type == INTNL_OVERLAY_UPDATE || update_type == OVERLAY_UPDATE)
@@ -806,15 +839,18 @@
// Here always use motion estimation size to avoid getting repetitive inter/
// intra cost.
const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_data->tpl_bsize_1d);
- const int step = mi_size_wide[tpl_bsize];
assert(mi_size_wide[tpl_bsize] == mi_size_high[tpl_bsize]);
+ const int row_step = mi_size_high[tpl_bsize];
+ const int col_step_sr = coded_to_superres_mi(mi_size_wide[tpl_bsize],
+ cm->superres_scale_denominator);
// Stride is only based on SB size, and we fill in values for every 16x16
// block in a SB.
- sb_enc->tpl_stride = (mi_col_end_sr - mi_col_sr) / step;
+ sb_enc->tpl_stride = (mi_col_end_sr - mi_col_sr) / col_step_sr;
- for (int row = mi_row; row < mi_row + mi_high; row += step) {
- for (int col = mi_col_sr; col < mi_col_end_sr; col += step) {
+ for (int row = mi_row; row < mi_row + mi_high; row += row_step) {
+ for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) {
+ assert(count < MAX_TPL_BLK_IN_SB * MAX_TPL_BLK_IN_SB);
// Handle partial SB, so that no invalid values are used later.
if (row >= cm->mi_params.mi_rows || col >= mi_cols_sr) {
sb_enc->tpl_inter_cost[count] = INT64_MAX;
@@ -836,6 +872,7 @@
}
}
+ assert(mi_count <= MAX_TPL_BLK_IN_SB * MAX_TPL_BLK_IN_SB);
sb_enc->tpl_data_count = mi_count;
}
@@ -874,8 +911,11 @@
coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
const int step = 1 << block_mis_log2;
- for (int row = mi_row; row < mi_row + mi_high; row += step) {
- for (int col = mi_col_sr; col < mi_col_end_sr; col += step) {
+ const int row_step = step;
+ const int col_step_sr =
+ coded_to_superres_mi(step, cm->superres_scale_denominator);
+ for (int row = mi_row; row < mi_row + mi_high; row += row_step) {
+ for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) {
if (row >= cm->mi_params.mi_rows || col >= mi_cols_sr) continue;
TplDepStats *this_stats =
&tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
@@ -887,6 +927,7 @@
mi_count++;
}
}
+ assert(mi_count <= MAX_TPL_BLK_IN_SB * MAX_TPL_BLK_IN_SB);
aom_clear_system_state();
diff --git a/av1/encoder/encoder_utils.c b/av1/encoder/encoder_utils.c
index 28a1634..019facb 100644
--- a/av1/encoder/encoder_utils.c
+++ b/av1/encoder/encoder_utils.c
@@ -472,10 +472,13 @@
int64_t intra_cost_base = 0;
int64_t mc_dep_cost_base = 0;
const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
+ const int row_step = step;
+ const int col_step_sr =
+ coded_to_superres_mi(step, cm->superres_scale_denominator);
const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
- for (int row = 0; row < cm->mi_params.mi_rows; row += step) {
- for (int col = 0; col < mi_cols_sr; col += step) {
+ for (int row = 0; row < cm->mi_params.mi_rows; row += row_step) {
+ for (int col = 0; col < mi_cols_sr; col += col_step_sr) {
TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];
int64_t mc_dep_delta =
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 0973e1b..52594b5 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2249,10 +2249,13 @@
coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
+ const int row_step = step;
+ const int col_step_sr =
+ coded_to_superres_mi(step, cm->superres_scale_denominator);
for (int row = mi_row; row < AOMMIN(mi_row + mi_high, cm->mi_params.mi_rows);
- row += step) {
+ row += row_step) {
for (int col = mi_col_sr; col < AOMMIN(mi_col_end_sr, mi_cols_sr);
- col += step) {
+ col += col_step_sr) {
const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];
diff --git a/av1/encoder/rdopt.h b/av1/encoder/rdopt.h
index 6b4b8a2..8717024 100644
--- a/av1/encoder/rdopt.h
+++ b/av1/encoder/rdopt.h
@@ -205,11 +205,9 @@
void av1_inter_mode_data_init(struct TileDataEnc *tile_data);
void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult);
-#if !CONFIG_REALTIME_ONLY
static INLINE int coded_to_superres_mi(int mi_col, int denom) {
return (mi_col * denom + SCALE_NUMERATOR / 2) / SCALE_NUMERATOR;
}
-#endif
static INLINE int av1_encoder_get_relative_dist(int a, int b) {
assert(a >= 0 && b >= 0);
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 04f54bd..1ffecb7 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -1128,7 +1128,10 @@
EncodeFrameParams this_frame_params = *frame_params;
TplParams *const tpl_data = &cpi->tpl_data;
- if (cpi->superres_mode != AOM_SUPERRES_NONE) return 0;
+ if (cpi->superres_mode != AOM_SUPERRES_NONE) {
+ assert(cpi->superres_mode != AOM_SUPERRES_AUTO);
+ return 0;
+ }
cm->current_frame.frame_type = frame_params->frame_type;
for (int gf_index = gf_group->index; gf_index < gf_group->size; ++gf_index) {
@@ -1210,10 +1213,13 @@
int64_t intra_cost_base = 0;
int64_t mc_dep_cost_base = 0;
const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
+ const int row_step = step;
+ const int col_step_sr =
+ coded_to_superres_mi(step, cm->superres_scale_denominator);
const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
- for (int row = 0; row < cm->mi_params.mi_rows; row += step) {
- for (int col = 0; col < mi_cols_sr; col += step) {
+ for (int row = 0; row < cm->mi_params.mi_rows; row += row_step) {
+ for (int col = 0; col < mi_cols_sr; col += col_step_sr) {
TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];
int64_t mc_dep_delta =
@@ -1245,7 +1251,6 @@
const TplDepFrame *const tpl_frame = &tpl_data->tpl_frame[tpl_idx];
if (!tpl_frame->is_valid) return;
- if (cpi->superres_mode != AOM_SUPERRES_NONE) return;
const TplDepStats *const tpl_stats = tpl_frame->tpl_stats_ptr;
const int tpl_stride = tpl_frame->stride;
@@ -1301,15 +1306,20 @@
if (tpl_frame->is_valid == 0) return;
if (!is_frame_tpl_eligible(gf_group, gf_group->index)) return;
if (tpl_idx >= MAX_TPL_FRAME_IDX) return;
- if (cpi->superres_mode != AOM_SUPERRES_NONE) return;
if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return;
+ const int mi_col_sr =
+ coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
+ const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
+ const int sb_mi_width_sr = coded_to_superres_mi(
+ mi_size_wide[sb_size], cm->superres_scale_denominator);
+
const int bsize_base = BLOCK_16X16;
const int num_mi_w = mi_size_wide[bsize_base];
const int num_mi_h = mi_size_high[bsize_base];
- const int num_cols = (cm->mi_params.mi_cols + num_mi_w - 1) / num_mi_w;
+ const int num_cols = (mi_cols_sr + num_mi_w - 1) / num_mi_w;
const int num_rows = (cm->mi_params.mi_rows + num_mi_h - 1) / num_mi_h;
- const int num_bcols = (mi_size_wide[sb_size] + num_mi_w - 1) / num_mi_w;
+ const int num_bcols = (sb_mi_width_sr + num_mi_w - 1) / num_mi_w;
const int num_brows = (mi_size_high[sb_size] + num_mi_h - 1) / num_mi_h;
int row, col;
@@ -1319,8 +1329,8 @@
aom_clear_system_state();
for (row = mi_row / num_mi_w;
row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) {
- for (col = mi_col / num_mi_h;
- col < num_cols && col < mi_col / num_mi_h + num_bcols; ++col) {
+ for (col = mi_col_sr / num_mi_h;
+ col < num_cols && col < mi_col_sr / num_mi_h + num_bcols; ++col) {
const int index = row * num_cols + col;
log_sum += log(cpi->tpl_rdmult_scaling_factors[index]);
base_block_count += 1.0;
@@ -1340,8 +1350,8 @@
for (row = mi_row / num_mi_w;
row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) {
- for (col = mi_col / num_mi_h;
- col < num_cols && col < mi_col / num_mi_h + num_bcols; ++col) {
+ for (col = mi_col_sr / num_mi_h;
+ col < num_cols && col < mi_col_sr / num_mi_h + num_bcols; ++col) {
const int index = row * num_cols + col;
cpi->tpl_sb_rdmult_scaling_factors[index] =
scale_adj * cpi->tpl_rdmult_scaling_factors[index];