Implement tpl model based delta quantization for key frames
BDRate on top of delta quantization for first layer altref
33 frames, speed 0
lowres: -0.196%(avg psnr), -0.291%(ovr psnr)
midres: -0.109%(avg_psnr), -0.245%(ovr psnr)
Includes some changes in row_mt to resolve some test failures.
STATS_CHANGED
Change-Id: Ib69c828139d0dcf6c098fd381e6f5ed7271446d2
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c
index ee324dd..6378bc0 100644
--- a/av1/encoder/encode_strategy.c
+++ b/av1/encoder/encode_strategy.c
@@ -1100,20 +1100,6 @@
// parameter should be used with caution.
frame_params.speed = oxcf->speed;
- if (!frame_params.show_existing_frame) {
- cm->using_qmatrix = cpi->oxcf.using_qm;
- cm->min_qmlevel = cpi->oxcf.qm_minlevel;
- cm->max_qmlevel = cpi->oxcf.qm_maxlevel;
- if (oxcf->pass == 2) {
- if (cpi->twopass.gf_group.index == 1 && cpi->oxcf.enable_tpl_model) {
- av1_configure_buffer_updates(cpi, &frame_params, frame_update_type, 0);
- av1_set_frame_size(cpi, cm->width, cm->height);
- av1_tpl_setup_stats(cpi, &frame_input);
- assert(cpi->num_gf_group_show_frames == 1);
- }
- }
- }
-
// Work out some encoding parameters specific to the pass:
if (oxcf->pass == 0) {
if (cpi->oxcf.rc_mode == AOM_CBR) {
@@ -1178,6 +1164,29 @@
memcpy(frame_params.remapped_ref_idx, cm->remapped_ref_idx,
REF_FRAMES * sizeof(*cm->remapped_ref_idx));
+#if ENABLE_KF_TPL
+ if (oxcf->pass == 2 && frame_params.frame_type == KEY_FRAME &&
+ frame_params.show_frame) {
+ av1_configure_buffer_updates(cpi, &frame_params, frame_update_type, 0);
+ av1_set_frame_size(cpi, cm->width, cm->height);
+ av1_tpl_setup_stats(cpi, &frame_input, 1);
+ }
+#endif // ENABLE_KF_TPL
+
+ if (!frame_params.show_existing_frame) {
+ cm->using_qmatrix = cpi->oxcf.using_qm;
+ cm->min_qmlevel = cpi->oxcf.qm_minlevel;
+ cm->max_qmlevel = cpi->oxcf.qm_maxlevel;
+ if (oxcf->pass == 2) {
+ if (cpi->twopass.gf_group.index == 1 && cpi->oxcf.enable_tpl_model) {
+ av1_configure_buffer_updates(cpi, &frame_params, frame_update_type, 0);
+ av1_set_frame_size(cpi, cm->width, cm->height);
+ av1_tpl_setup_stats(cpi, &frame_input, 0);
+ assert(cpi->num_gf_group_show_frames == 1);
+ }
+ }
+ }
+
if (av1_encode(cpi, dest, &frame_input, &frame_params, &frame_results) !=
AOM_CODEC_OK) {
return AOM_CODEC_ERROR;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index b817409..331ce0f 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4809,6 +4809,7 @@
cpi->row_mt_sync_read_ptr = av1_row_mt_sync_read_dummy;
cpi->row_mt_sync_write_ptr = av1_row_mt_sync_write_dummy;
cpi->row_mt = 0;
+
if (cpi->oxcf.row_mt && (cpi->oxcf.max_threads > 1) &&
!cm->delta_q_info.delta_q_present_flag) {
cpi->row_mt = 1;
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 2896519..3f7fce2 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -3556,52 +3556,49 @@
static void process_tpl_stats_frame(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
- if (cpi->twopass.gf_group.index &&
- cpi->twopass.gf_group.index < MAX_LAG_BUFFERS &&
- cpi->oxcf.enable_tpl_model && cpi->tpl_model_pass == 0) {
- assert(IMPLIES(cpi->twopass.gf_group.size > 0,
- cpi->twopass.gf_group.index < cpi->twopass.gf_group.size));
- const int tpl_idx =
- cpi->twopass.gf_group.frame_disp_idx[cpi->twopass.gf_group.index];
- TplDepFrame *tpl_frame = &cpi->tpl_stats[tpl_idx];
- TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
- if (tpl_frame->is_valid) {
- int tpl_stride = tpl_frame->stride;
- int64_t intra_cost_base = 0;
- int64_t mc_dep_cost_base = 0;
- int64_t mc_saved_base = 0;
- int64_t mc_count_base = 0;
- int row, col;
+ assert(IMPLIES(cpi->twopass.gf_group.size > 0,
+ cpi->twopass.gf_group.index < cpi->twopass.gf_group.size));
+ const int tpl_idx =
+ cpi->twopass.gf_group.frame_disp_idx[cpi->twopass.gf_group.index];
+ TplDepFrame *tpl_frame = &cpi->tpl_stats[tpl_idx];
+ TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
- for (row = 0; row < cm->mi_rows; ++row) {
- for (col = 0; col < cm->mi_cols; ++col) {
- TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
- intra_cost_base += this_stats->intra_cost;
- mc_dep_cost_base += this_stats->intra_cost + this_stats->mc_flow;
- mc_count_base += this_stats->mc_count;
- mc_saved_base += this_stats->mc_saved;
- }
+ if (tpl_frame->is_valid) {
+ int tpl_stride = tpl_frame->stride;
+ int64_t intra_cost_base = 0;
+ int64_t mc_dep_cost_base = 0;
+ int64_t mc_saved_base = 0;
+ int64_t mc_count_base = 0;
+ int row, col;
+
+ for (row = 0; row < cm->mi_rows; ++row) {
+ for (col = 0; col < cm->mi_cols; ++col) {
+ TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
+ intra_cost_base += this_stats->intra_cost;
+ mc_dep_cost_base += this_stats->intra_cost + this_stats->mc_flow;
+ mc_count_base += this_stats->mc_count;
+ mc_saved_base += this_stats->mc_saved;
}
+ }
- if (mc_dep_cost_base == 0) {
- tpl_frame->is_valid = 0;
- } else {
- aom_clear_system_state();
- cpi->rd.r0 = (double)intra_cost_base / mc_dep_cost_base;
- if (is_frame_arf_and_tpl_eligible(cpi)) {
- cpi->rd.arf_r0 = cpi->rd.r0;
- const int gfu_boost = get_gfu_boost_from_r0(cpi->rd.arf_r0);
- // printf("old boost %d new boost %d\n", cpi->rc.gfu_boost,
- // gfu_boost);
- cpi->rc.gfu_boost = (cpi->rc.gfu_boost + gfu_boost) / 2;
- }
- cpi->rd.mc_count_base =
- (double)mc_count_base / (cm->mi_rows * cm->mi_cols);
- cpi->rd.mc_saved_base =
- (double)mc_saved_base / (cm->mi_rows * cm->mi_cols);
- aom_clear_system_state();
+ if (mc_dep_cost_base == 0) {
+ tpl_frame->is_valid = 0;
+ } else {
+ aom_clear_system_state();
+ cpi->rd.r0 = (double)intra_cost_base / mc_dep_cost_base;
+ if (is_frame_arf_and_tpl_eligible(cpi)) {
+ cpi->rd.arf_r0 = cpi->rd.r0;
+ const int gfu_boost = get_gfu_boost_from_r0(cpi->rd.arf_r0);
+ // printf("old boost %d new boost %d\n", cpi->rc.gfu_boost,
+ // gfu_boost);
+ cpi->rc.gfu_boost = (cpi->rc.gfu_boost + gfu_boost) / 2;
}
+ cpi->rd.mc_count_base =
+ (double)mc_count_base / (cm->mi_rows * cm->mi_cols);
+ cpi->rd.mc_saved_base =
+ (double)mc_saved_base / (cm->mi_rows * cm->mi_cols);
+ aom_clear_system_state();
}
}
}
@@ -3614,7 +3611,9 @@
// Setup variables that depend on the dimensions of the frame.
av1_set_speed_features_framesize_dependent(cpi, cpi->speed);
- if (is_frame_tpl_eligible(cpi)) process_tpl_stats_frame(cpi);
+ if (cpi->oxcf.enable_tpl_model && cpi->tpl_model_pass == 0 &&
+ is_frame_tpl_eligible(cpi))
+ process_tpl_stats_frame(cpi);
// Decide q and q bounds.
*q = av1_rc_pick_q_and_bounds(cpi, cm->width, cm->height, bottom_index,
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 1d235b7..7f4165c 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -1324,17 +1324,15 @@
// field.
aom_fixed_buf_t *av1_get_global_headers(AV1_COMP *cpi);
+#define ENABLE_KF_TPL 1
#define MAX_PYR_LEVEL_FROMTOP_DELTAQ 0
-static INLINE int is_frame_tpl_eligible(AV1_COMP *const cpi) {
- const int max_pyr_level_fromtop_deltaq = MAX_PYR_LEVEL_FROMTOP_DELTAQ;
- const int pyr_lev_from_top =
- cpi->twopass.gf_group.pyramid_height -
- cpi->twopass.gf_group.pyramid_level[cpi->twopass.gf_group.index];
- if (pyr_lev_from_top > max_pyr_level_fromtop_deltaq ||
- cpi->twopass.gf_group.pyramid_height <= max_pyr_level_fromtop_deltaq + 1)
- return 0;
- else
+
+static INLINE int is_frame_kf_and_tpl_eligible(AV1_COMP *const cpi) {
+ AV1_COMMON *cm = &cpi->common;
+ if (cm->current_frame.frame_type == KEY_FRAME && cm->show_frame)
return 1;
+ else
+ return 0;
}
static INLINE int is_frame_arf_and_tpl_eligible(AV1_COMP *const cpi) {
@@ -1349,6 +1347,15 @@
return 1;
}
+static INLINE int is_frame_tpl_eligible(AV1_COMP *const cpi) {
+#if ENABLE_KF_TPL
+ return is_frame_kf_and_tpl_eligible(cpi) ||
+ is_frame_arf_and_tpl_eligible(cpi);
+#else
+ return is_frame_arf_and_tpl_eligible(cpi);
+#endif // ENABLE_KF_TPL
+}
+
#if CONFIG_COLLECT_PARTITION_STATS == 2
static INLINE void av1_print_partition_stats(PartitionStats *part_stats) {
FILE *f = fopen("partition_stats.csv", "w");
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 259746b..1c170a8 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -385,7 +385,7 @@
aom_calloc(num_workers, sizeof(*cpi->tile_thr_data)));
#if CONFIG_MULTITHREAD
- if (cpi->row_mt == 1) {
+ if (cpi->oxcf.row_mt == 1) {
if (cpi->row_mt_mutex_ == NULL) {
CHECK_MEM_ERROR(cm, cpi->row_mt_mutex_,
aom_malloc(sizeof(*(cpi->row_mt_mutex_))));
@@ -473,7 +473,7 @@
// Main thread acts as a worker and uses the thread data in cpi.
thread_data->td = &cpi->td;
}
- if (cpi->row_mt == 1)
+ if (cpi->oxcf.row_mt == 1)
CHECK_MEM_ERROR(
cm, thread_data->td->tctx,
(FRAME_CONTEXT *)aom_memalign(16, sizeof(*thread_data->td->tctx)));
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 9d8a215..2417ec8 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -253,14 +253,20 @@
}
}
best_intra_cost = AOMMAX(best_intra_cost, 1);
- best_inter_cost = AOMMIN(best_intra_cost, (int64_t)best_inter_cost_weighted);
+ if (frame_idx == 0)
+ best_inter_cost = 0;
+ else
+ best_inter_cost =
+ AOMMIN(best_intra_cost, (int64_t)best_inter_cost_weighted);
tpl_stats->inter_cost = best_inter_cost << TPL_DEP_COST_SCALE_LOG2;
tpl_stats->intra_cost = best_intra_cost << TPL_DEP_COST_SCALE_LOG2;
- const int idx = gf_group->ref_frame_gop_idx[frame_idx][best_rf_idx];
- tpl_stats->ref_frame_index = idx;
- tpl_stats->ref_disp_frame_index = cpi->twopass.gf_group.frame_disp_idx[idx];
- tpl_stats->mv.as_int = best_mv.as_int;
+ if (frame_idx) {
+ const int idx = gf_group->ref_frame_gop_idx[frame_idx][best_rf_idx];
+ tpl_stats->ref_frame_index = idx;
+ tpl_stats->ref_disp_frame_index = cpi->twopass.gf_group.frame_disp_idx[idx];
+ tpl_stats->mv.as_int = best_mv.as_int;
+ }
}
static int round_floor(int ref_pos, int bsize_pix) {
@@ -466,6 +472,8 @@
&sf, this_frame->y_crop_width, this_frame->y_crop_height,
this_frame->y_crop_width, this_frame->y_crop_height);
+ xd->cur_buf = this_frame;
+
if (is_cur_buf_hbd(xd))
predictor = CONVERT_TO_BYTEPTR(predictor16);
else
@@ -490,7 +498,7 @@
xd->mi = cm->mi_grid_visible;
xd->mi[0] = cm->mi;
- xd->cur_buf = this_frame;
+ xd->block_ref_scale_factors[0] = &sf;
const int base_qindex = gf_group->q_val[frame_idx];
// Get rd multiplier set up.
@@ -528,8 +536,9 @@
tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize,
tpl_frame->stride, &tpl_stats);
- tpl_model_update(cpi->tpl_stats, tpl_frame->tpl_stats_ptr, mi_row, mi_col,
- bsize);
+ if (frame_idx)
+ tpl_model_update(cpi->tpl_stats, tpl_frame->tpl_stats_ptr, mi_row,
+ mi_col, bsize);
}
}
}
@@ -539,7 +548,8 @@
static void init_gop_frames_for_tpl(AV1_COMP *cpi,
YV12_BUFFER_CONFIG **gf_picture,
GF_GROUP *gf_group, int *tpl_group_frames,
- const EncodeFrameInput *const frame_input) {
+ const EncodeFrameInput *const frame_input,
+ int is_for_kf) {
AV1_COMMON *cm = &cpi->common;
const SequenceHeader *const seq_params = &cm->seq_params;
int frame_idx = 0;
@@ -564,16 +574,20 @@
*tpl_group_frames = 0;
- // Initialize Golden reference frame.
- RefCntBuffer *ref_buf = get_ref_frame_buf(cm, GOLDEN_FRAME);
- gf_picture[0] = &ref_buf->buf;
- ++*tpl_group_frames;
+ if (!is_for_kf) {
+ // Initialize Golden reference frame.
+ RefCntBuffer *ref_buf = get_ref_frame_buf(cm, GOLDEN_FRAME);
+ gf_picture[0] = &ref_buf->buf;
+ ++*tpl_group_frames;
+ }
+
+ int start_idx = !is_for_kf;
// Initialize frames in the GF group
- for (frame_idx = 1;
+ for (frame_idx = start_idx;
frame_idx <= AOMMIN(gf_group->size, MAX_LENGTH_TPL_FRAME_STATS - 1);
++frame_idx) {
- if (frame_idx == 1) {
+ if (frame_idx == start_idx) {
gf_picture[frame_idx] = frame_input->source;
frame_disp_idx = gf_group->frame_disp_idx[frame_idx];
} else {
@@ -599,6 +613,8 @@
++*tpl_group_frames;
}
+ if (is_for_kf) return;
+
if (frame_idx < MAX_LENGTH_TPL_FRAME_STATS) {
++frame_disp_idx;
int extend_frame_count = 0;
@@ -663,8 +679,7 @@
}
static void init_tpl_stats(AV1_COMP *cpi) {
- int frame_idx;
- for (frame_idx = 0; frame_idx < MAX_LENGTH_TPL_FRAME_STATS; ++frame_idx) {
+ for (int frame_idx = 0; frame_idx < MAX_LENGTH_TPL_FRAME_STATS; ++frame_idx) {
TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
memset(tpl_frame->tpl_stats_ptr, 0,
tpl_frame->height * tpl_frame->width *
@@ -674,19 +689,20 @@
}
void av1_tpl_setup_stats(AV1_COMP *cpi,
- const EncodeFrameInput *const frame_input) {
+ const EncodeFrameInput *const frame_input,
+ int is_for_kf) {
YV12_BUFFER_CONFIG *gf_picture[MAX_LENGTH_TPL_FRAME_STATS];
GF_GROUP *gf_group = &cpi->twopass.gf_group;
- int frame_idx;
init_gop_frames_for_tpl(cpi, gf_picture, gf_group, &cpi->tpl_gf_group_frames,
- frame_input);
+ frame_input, is_for_kf);
init_tpl_stats(cpi);
if (cpi->oxcf.enable_tpl_model == 1) {
// Backward propagation from tpl_group_frames to 1.
- for (frame_idx = cpi->tpl_gf_group_frames - 1; frame_idx > 0; --frame_idx) {
+ for (int frame_idx = cpi->tpl_gf_group_frames - 1; frame_idx >= !is_for_kf;
+ --frame_idx) {
if (gf_group->update_type[frame_idx] == OVERLAY_UPDATE ||
gf_group->update_type[frame_idx] == INTNL_OVERLAY_UPDATE)
continue;
@@ -733,6 +749,7 @@
xd->left_mbmi = NULL;
xd->mi[0]->sb_type = bsize;
xd->mi[0]->motion_mode = SIMPLE_TRANSLATION;
+ xd->block_ref_scale_factors[0] = &sf;
for (int mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
// Motion estimation row boundary
diff --git a/av1/encoder/tpl_model.h b/av1/encoder/tpl_model.h
index 4732d1c..36be9ba 100644
--- a/av1/encoder/tpl_model.h
+++ b/av1/encoder/tpl_model.h
@@ -17,7 +17,8 @@
#endif
void av1_tpl_setup_stats(AV1_COMP *cpi,
- const EncodeFrameInput *const frame_input);
+ const EncodeFrameInput *const frame_input,
+ int is_for_kf);
void av1_tpl_setup_forward_stats(AV1_COMP *cpi);