Integrate BITRATE_ACCURACY with three pass mode
Preliminary Results:
(There might be some hidden issues that cause mixed results
in 150 frames)
BITRATE_ACCURACY vs BITRATE_ACCURACY_BL-->av1 vbr mode with 3 pass on
vbr mode 150 frames
metric avg_psnr ovr_psnr ssim
ugc360p -0.994% -4.225% -0.606%
lowres 2.395% 1.492% 9.491%
midres 1.499% -0.089% 3.784%
hdres 0.876% -0.368% 2.554%
vbr mode 33 frames
metric avg_psnr ovr_psnr ssim
ugc360p -2.722% -3.767% -1.627%
lowres 0.149% -0.785% 6.130%
midres -0.806% -1.378% 1.338%
hdres -0.869% -1.620% -0.198%
vbr mode 17 frames
metric avg_psnr ovr_psnr ssim
ugc360p -3.684% -3.529% -2.729%
lowres 0.491% 0.680% 4.379%
midres -2.052% -3.011% -0.792%
hdres -1.341% -2.470% -0.917%
Notes:
0) The big performance drops in 33 frames and 150 frames are resolved
when using whole sequence's tpl stats.
1) The experiment shows significant gains in ugc360p 150 frames. This is
a good indicator that using shared base layer q index will lead to
better compression performance.
2) We see lowres 150 frames has a 1.3% drop; this might be caused by some
hidden issues that require further investigation.
3) We didn't report abs_rc_error because my testing process changed.
I haven't figured out how to get abs_rc_error yet. But I looked at
several examples and figured that we might need to revisit the frame
bits model's accuracy again.
BUG=aomedia:3045
Change-Id: I9c2ac3c4945735c4d2ae8a9a08b91966c0978992
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c
index 6af6b87..80c89b5 100644
--- a/av1/encoder/encode_strategy.c
+++ b/av1/encoder/encode_strategy.c
@@ -33,6 +33,9 @@
#include "av1/encoder/gop_structure.h"
#include "av1/encoder/pass2_strategy.h"
#include "av1/encoder/temporal_filter.h"
+#if CONFIG_THREE_PASS
+#include "av1/encoder/thirdpass.h"
+#endif // CONFIG_THREE_PASS
#include "av1/encoder/tpl_model.h"
#if CONFIG_TUNE_VMAF
@@ -1093,7 +1096,7 @@
if (!cpi->skip_tpl_setup_stats) {
av1_tpl_preload_rc_estimate(cpi, frame_params);
av1_tpl_setup_stats(cpi, 0, frame_params);
-#if CONFIG_BITRATE_ACCURACY
+#if CONFIG_BITRATE_ACCURACY && !CONFIG_THREE_PASS
assert(cpi->gf_frame_index == 0);
av1_vbr_rc_update_q_index_list(&cpi->vbr_rc_info, &cpi->ppi->tpl_data,
gf_group, cm->seq_params->bit_depth);
@@ -1102,6 +1105,17 @@
} else {
av1_init_tpl_stats(&cpi->ppi->tpl_data);
}
+#if CONFIG_BITRATE_ACCURACY && CONFIG_THREE_PASS
+ if (cpi->oxcf.pass == AOM_RC_SECOND_PASS &&
+ cpi->second_pass_log_stream != NULL) {
+ TPL_INFO *tpl_info;
+ AOM_CHECK_MEM_ERROR(cm->error, tpl_info, aom_malloc(sizeof(*tpl_info)));
+ av1_pack_tpl_info(tpl_info, gf_group, &cpi->ppi->tpl_data);
+ av1_write_tpl_info(tpl_info, cpi->second_pass_log_stream,
+ cpi->common.error);
+ aom_free(tpl_info);
+ }
+#endif // CONFIG_BITRATE_ACCURACY && CONFIG_THREE_PASS
}
if (av1_encode(cpi, dest, frame_input, frame_params, frame_results) !=
@@ -1532,6 +1546,42 @@
memset(&frame_params, 0, sizeof(frame_params));
memset(&frame_results, 0, sizeof(frame_results));
+#if CONFIG_BITRATE_ACCURACY && CONFIG_THREE_PASS
+ VBR_RATECTRL_INFO *vbr_rc_info = &cpi->vbr_rc_info;
+ if (oxcf->pass == AOM_RC_THIRD_PASS && vbr_rc_info->ready == 0) {
+ THIRD_PASS_FRAME_INFO frame_info[MAX_THIRD_PASS_BUF];
+ av1_open_second_pass_log(cpi, 1);
+ FILE *second_pass_log_stream = cpi->second_pass_log_stream;
+ fseek(second_pass_log_stream, 0, SEEK_END);
+ size_t file_size = ftell(second_pass_log_stream);
+ rewind(second_pass_log_stream);
+ size_t read_size = 0;
+ while (read_size < file_size) {
+ THIRD_PASS_GOP_INFO gop_info;
+ struct aom_internal_error_info *error = cpi->common.error;
+ // Read in GOP information from the second pass file.
+ av1_read_second_pass_gop_info(second_pass_log_stream, &gop_info, error);
+ TPL_INFO *tpl_info;
+ AOM_CHECK_MEM_ERROR(cm->error, tpl_info, aom_malloc(sizeof(*tpl_info)));
+ av1_read_tpl_info(tpl_info, second_pass_log_stream, error);
+ // Read in per-frame info from second-pass encoding
+ av1_read_second_pass_per_frame_info(second_pass_log_stream, frame_info,
+ gop_info.num_frames, error);
+ av1_vbr_rc_append_tpl_info(vbr_rc_info, tpl_info);
+ read_size = ftell(second_pass_log_stream);
+ aom_free(tpl_info);
+ }
+ av1_close_second_pass_log(cpi);
+ vbr_rc_info->base_q_index = av1_vbr_rc_info_estimate_base_q(
+ vbr_rc_info->total_bit_budget, cm->seq_params->bit_depth,
+ vbr_rc_info->scale_factors, vbr_rc_info->total_frame_count,
+ vbr_rc_info->update_type_list, vbr_rc_info->qstep_ratio_list,
+ vbr_rc_info->txfm_stats_list, vbr_rc_info->q_index_list,
+ vbr_rc_info->estimated_bitrate_byframe);
+ vbr_rc_info->ready = 1;
+ }
+#endif // CONFIG_BITRATE_ACCURACY && CONFIG_THREE_PASS
+
// Check if we need to stuff more src frames
if (flush == 0) {
int srcbuf_size =
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 175096a..10ab004 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2669,10 +2669,25 @@
#endif // CONFIG_RD_COMMAND
#if CONFIG_BITRATE_ACCURACY
+#if CONFIG_THREE_PASS
+ if (oxcf->pass == AOM_RC_THIRD_PASS && cpi->vbr_rc_info.ready == 1) {
+ int gop_idx = cpi->vbr_rc_info.cur_gop_idx;
+ int gop_start_idx = cpi->vbr_rc_info.gop_start_idx_list[gop_idx];
+ int cur_frame_idx = gop_start_idx + cpi->gf_frame_index;
+ if (cur_frame_idx < cpi->vbr_rc_info.total_frame_count) {
+ q = cpi->vbr_rc_info.q_index_list[cur_frame_idx];
+ } else {
+ // TODO(angiebird): Investigate why sometimes there is an extra frame
+ // after the last GOP.
+ q = 255;
+ }
+ }
+#else
if (cpi->vbr_rc_info.q_index_list_ready) {
q = cpi->vbr_rc_info.q_index_list[cpi->gf_frame_index];
}
-#endif
+#endif // CONFIG_THREE_PASS
+#endif // CONFIG_BITRATE_ACCURACY
av1_set_quantizer(cm, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q,
q_cfg->enable_chroma_deltaq, q_cfg->enable_hdr_deltaq);
av1_set_speed_features_qindex_dependent(cpi, oxcf->speed);
diff --git a/av1/encoder/pass2_strategy.c b/av1/encoder/pass2_strategy.c
index 6fd0d75..a9ff0f8 100644
--- a/av1/encoder/pass2_strategy.c
+++ b/av1/encoder/pass2_strategy.c
@@ -1081,7 +1081,7 @@
// interval is not shortened.
if (is_temporal_filter_enabled && !shorten_gf_interval) {
cpi->skip_tpl_setup_stats = 1;
-#if CONFIG_BITRATE_ACCURACY
+#if CONFIG_BITRATE_ACCURACY && !CONFIG_THREE_PASS
assert(cpi->gf_frame_index == 0);
av1_vbr_rc_update_q_index_list(&cpi->vbr_rc_info, &cpi->ppi->tpl_data,
gf_group,
@@ -3711,7 +3711,7 @@
// Define a new GF/ARF group. (Should always enter here for key frames).
if (cpi->gf_frame_index == gf_group->size) {
av1_tf_info_reset(&cpi->ppi->tf_info);
-#if CONFIG_BITRATE_ACCURACY
+#if CONFIG_BITRATE_ACCURACY && !CONFIG_THREE_PASS
vbr_rc_reset_gop_data(&cpi->vbr_rc_info);
#endif // CONFIG_BITRATE_ACCURACY
int max_gop_length =
@@ -3778,6 +3778,18 @@
// Read in GOP information from the second pass file.
av1_read_second_pass_gop_info(cpi->second_pass_log_stream, gop_info,
cpi->common.error);
+#if CONFIG_BITRATE_ACCURACY
+ TPL_INFO *tpl_info;
+ AOM_CHECK_MEM_ERROR(cpi->common.error, tpl_info,
+ aom_malloc(sizeof(*tpl_info)));
+ av1_read_tpl_info(tpl_info, cpi->second_pass_log_stream,
+ cpi->common.error);
+ aom_free(tpl_info);
+#if CONFIG_THREE_PASS
+ // TODO(angiebird): Put this part into a func
+ cpi->vbr_rc_info.cur_gop_idx++;
+#endif // CONFIG_THREE_PASS
+#endif // CONFIG_BITRATE_ACCURACY
// Read in third_pass_info from the bitstream.
av1_set_gop_third_pass(cpi->third_pass_ctx);
// Read in per-frame info from second-pass encoding
diff --git a/av1/encoder/thirdpass.c b/av1/encoder/thirdpass.c
index de30849..5621247 100644
--- a/av1/encoder/thirdpass.c
+++ b/av1/encoder/thirdpass.c
@@ -705,3 +705,68 @@
return corner_mi->partition;
}
+
+#if CONFIG_BITRATE_ACCURACY
+// Writes `nmemb` items of `size` bytes each from `ptr` to `stream` and
+// reports AOM_CODEC_ERROR through `error` unless every item was written.
+static void fwrite_and_check(const void *ptr, size_t size, size_t nmemb,
+                             FILE *stream,
+                             struct aom_internal_error_info *error) {
+  // fwrite() returns a size_t item count. Keep it unsigned (an int could
+  // truncate) and compare against nmemb: a non-zero count that is smaller
+  // than nmemb is still a short write, and a request for zero items is
+  // not a failure.
+  const size_t count = fwrite(ptr, size, nmemb, stream);
+  if (count != nmemb) {
+    aom_internal_error(error, AOM_CODEC_ERROR, "fwrite_and_check failed\n");
+  }
+}
+
+// Reads `nmemb` items of `size` bytes each from `stream` into `ptr` and
+// reports AOM_CODEC_ERROR through `error` unless every item was read.
+static void fread_and_check(void *ptr, size_t size, size_t nmemb, FILE *stream,
+                            struct aom_internal_error_info *error) {
+  // fread() returns a size_t item count. A count below nmemb means the
+  // stream ended or failed part-way, which would leave the tail of `ptr`
+  // unfilled, so anything other than a complete read is an error.
+  const size_t count = fread(ptr, size, nmemb, stream);
+  if (count != nmemb) {
+    aom_internal_error(error, AOM_CODEC_ERROR, "fread_and_check failed\n");
+  }
+}
+
+// Copies the TPL data of the current GF group out of `tpl_data` into the
+// flat `tpl_info` record. `tpl_ready` is always set from `tpl_data->ready`;
+// the length and the per-frame arrays are filled only when it is non-zero.
+void av1_pack_tpl_info(TPL_INFO *tpl_info, const GF_GROUP *gf_group,
+                       const TplParams *tpl_data) {
+  tpl_info->tpl_ready = tpl_data->ready;
+  if (!tpl_info->tpl_ready) return;
+
+  const int frame_count = gf_group->size;
+  tpl_info->gf_length = frame_count;
+  for (int idx = 0; idx < frame_count; ++idx) {
+    tpl_info->qstep_ratio_ls[idx] = av1_tpl_get_qstep_ratio(tpl_data, idx);
+    tpl_info->txfm_stats_list[idx] = tpl_data->txfm_stats_list[idx];
+  }
+}
+
+// Serializes `tpl_info` to `log_stream`.
+//
+// Layout: `tpl_ready`, then, only when `tpl_ready` is non-zero,
+// `gf_length` followed by `gf_length` entries of `txfm_stats_list` and
+// `gf_length` entries of `qstep_ratio_ls`. Any write failure is reported
+// through `error`.
+void av1_write_tpl_info(const TPL_INFO *tpl_info, FILE *log_stream,
+                        struct aom_internal_error_info *error) {
+  // The leading flag decides how the reader parses the rest of the record,
+  // so its write is checked like every other field.
+  fwrite_and_check(&tpl_info->tpl_ready, sizeof(tpl_info->tpl_ready), 1,
+                   log_stream, error);
+  if (tpl_info->tpl_ready) {
+    fwrite_and_check(&tpl_info->gf_length, sizeof(tpl_info->gf_length), 1,
+                     log_stream, error);
+    assert(tpl_info->gf_length <= MAX_LENGTH_TPL_FRAME_STATS);
+    fwrite_and_check(tpl_info->txfm_stats_list,
+                     sizeof(tpl_info->txfm_stats_list[0]), tpl_info->gf_length,
+                     log_stream, error);
+    fwrite_and_check(tpl_info->qstep_ratio_ls,
+                     sizeof(tpl_info->qstep_ratio_ls[0]), tpl_info->gf_length,
+                     log_stream, error);
+  }
+}
+
+// Deserializes one record written by av1_write_tpl_info() from
+// `log_stream` into `tpl_info`.
+//
+// `tpl_info` is zeroed first. `tpl_ready` is then read and, when it is
+// non-zero, `gf_length` and that many entries of `txfm_stats_list` and
+// `qstep_ratio_ls`. Read failures and an out-of-range `gf_length` are
+// reported through `error`.
+void av1_read_tpl_info(TPL_INFO *tpl_info, FILE *log_stream,
+                       struct aom_internal_error_info *error) {
+  av1_zero(*tpl_info);
+  fread_and_check(&tpl_info->tpl_ready, sizeof(tpl_info->tpl_ready), 1,
+                  log_stream, error);
+  if (tpl_info->tpl_ready) {
+    fread_and_check(&tpl_info->gf_length, sizeof(tpl_info->gf_length), 1,
+                    log_stream, error);
+    // gf_length comes from a file, so it is validated at run time rather
+    // than with assert(): with NDEBUG a corrupt (or negative, hence huge
+    // once converted to size_t) value would make the reads below overrun
+    // the fixed-size arrays.
+    if (tpl_info->gf_length < 0 ||
+        tpl_info->gf_length > MAX_LENGTH_TPL_FRAME_STATS) {
+      // Leave an empty record behind in case aom_internal_error() returns
+      // to this function instead of unwinding.
+      av1_zero(*tpl_info);
+      aom_internal_error(error, AOM_CODEC_ERROR,
+                         "av1_read_tpl_info: invalid gf_length\n");
+      return;
+    }
+    fread_and_check(tpl_info->txfm_stats_list,
+                    sizeof(tpl_info->txfm_stats_list[0]), tpl_info->gf_length,
+                    log_stream, error);
+    fread_and_check(tpl_info->qstep_ratio_ls,
+                    sizeof(tpl_info->qstep_ratio_ls[0]), tpl_info->gf_length,
+                    log_stream, error);
+  }
+}
+#endif // CONFIG_BITRATE_ACCURACY
diff --git a/av1/encoder/thirdpass.h b/av1/encoder/thirdpass.h
index fbc5a67..4117532 100644
--- a/av1/encoder/thirdpass.h
+++ b/av1/encoder/thirdpass.h
@@ -19,6 +19,7 @@
#include "av1/encoder/firstpass.h"
#include "av1/encoder/ratectrl.h"
+#include "av1/encoder/tpl_model.h"
struct AV1_COMP;
@@ -34,6 +35,15 @@
int use_arf;
} THIRD_PASS_GOP_INFO;
+#if CONFIG_BITRATE_ACCURACY
+// Flat copy of one GF group's TPL data, in the form that
+// av1_write_tpl_info() / av1_read_tpl_info() store in the second pass log.
+typedef struct TPL_INFO {
+ // Number of valid entries in the two arrays below; av1_pack_tpl_info()
+ // sets it from gf_group->size.
+ int gf_length;
+ // Copy of TplParams::ready. When zero, only this flag is written to the
+ // log and the other fields are not filled in by av1_pack_tpl_info().
+ int tpl_ready;
+ // Per-frame copies of TplParams::txfm_stats_list.
+ TplTxfmStats txfm_stats_list[MAX_LENGTH_TPL_FRAME_STATS];
+ // Per-frame results of av1_tpl_get_qstep_ratio().
+ double qstep_ratio_ls[MAX_LENGTH_TPL_FRAME_STATS];
+} TPL_INFO;
+#endif // CONFIG_BITRATE_ACCURACY
+
typedef struct {
BLOCK_SIZE bsize;
PARTITION_TYPE partition;
@@ -167,6 +177,18 @@
PARTITION_TYPE av1_third_pass_get_sb_part_type(THIRD_PASS_DEC_CTX *ctx,
THIRD_PASS_MI_INFO *this_mi);
+#if CONFIG_BITRATE_ACCURACY
+
+// Fills tpl_info from tpl_data for the GF group described by gf_group.
+// tpl_info->tpl_ready is always set from tpl_data->ready; gf_length and the
+// per-frame arrays are filled only when that flag is non-zero.
+void av1_pack_tpl_info(TPL_INFO *tpl_info, const GF_GROUP *gf_group,
+ const TplParams *tpl_data);
+
+// Writes tpl_info to log_stream: tpl_ready first and, only when it is
+// non-zero, gf_length followed by gf_length entries of each per-frame array.
+// I/O failures detected while writing are reported through error.
+void av1_write_tpl_info(const TPL_INFO *tpl_info, FILE *log_stream,
+ struct aom_internal_error_info *error);
+
+// Zeroes tpl_info, then reads one record in the layout produced by
+// av1_write_tpl_info() from log_stream. Read failures are reported through
+// error.
+void av1_read_tpl_info(TPL_INFO *tpl_info, FILE *log_stream,
+ struct aom_internal_error_info *error);
+
+#endif // CONFIG_BITRATE_ACCURACY
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index a5b90ce..74a8eac 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -2031,6 +2031,8 @@
#if CONFIG_BITRATE_ACCURACY
void av1_vbr_rc_init(VBR_RATECTRL_INFO *vbr_rc_info, double total_bit_budget,
int show_frame_count) {
+ av1_zero(*vbr_rc_info);
+ vbr_rc_info->ready = 0;
vbr_rc_info->total_bit_budget = total_bit_budget;
vbr_rc_info->show_frame_count = show_frame_count;
const double scale_factors[FRAME_UPDATE_TYPES] = { 0.94559, 0.94559, 1,
@@ -2051,8 +2053,36 @@
sizeof(mv_scale_factors[0]) * FRAME_UPDATE_TYPES);
vbr_rc_reset_gop_data(vbr_rc_info);
+#if CONFIG_THREE_PASS
+ // TODO(angiebird): Explain why we use -1 here
+ vbr_rc_info->cur_gop_idx = -1;
+ vbr_rc_info->gop_count = 0;
+ vbr_rc_info->total_frame_count = 0;
+#endif // CONFIG_THREE_PASS
}
+#if CONFIG_THREE_PASS
+// Appends the per-frame TPL data of one GOP (`tpl_info`) directly after the
+// frames already stored in `vbr_rc_info`, and records the GOP's start index
+// and length in gop_start_idx_list / gop_length_list.
+//
+// The arrays hold at most VBR_RC_INFO_MAX_FRAMES entries. The asserts
+// document that limit for debug builds. Because assert() is compiled out
+// with NDEBUG, the limit is also enforced at run time: a GOP that does not
+// fit is truncated to the remaining room, and nothing is recorded once the
+// GOP tables themselves are full.
+void av1_vbr_rc_append_tpl_info(VBR_RATECTRL_INFO *vbr_rc_info,
+                                const TPL_INFO *tpl_info) {
+  const int gop_idx = vbr_rc_info->gop_count;
+  const int gop_start_idx = vbr_rc_info->total_frame_count;
+  assert(gop_idx < VBR_RC_INFO_MAX_FRAMES);
+  assert(gop_start_idx + tpl_info->gf_length <= VBR_RC_INFO_MAX_FRAMES);
+  if (gop_idx >= VBR_RC_INFO_MAX_FRAMES) return;
+
+  // Clamp the number of frames to what still fits. A negative length
+  // (possible if the value came from a damaged log) is treated as empty.
+  const int room = VBR_RC_INFO_MAX_FRAMES - gop_start_idx;
+  int gf_length = tpl_info->gf_length;
+  if (gf_length > room) gf_length = room;
+  if (gf_length < 0) gf_length = 0;
+
+  vbr_rc_info->gop_start_idx_list[gop_idx] = gop_start_idx;
+  vbr_rc_info->gop_length_list[gop_idx] = gf_length;
+  for (int i = 0; i < gf_length; ++i) {
+    vbr_rc_info->txfm_stats_list[gop_start_idx + i] =
+        tpl_info->txfm_stats_list[i];
+    vbr_rc_info->qstep_ratio_list[gop_start_idx + i] =
+        tpl_info->qstep_ratio_ls[i];
+    // TODO(angiebird): This is a hack. We currently apply same scale factor
+    // for each update_type, therefore setting everything to ARF_UPDATE is
+    // temporarily okay. Properly set the update_type later.
+    vbr_rc_info->update_type_list[gop_start_idx + i] = ARF_UPDATE;
+  }
+  vbr_rc_info->total_frame_count += gf_length;
+  vbr_rc_info->gop_count = gop_idx + 1;
+}
+#endif // CONFIG_THREE_PASS
+
void av1_vbr_rc_set_gop_bit_budget(VBR_RATECTRL_INFO *vbr_rc_info,
int gop_showframe_count) {
vbr_rc_info->gop_showframe_count = gop_showframe_count;
diff --git a/av1/encoder/tpl_model.h b/av1/encoder/tpl_model.h
index a811098..30f9571 100644
--- a/av1/encoder/tpl_model.h
+++ b/av1/encoder/tpl_model.h
@@ -24,6 +24,7 @@
struct EncodeFrameParams;
struct EncodeFrameInput;
struct GF_GROUP;
+struct TPL_INFO;
#include "config/aom_config.h"
@@ -223,11 +224,19 @@
} TplParams;
#if CONFIG_BITRATE_ACCURACY
+
+// Capacity of the per-frame arrays in VBR_RATECTRL_INFO.
+// With CONFIG_THREE_PASS, av1_vbr_rc_append_tpl_info() stores the frames of
+// every GOP back to back in those arrays, so this bounds the total number
+// of frames that can be recorded (fixed at 500). Otherwise the arrays are
+// sized like the per-GOP TPL frame stats.
+#if CONFIG_THREE_PASS
+#define VBR_RC_INFO_MAX_FRAMES 500
+#else // CONFIG_THREE_PASS
+#define VBR_RC_INFO_MAX_FRAMES MAX_LENGTH_TPL_FRAME_STATS
+#endif // CONFIG_THREE_PASS
+
/*!
* \brief This structure stores information needed for bitrate accuracy
* experiment.
*/
typedef struct {
+ int ready;
double total_bit_budget; // The total bit budget of the entire video
int show_frame_count; // Number of show frames in the entire video
@@ -241,17 +250,27 @@
// === Below this line are GOP related data that will be updated per GOP ===
int base_q_index; // Stores the base q index.
int q_index_list_ready;
- int q_index_list[MAX_LENGTH_TPL_FRAME_STATS]; // q indices for the current
- // GOP
+ int q_index_list[VBR_RC_INFO_MAX_FRAMES]; // q indices for the current
+ // GOP
// Arrays to store frame level bitrate accuracy data.
- double estimated_bitrate_byframe[MAX_LENGTH_TPL_FRAME_STATS];
- double estimated_mv_bitrate_byframe[MAX_LENGTH_TPL_FRAME_STATS];
- int actual_bitrate_byframe[MAX_LENGTH_TPL_FRAME_STATS];
- int actual_mv_bitrate_byframe[MAX_LENGTH_TPL_FRAME_STATS];
- int actual_coeff_bitrate_byframe[MAX_LENGTH_TPL_FRAME_STATS];
+ double estimated_bitrate_byframe[VBR_RC_INFO_MAX_FRAMES];
+ double estimated_mv_bitrate_byframe[VBR_RC_INFO_MAX_FRAMES];
+ int actual_bitrate_byframe[VBR_RC_INFO_MAX_FRAMES];
+ int actual_mv_bitrate_byframe[VBR_RC_INFO_MAX_FRAMES];
+ int actual_coeff_bitrate_byframe[VBR_RC_INFO_MAX_FRAMES];
// Array to store qstep_ratio for each frame in a GOP
- double qstep_ratio_list[MAX_LENGTH_TPL_FRAME_STATS];
+ double qstep_ratio_list[VBR_RC_INFO_MAX_FRAMES];
+
+#if CONFIG_THREE_PASS
+ TplTxfmStats txfm_stats_list[VBR_RC_INFO_MAX_FRAMES];
+ FRAME_UPDATE_TYPE update_type_list[VBR_RC_INFO_MAX_FRAMES];
+ int gop_start_idx_list[VBR_RC_INFO_MAX_FRAMES];
+ int gop_length_list[VBR_RC_INFO_MAX_FRAMES];
+ int cur_gop_idx;
+ int total_frame_count;
+ int gop_count;
+#endif // CONFIG_THREE_PASS
} VBR_RATECTRL_INFO;
static INLINE void vbr_rc_reset_gop_data(VBR_RATECTRL_INFO *vbr_rc_info) {
@@ -266,6 +285,9 @@
void av1_vbr_rc_init(VBR_RATECTRL_INFO *vbr_rc_info, double total_bit_budget,
int show_frame_count);
+
+// Appends the per-frame TPL data of one GOP (tpl_info) after the frames
+// already stored in vbr_rc_info, and records where that GOP starts and how
+// long it is. The definition is compiled only when CONFIG_THREE_PASS is set.
+void av1_vbr_rc_append_tpl_info(VBR_RATECTRL_INFO *vbr_rc_info,
+ const struct TPL_INFO *tpl_info);
void av1_vbr_rc_set_gop_bit_budget(VBR_RATECTRL_INFO *vbr_rc_info,
int gop_showframe_count);