Integrate BITRATE_ACCURACY with three pass mode

Preliminary Results:
(There might be some hidden issues that cause mixed results
in 150 frames)
BITRATE_ACCURACY vs BITRATE_ACCURACY_BL-->av1 vbr mode with 3 pass on

vbr mode 150 frames
metric  avg_psnr ovr_psnr ssim
ugc360p -0.994%  -4.225% -0.606%
lowres   2.395%   1.492%  9.491%
midres   1.499%  -0.089%  3.784%
hdres    0.876%  -0.368%  2.554%

vbr mode 33 frames
metric  avg_psnr ovr_psnr ssim
ugc360p -2.722%  -3.767%  -1.627%
lowres   0.149%  -0.785%   6.130%
midres  -0.806%  -1.378%   1.338%
hdres   -0.869%  -1.620%  -0.198%

vbr mode 17 frames
metric  avg_psnr ovr_psnr ssim
ugc360p -3.684%  -3.529%  -2.729%
lowres   0.491%   0.680%   4.379%
midres  -2.052%  -3.011%  -0.792%
hdres   -1.341%  -2.470%  -0.917%

Notes:
0) The big performance drops in 33 frames and 150 frames are resolved
when using whole sequence's tpl stats.
1) The experiment shows significant gains in ugc360p 150 frames. This is
a good indicator that using shared base layer q index will lead to
better compression performance.
2) We see lowres 150 frames has 1.3% drop. This might be caused by some
hidden issues that require further investigation.
3) We didn't report abs_rc_error because my testing process changed.
I haven't figured out how to get abs_rc_error yet, but I looked at
several examples and found that we might need to revisit the accuracy
of the frame bits model again.

BUG=aomedia:3045

Change-Id: I9c2ac3c4945735c4d2ae8a9a08b91966c0978992
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c
index 6af6b87..80c89b5 100644
--- a/av1/encoder/encode_strategy.c
+++ b/av1/encoder/encode_strategy.c
@@ -33,6 +33,9 @@
 #include "av1/encoder/gop_structure.h"
 #include "av1/encoder/pass2_strategy.h"
 #include "av1/encoder/temporal_filter.h"
+#if CONFIG_THREE_PASS
+#include "av1/encoder/thirdpass.h"
+#endif  // CONFIG_THREE_PASS
 #include "av1/encoder/tpl_model.h"
 
 #if CONFIG_TUNE_VMAF
@@ -1093,7 +1096,7 @@
       if (!cpi->skip_tpl_setup_stats) {
         av1_tpl_preload_rc_estimate(cpi, frame_params);
         av1_tpl_setup_stats(cpi, 0, frame_params);
-#if CONFIG_BITRATE_ACCURACY
+#if CONFIG_BITRATE_ACCURACY && !CONFIG_THREE_PASS
         assert(cpi->gf_frame_index == 0);
         av1_vbr_rc_update_q_index_list(&cpi->vbr_rc_info, &cpi->ppi->tpl_data,
                                        gf_group, cm->seq_params->bit_depth);
@@ -1102,6 +1105,17 @@
     } else {
       av1_init_tpl_stats(&cpi->ppi->tpl_data);
     }
+#if CONFIG_BITRATE_ACCURACY && CONFIG_THREE_PASS
+    if (cpi->oxcf.pass == AOM_RC_SECOND_PASS &&
+        cpi->second_pass_log_stream != NULL) {
+      TPL_INFO *tpl_info;
+      AOM_CHECK_MEM_ERROR(cm->error, tpl_info, aom_malloc(sizeof(*tpl_info)));
+      av1_pack_tpl_info(tpl_info, gf_group, &cpi->ppi->tpl_data);
+      av1_write_tpl_info(tpl_info, cpi->second_pass_log_stream,
+                         cpi->common.error);
+      aom_free(tpl_info);
+    }
+#endif  // CONFIG_BITRATE_ACCURACY && CONFIG_THREE_PASS
   }
 
   if (av1_encode(cpi, dest, frame_input, frame_params, frame_results) !=
@@ -1532,6 +1546,42 @@
   memset(&frame_params, 0, sizeof(frame_params));
   memset(&frame_results, 0, sizeof(frame_results));
 
+#if CONFIG_BITRATE_ACCURACY && CONFIG_THREE_PASS
+  VBR_RATECTRL_INFO *vbr_rc_info = &cpi->vbr_rc_info;
+  if (oxcf->pass == AOM_RC_THIRD_PASS && vbr_rc_info->ready == 0) {
+    THIRD_PASS_FRAME_INFO frame_info[MAX_THIRD_PASS_BUF];
+    av1_open_second_pass_log(cpi, 1);
+    FILE *second_pass_log_stream = cpi->second_pass_log_stream;
+    fseek(second_pass_log_stream, 0, SEEK_END);
+    size_t file_size = ftell(second_pass_log_stream);
+    rewind(second_pass_log_stream);
+    size_t read_size = 0;
+    while (read_size < file_size) {
+      THIRD_PASS_GOP_INFO gop_info;
+      struct aom_internal_error_info *error = cpi->common.error;
+      // Read in GOP information from the second pass file.
+      av1_read_second_pass_gop_info(second_pass_log_stream, &gop_info, error);
+      TPL_INFO *tpl_info;
+      AOM_CHECK_MEM_ERROR(cm->error, tpl_info, aom_malloc(sizeof(*tpl_info)));
+      av1_read_tpl_info(tpl_info, second_pass_log_stream, error);
+      // Read in per-frame info from second-pass encoding
+      av1_read_second_pass_per_frame_info(second_pass_log_stream, frame_info,
+                                          gop_info.num_frames, error);
+      av1_vbr_rc_append_tpl_info(vbr_rc_info, tpl_info);
+      read_size = ftell(second_pass_log_stream);
+      aom_free(tpl_info);
+    }
+    av1_close_second_pass_log(cpi);
+    vbr_rc_info->base_q_index = av1_vbr_rc_info_estimate_base_q(
+        vbr_rc_info->total_bit_budget, cm->seq_params->bit_depth,
+        vbr_rc_info->scale_factors, vbr_rc_info->total_frame_count,
+        vbr_rc_info->update_type_list, vbr_rc_info->qstep_ratio_list,
+        vbr_rc_info->txfm_stats_list, vbr_rc_info->q_index_list,
+        vbr_rc_info->estimated_bitrate_byframe);
+    vbr_rc_info->ready = 1;
+  }
+#endif  // CONFIG_BITRATE_ACCURACY && CONFIG_THREE_PASS
+
   // Check if we need to stuff more src frames
   if (flush == 0) {
     int srcbuf_size =
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 175096a..10ab004 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2669,10 +2669,25 @@
 #endif  // CONFIG_RD_COMMAND
 
 #if CONFIG_BITRATE_ACCURACY
+#if CONFIG_THREE_PASS
+    if (oxcf->pass == AOM_RC_THIRD_PASS && cpi->vbr_rc_info.ready == 1) {
+      int gop_idx = cpi->vbr_rc_info.cur_gop_idx;
+      int gop_start_idx = cpi->vbr_rc_info.gop_start_idx_list[gop_idx];
+      int cur_frame_idx = gop_start_idx + cpi->gf_frame_index;
+      if (cur_frame_idx < cpi->vbr_rc_info.total_frame_count) {
+        q = cpi->vbr_rc_info.q_index_list[cur_frame_idx];
+      } else {
+        // TODO(angiebird): Investigate why sometimes there is an extra frame
+        // after the last GOP.
+        q = 255;
+      }
+    }
+#else
     if (cpi->vbr_rc_info.q_index_list_ready) {
       q = cpi->vbr_rc_info.q_index_list[cpi->gf_frame_index];
     }
-#endif
+#endif  // CONFIG_THREE_PASS
+#endif  // CONFIG_BITRATE_ACCURACY
     av1_set_quantizer(cm, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q,
                       q_cfg->enable_chroma_deltaq, q_cfg->enable_hdr_deltaq);
     av1_set_speed_features_qindex_dependent(cpi, oxcf->speed);
diff --git a/av1/encoder/pass2_strategy.c b/av1/encoder/pass2_strategy.c
index 6fd0d75..a9ff0f8 100644
--- a/av1/encoder/pass2_strategy.c
+++ b/av1/encoder/pass2_strategy.c
@@ -1081,7 +1081,7 @@
       // interval is not shortened.
       if (is_temporal_filter_enabled && !shorten_gf_interval) {
         cpi->skip_tpl_setup_stats = 1;
-#if CONFIG_BITRATE_ACCURACY
+#if CONFIG_BITRATE_ACCURACY && !CONFIG_THREE_PASS
         assert(cpi->gf_frame_index == 0);
         av1_vbr_rc_update_q_index_list(&cpi->vbr_rc_info, &cpi->ppi->tpl_data,
                                        gf_group,
@@ -3711,7 +3711,7 @@
   // Define a new GF/ARF group. (Should always enter here for key frames).
   if (cpi->gf_frame_index == gf_group->size) {
     av1_tf_info_reset(&cpi->ppi->tf_info);
-#if CONFIG_BITRATE_ACCURACY
+#if CONFIG_BITRATE_ACCURACY && !CONFIG_THREE_PASS
     vbr_rc_reset_gop_data(&cpi->vbr_rc_info);
 #endif  // CONFIG_BITRATE_ACCURACY
     int max_gop_length =
@@ -3778,6 +3778,18 @@
       // Read in GOP information from the second pass file.
       av1_read_second_pass_gop_info(cpi->second_pass_log_stream, gop_info,
                                     cpi->common.error);
+#if CONFIG_BITRATE_ACCURACY
+      TPL_INFO *tpl_info;
+      AOM_CHECK_MEM_ERROR(cpi->common.error, tpl_info,
+                          aom_malloc(sizeof(*tpl_info)));
+      av1_read_tpl_info(tpl_info, cpi->second_pass_log_stream,
+                        cpi->common.error);
+      aom_free(tpl_info);
+#if CONFIG_THREE_PASS
+      // TODO(angiebird): Put this part into a func
+      cpi->vbr_rc_info.cur_gop_idx++;
+#endif  // CONFIG_THREE_PASS
+#endif  // CONFIG_BITRATE_ACCURACY
       // Read in third_pass_info from the bitstream.
       av1_set_gop_third_pass(cpi->third_pass_ctx);
       // Read in per-frame info from second-pass encoding
diff --git a/av1/encoder/thirdpass.c b/av1/encoder/thirdpass.c
index de30849..5621247 100644
--- a/av1/encoder/thirdpass.c
+++ b/av1/encoder/thirdpass.c
@@ -705,3 +705,68 @@
 
   return corner_mi->partition;
 }
+
+#if CONFIG_BITRATE_ACCURACY
+// Writes nmemb items to stream; raises AOM_CODEC_ERROR on a short write.
+static void fwrite_and_check(const void *ptr, size_t size, size_t nmemb,
+                             FILE *stream,
+                             struct aom_internal_error_info *error) {
+  if (fwrite(ptr, size, nmemb, stream) < nmemb) {
+    aom_internal_error(error, AOM_CODEC_ERROR, "fwrite_and_check failed\n");
+  }
+}
+
+// Reads nmemb items from stream; raises AOM_CODEC_ERROR on a short read.
+static void fread_and_check(void *ptr, size_t size, size_t nmemb, FILE *stream,
+                            struct aom_internal_error_info *error) {
+  if (fread(ptr, size, nmemb, stream) < nmemb) {
+    aom_internal_error(error, AOM_CODEC_ERROR, "fread_and_check failed\n");
+  }
+}
+
+// Copies the readiness flag and per-frame TPL stats of a GOP into tpl_info.
+void av1_pack_tpl_info(TPL_INFO *tpl_info, const GF_GROUP *gf_group,
+                       const TplParams *tpl_data) {
+  tpl_info->tpl_ready = tpl_data->ready;
+  if (!tpl_info->tpl_ready) return;
+  tpl_info->gf_length = gf_group->size;
+  for (int idx = 0; idx < tpl_info->gf_length; ++idx) {
+    tpl_info->txfm_stats_list[idx] = tpl_data->txfm_stats_list[idx];
+    tpl_info->qstep_ratio_ls[idx] = av1_tpl_get_qstep_ratio(tpl_data, idx);
+  }
+}
+
+void av1_write_tpl_info(const TPL_INFO *tpl_info, FILE *log_stream,
+                        struct aom_internal_error_info *error) {
+  fwrite_and_check(&tpl_info->tpl_ready, sizeof(tpl_info->tpl_ready), 1,
+                   log_stream, error);
+  if (!tpl_info->tpl_ready) return;  // No stats payload follows the flag.
+  fwrite_and_check(&tpl_info->gf_length, sizeof(tpl_info->gf_length), 1,
+                   log_stream, error);
+  assert(tpl_info->gf_length <= MAX_LENGTH_TPL_FRAME_STATS);
+  fwrite_and_check(&tpl_info->txfm_stats_list,
+                   sizeof(tpl_info->txfm_stats_list[0]), tpl_info->gf_length,
+                   log_stream, error);
+  fwrite_and_check(&tpl_info->qstep_ratio_ls,
+                   sizeof(tpl_info->qstep_ratio_ls[0]), tpl_info->gf_length,
+                   log_stream, error);
+}
+
+void av1_read_tpl_info(TPL_INFO *tpl_info, FILE *log_stream,
+                       struct aom_internal_error_info *error) {
+  av1_zero(*tpl_info);
+  fread_and_check(&tpl_info->tpl_ready, sizeof(tpl_info->tpl_ready), 1,
+                  log_stream, error);
+  if (!tpl_info->tpl_ready) return;  // No stats payload follows the flag.
+  fread_and_check(&tpl_info->gf_length, sizeof(tpl_info->gf_length), 1,
+                  log_stream, error);
+  if ((unsigned)tpl_info->gf_length > MAX_LENGTH_TPL_FRAME_STATS)  // or < 0
+    aom_internal_error(error, AOM_CODEC_ERROR, "Invalid tpl gf_length\n");
+  fread_and_check(&tpl_info->txfm_stats_list,
+                  sizeof(tpl_info->txfm_stats_list[0]), tpl_info->gf_length,
+                  log_stream, error);
+  fread_and_check(&tpl_info->qstep_ratio_ls,
+                  sizeof(tpl_info->qstep_ratio_ls[0]), tpl_info->gf_length,
+                  log_stream, error);
+}
+#endif  // CONFIG_BITRATE_ACCURACY
diff --git a/av1/encoder/thirdpass.h b/av1/encoder/thirdpass.h
index fbc5a67..4117532 100644
--- a/av1/encoder/thirdpass.h
+++ b/av1/encoder/thirdpass.h
@@ -19,6 +19,7 @@
 
 #include "av1/encoder/firstpass.h"
 #include "av1/encoder/ratectrl.h"
+#include "av1/encoder/tpl_model.h"
 
 struct AV1_COMP;
 
@@ -34,6 +35,15 @@
   int use_arf;
 } THIRD_PASS_GOP_INFO;
 
+#if CONFIG_BITRATE_ACCURACY
+typedef struct TPL_INFO {
+  int gf_length;  // Number of entries used in the two arrays below.
+  int tpl_ready;  // Non-zero when the fields other than this flag are set.
+  TplTxfmStats txfm_stats_list[MAX_LENGTH_TPL_FRAME_STATS];
+  double qstep_ratio_ls[MAX_LENGTH_TPL_FRAME_STATS];
+} TPL_INFO;
+#endif  // CONFIG_BITRATE_ACCURACY
+
 typedef struct {
   BLOCK_SIZE bsize;
   PARTITION_TYPE partition;
@@ -167,6 +177,18 @@
 PARTITION_TYPE av1_third_pass_get_sb_part_type(THIRD_PASS_DEC_CTX *ctx,
                                                THIRD_PASS_MI_INFO *this_mi);
 
+#if CONFIG_BITRATE_ACCURACY
+
+void av1_pack_tpl_info(TPL_INFO *tpl_info, const GF_GROUP *gf_group,
+                       const TplParams *tpl_data);
+
+void av1_write_tpl_info(const TPL_INFO *tpl_info, FILE *log_stream,
+                        struct aom_internal_error_info *error);
+
+void av1_read_tpl_info(TPL_INFO *tpl_info, FILE *log_stream,
+                       struct aom_internal_error_info *error);
+
+#endif  // CONFIG_BITRATE_ACCURACY
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index a5b90ce..74a8eac 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -2031,6 +2031,8 @@
 #if CONFIG_BITRATE_ACCURACY
 void av1_vbr_rc_init(VBR_RATECTRL_INFO *vbr_rc_info, double total_bit_budget,
                      int show_frame_count) {
+  av1_zero(*vbr_rc_info);
+  vbr_rc_info->ready = 0;
   vbr_rc_info->total_bit_budget = total_bit_budget;
   vbr_rc_info->show_frame_count = show_frame_count;
   const double scale_factors[FRAME_UPDATE_TYPES] = { 0.94559, 0.94559, 1,
@@ -2051,8 +2053,36 @@
          sizeof(mv_scale_factors[0]) * FRAME_UPDATE_TYPES);
 
   vbr_rc_reset_gop_data(vbr_rc_info);
+#if CONFIG_THREE_PASS
+  // TODO(angiebird): Explain why we use -1 here
+  vbr_rc_info->cur_gop_idx = -1;
+  vbr_rc_info->gop_count = 0;
+  vbr_rc_info->total_frame_count = 0;
+#endif  // CONFIG_THREE_PASS
 }
 
+#if CONFIG_THREE_PASS
+// Appends one GOP's TPL stats to the whole-sequence lists in vbr_rc_info.
+void av1_vbr_rc_append_tpl_info(VBR_RATECTRL_INFO *vbr_rc_info,
+                                const TPL_INFO *tpl_info) {
+  const int gop_idx = vbr_rc_info->gop_count++;
+  const int base = vbr_rc_info->total_frame_count;
+  const int len = tpl_info->gf_length;
+  vbr_rc_info->gop_start_idx_list[gop_idx] = base;
+  vbr_rc_info->gop_length_list[gop_idx] = len;
+  assert(base + len <= VBR_RC_INFO_MAX_FRAMES);
+  for (int k = 0; k < len; ++k) {
+    vbr_rc_info->txfm_stats_list[base + k] = tpl_info->txfm_stats_list[k];
+    vbr_rc_info->qstep_ratio_list[base + k] = tpl_info->qstep_ratio_ls[k];
+    // TODO(angiebird): This is a hack. Every update_type currently shares the
+    // same scale factor, so marking everything as ARF_UPDATE is temporarily
+    // okay. Set the real update_type later.
+    vbr_rc_info->update_type_list[base + k] = ARF_UPDATE;
+  }
+  vbr_rc_info->total_frame_count += len;
+}
+#endif  // CONFIG_THREE_PASS
+
 void av1_vbr_rc_set_gop_bit_budget(VBR_RATECTRL_INFO *vbr_rc_info,
                                    int gop_showframe_count) {
   vbr_rc_info->gop_showframe_count = gop_showframe_count;
diff --git a/av1/encoder/tpl_model.h b/av1/encoder/tpl_model.h
index a811098..30f9571 100644
--- a/av1/encoder/tpl_model.h
+++ b/av1/encoder/tpl_model.h
@@ -24,6 +24,7 @@
 struct EncodeFrameParams;
 struct EncodeFrameInput;
 struct GF_GROUP;
+struct TPL_INFO;
 
 #include "config/aom_config.h"
 
@@ -223,11 +224,19 @@
 } TplParams;
 
 #if CONFIG_BITRATE_ACCURACY
+
+#if CONFIG_THREE_PASS
+#define VBR_RC_INFO_MAX_FRAMES 500
+#else  // CONFIG_THREE_PASS
+#define VBR_RC_INFO_MAX_FRAMES MAX_LENGTH_TPL_FRAME_STATS
+#endif  //  CONFIG_THREE_PASS
+
 /*!
  * \brief This structure stores information needed for bitrate accuracy
  * experiment.
  */
 typedef struct {
+  int ready;
   double total_bit_budget;  // The total bit budget of the entire video
   int show_frame_count;     // Number of show frames in the entire video
 
@@ -241,17 +250,27 @@
   // === Below this line are GOP related data that will be updated per GOP ===
   int base_q_index;  // Stores the base q index.
   int q_index_list_ready;
-  int q_index_list[MAX_LENGTH_TPL_FRAME_STATS];  // q indices for the current
-                                                 // GOP
+  int q_index_list[VBR_RC_INFO_MAX_FRAMES];  // q indices for the current
+                                             // GOP
   // Arrays to store frame level bitrate accuracy data.
-  double estimated_bitrate_byframe[MAX_LENGTH_TPL_FRAME_STATS];
-  double estimated_mv_bitrate_byframe[MAX_LENGTH_TPL_FRAME_STATS];
-  int actual_bitrate_byframe[MAX_LENGTH_TPL_FRAME_STATS];
-  int actual_mv_bitrate_byframe[MAX_LENGTH_TPL_FRAME_STATS];
-  int actual_coeff_bitrate_byframe[MAX_LENGTH_TPL_FRAME_STATS];
+  double estimated_bitrate_byframe[VBR_RC_INFO_MAX_FRAMES];
+  double estimated_mv_bitrate_byframe[VBR_RC_INFO_MAX_FRAMES];
+  int actual_bitrate_byframe[VBR_RC_INFO_MAX_FRAMES];
+  int actual_mv_bitrate_byframe[VBR_RC_INFO_MAX_FRAMES];
+  int actual_coeff_bitrate_byframe[VBR_RC_INFO_MAX_FRAMES];
 
   // Array to store qstep_ratio for each frame in a GOP
-  double qstep_ratio_list[MAX_LENGTH_TPL_FRAME_STATS];
+  double qstep_ratio_list[VBR_RC_INFO_MAX_FRAMES];
+
+#if CONFIG_THREE_PASS
+  TplTxfmStats txfm_stats_list[VBR_RC_INFO_MAX_FRAMES];
+  FRAME_UPDATE_TYPE update_type_list[VBR_RC_INFO_MAX_FRAMES];
+  int gop_start_idx_list[VBR_RC_INFO_MAX_FRAMES];
+  int gop_length_list[VBR_RC_INFO_MAX_FRAMES];
+  int cur_gop_idx;
+  int total_frame_count;
+  int gop_count;
+#endif  // CONFIG_THREE_PASS
 } VBR_RATECTRL_INFO;
 
 static INLINE void vbr_rc_reset_gop_data(VBR_RATECTRL_INFO *vbr_rc_info) {
@@ -266,6 +285,9 @@
 
 void av1_vbr_rc_init(VBR_RATECTRL_INFO *vbr_rc_info, double total_bit_budget,
                      int show_frame_count);
+
+void av1_vbr_rc_append_tpl_info(VBR_RATECTRL_INFO *vbr_rc_info,
+                                const struct TPL_INFO *tpl_info);
 void av1_vbr_rc_set_gop_bit_budget(VBR_RATECTRL_INFO *vbr_rc_info,
                                    int gop_showframe_count);