RC: Implement function to send TPL stats to external rate control

Bug: b:450252793
Change-Id: I78ba60eb52089f086c8d6c24e5da249773897542
diff --git a/av1/encoder/av1_ext_ratectrl.c b/av1/encoder/av1_ext_ratectrl.c
index 65e1092..7ca1165 100644
--- a/av1/encoder/av1_ext_ratectrl.c
+++ b/av1/encoder/av1_ext_ratectrl.c
@@ -105,6 +105,20 @@
   return AOM_CODEC_OK;
 }
 
+// Passes tpl_gop_stats to the external RC send_tpl_gop_stats() callback when
+// the model is ready and the callback is set; AOM_RC_ERROR -> AOM_CODEC_ERROR.
+aom_codec_err_t av1_extrc_send_tpl_stats(AOM_EXT_RATECTRL *ext_ratectrl,
+                                         const AomTplGopStats *tpl_gop_stats) {
+  assert(ext_ratectrl != NULL);
+  assert(tpl_gop_stats != NULL);
+  if (!ext_ratectrl->ready || ext_ratectrl->funcs.send_tpl_gop_stats == NULL) {
+    return AOM_CODEC_OK;
+  }
+  const aom_rc_status_t rc_status = ext_ratectrl->funcs.send_tpl_gop_stats(
+      ext_ratectrl->model, tpl_gop_stats);
+  return rc_status == AOM_RC_ERROR ? AOM_CODEC_ERROR : AOM_CODEC_OK;
+}
+
 aom_codec_err_t av1_extrc_delete(AOM_EXT_RATECTRL *ext_ratectrl) {
   if (ext_ratectrl == NULL) {
     return AOM_CODEC_INVALID_PARAM;
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 361332a..24dc1d8 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -3712,6 +3712,11 @@
    * External rate control.
    */
   AOM_EXT_RATECTRL ext_ratectrl;
+
+  /*!
+   * TPL stats of the current GOP, stored before propagation for external RC.
+   */
+  AomTplGopStats tpl_gop_stats;
 } AV1_COMP;
 
 /*!
diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h
index 02366dc..0ffb7cc 100644
--- a/av1/encoder/encoder_alloc.h
+++ b/av1/encoder/encoder_alloc.h
@@ -378,6 +378,10 @@
 
   aom_free(cpi->mb_delta_q);
   cpi->mb_delta_q = NULL;
+
+#if !CONFIG_REALTIME_ONLY
+  av1_free_tpl_gop_stats(&cpi->tpl_gop_stats);
+#endif
 }
 
 static inline void allocate_gradient_info_for_hog(AV1_COMP *cpi) {
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index a8c2f83..60dd4de 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -1427,6 +1427,45 @@
       gf_group->layer_depth[frame_idx] >= layer_depth_th;
 }
 
+// Copies the pre-propagation TPL stats of one block from src_stats into the
+// external RC record tpl_block_stats; row/col are mi_row/mi_col * MI_SIZE.
+static void tpl_store_before_propagation(AomTplBlockStats *tpl_block_stats,
+                                         const TplDepStats *src_stats,
+                                         int mi_row, int mi_col) {
+  // Fields shifted by scale_shift below are scaled down for external RC, as
+  // libaom scales them up first. See b/274644689.
+  const int scale_shift = TPL_DEP_COST_SCALE_LOG2;
+  tpl_block_stats->row = mi_row * MI_SIZE;
+  tpl_block_stats->col = mi_col * MI_SIZE;
+  tpl_block_stats->srcrf_sse = src_stats->srcrf_sse;  // NOTE(review): no shift.
+  tpl_block_stats->srcrf_dist = src_stats->srcrf_dist >> scale_shift;
+  tpl_block_stats->recrf_sse = src_stats->recrf_sse >> scale_shift;
+  tpl_block_stats->recrf_dist = src_stats->recrf_dist >> scale_shift;
+  tpl_block_stats->intra_sse = src_stats->intra_sse >> scale_shift;
+  tpl_block_stats->intra_dist = src_stats->intra_dist >> scale_shift;
+  tpl_block_stats->cmp_recrf_dist[0] = src_stats->cmp_recrf_dist[0];
+  tpl_block_stats->cmp_recrf_dist[1] = src_stats->cmp_recrf_dist[1];
+  tpl_block_stats->mc_dep_rate = src_stats->mc_dep_rate;
+  tpl_block_stats->mc_dep_dist = src_stats->mc_dep_dist;
+  tpl_block_stats->inter_cost = src_stats->inter_cost;
+  tpl_block_stats->intra_cost = src_stats->intra_cost;
+  tpl_block_stats->srcrf_rate = src_stats->srcrf_rate;
+  tpl_block_stats->recrf_rate = src_stats->recrf_rate;
+  tpl_block_stats->intra_rate = src_stats->intra_rate;
+  tpl_block_stats->cmp_recrf_rate[0] = src_stats->cmp_recrf_rate[0];
+  tpl_block_stats->cmp_recrf_rate[1] = src_stats->cmp_recrf_rate[1];
+  tpl_block_stats->ref_frame_index[0] = src_stats->ref_frame_index[0];
+  tpl_block_stats->ref_frame_index[1] = src_stats->ref_frame_index[1];
+  for (int ref = 0; ref < AOM_RC_INTER_REFS_PER_FRAME; ++ref) {
+    tpl_block_stats->mv[ref].as_mv.col = src_stats->mv[ref].as_mv.col;
+    tpl_block_stats->mv[ref].as_mv.row = src_stats->mv[ref].as_mv.row;
+    tpl_block_stats->mv[ref].as_fullmv.col = src_stats->mv[ref].as_fullmv.col;
+    tpl_block_stats->mv[ref].as_fullmv.row = src_stats->mv[ref].as_fullmv.row;
+    tpl_block_stats->mv[ref].as_int = src_stats->mv[ref].as_int;
+    tpl_block_stats->pred_error[ref] = src_stats->pred_error[ref];
+  }
+}
+
 // This function stores the motion estimation dependencies of all the blocks in
 // a row
 void av1_mc_flow_dispenser_row(AV1_COMP *cpi, TplTxfmStats *tpl_txfm_stats,
@@ -1441,6 +1480,9 @@
   TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_data->frame_idx];
   MACROBLOCKD *xd = &x->e_mbd;
 
+  AomTplFrameStats *tpl_frame_stats_before_propagation =
+      &cpi->tpl_gop_stats.frame_stats_list[tpl_data->frame_idx];
+
   const int tplb_cols_in_tile =
       ROUND_POWER_OF_TWO(mi_params->mi_cols, mi_size_wide_log2[bsize]);
   const int tplb_row = ROUND_POWER_OF_TWO(mi_row, mi_size_high_log2[bsize]);
@@ -1476,6 +1518,11 @@
     // Motion flow dependency dispenser.
     tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, tpl_frame->stride,
                     &tpl_stats, tpl_data->tpl_stats_block_mis_log2);
+
+    AomTplBlockStats *block_stats =
+        &tpl_frame_stats_before_propagation
+             ->block_stats_list[mi_row * tpl_frame->mi_cols + mi_col];
+    tpl_store_before_propagation(block_stats, &tpl_stats, mi_row, mi_col);
     (*tpl_row_mt->sync_write_ptr)(&tpl_data->tpl_mt_sync, tplb_row,
                                   tplb_col_in_tile, tplb_cols_in_tile);
   }
@@ -1521,7 +1568,7 @@
   }
 }
 
-static inline void init_gop_frames_for_tpl(
+static inline int init_gop_frames_for_tpl(
     AV1_COMP *cpi, const EncodeFrameParams *const init_frame_params,
     GF_GROUP *gf_group, int *tpl_group_frames, int *pframe_qindex) {
   AV1_COMMON *cm = &cpi->common;
@@ -1722,6 +1769,8 @@
     ++extend_frame_count;
     ++frame_display_index;
   }
+
+  return extend_frame_count;
 }
 
 void av1_init_tpl_stats(TplParams *const tpl_data) {
@@ -1856,6 +1905,72 @@
   return exp((mc_dep_cost_base - intra_cost_base) / cbcmp_base);
 }
 
+// Frees each frame's block list and the frame list itself; resets size to 0.
+void av1_free_tpl_gop_stats(AomTplGopStats *tpl_gop_stats) {
+  if (tpl_gop_stats == NULL || tpl_gop_stats->frame_stats_list == NULL) return;
+  AomTplFrameStats *const frames = tpl_gop_stats->frame_stats_list;
+  for (int i = 0; i < tpl_gop_stats->size; ++i) {
+    aom_free(frames[i].block_stats_list);
+    frames[i].block_stats_list = NULL;
+  }
+  aom_free(frames);
+  tpl_gop_stats->frame_stats_list = NULL;
+  tpl_gop_stats->size = 0;
+}
+
+// Frees any previous contents of tpl_gop_stats, then allocates tpl_gop_frames
+// frame entries, each with mi_rows * mi_cols block entries of its tpl frame.
+static void init_tpl_stats_before_propagation(
+    struct aom_internal_error_info *error_info, AomTplGopStats *tpl_gop_stats,
+    TplParams *tpl_stats, int tpl_gop_frames, int frame_width,
+    int frame_height) {
+  av1_free_tpl_gop_stats(tpl_gop_stats);
+  AOM_CHECK_MEM_ERROR(
+      error_info, tpl_gop_stats->frame_stats_list,
+      aom_calloc(tpl_gop_frames, sizeof(*tpl_gop_stats->frame_stats_list)));
+  tpl_gop_stats->size = tpl_gop_frames;
+  for (int i = 0; i < tpl_gop_frames; ++i) {
+    AomTplFrameStats *const frame_stats = &tpl_gop_stats->frame_stats_list[i];
+    const TplDepFrame *const tpl_frame = &tpl_stats->tpl_frame[i];
+    const int num_blocks = tpl_frame->mi_rows * tpl_frame->mi_cols;
+    AOM_CHECK_MEM_ERROR(
+        error_info, frame_stats->block_stats_list,
+        aom_calloc(num_blocks, sizeof(*frame_stats->block_stats_list)));
+    frame_stats->num_blocks = num_blocks;
+    frame_stats->frame_width = frame_width;
+    frame_stats->frame_height = frame_height;
+  }
+}
+
+// Drops the last `extra_frames` entries of tpl_gop_stats in place.
+//
+// The trailing frames' block lists are freed and their pointers cleared, then
+// `size` is reduced. The surviving entries and frame_stats_list itself are
+// left untouched, so there is no allocation that could fail and nothing is
+// copied. av1_free_tpl_gop_stats() only walks the first `size` entries, so
+// clearing the trailing entries here keeps them from leaking.
+//
+// Reports AOM_CODEC_ERROR through error_info unless at least one frame would
+// remain. extra_frames is expected to be non-negative.
+static void trim_tpl_stats(struct aom_internal_error_info *error_info,
+                           AomTplGopStats *tpl_gop_stats, int extra_frames) {
+  const int old_size = tpl_gop_stats->size;
+  if (old_size <= extra_frames) {
+    aom_internal_error(
+        error_info, AOM_CODEC_ERROR,
+        "The number of frames in AomTplGopStats is fewer than expected.");
+    return;  // In case aom_internal_error() returns instead of unwinding.
+  }
+  const int new_size = old_size - extra_frames;
+  for (int i = new_size; i < old_size; ++i) {
+    AomTplFrameStats *const frame_stats = &tpl_gop_stats->frame_stats_list[i];
+    aom_free(frame_stats->block_stats_list);
+    frame_stats->block_stats_list = NULL;
+    frame_stats->num_blocks = 0;
+  }
+  tpl_gop_stats->size = new_size;
+}
+
 int av1_tpl_setup_stats(AV1_COMP *cpi, int gop_eval,
                         const EncodeFrameParams *const frame_params) {
 #if CONFIG_COLLECT_COMPONENT_TIMING
@@ -1890,13 +2005,17 @@
 
   int pframe_qindex;
   int tpl_gf_group_frames;
-  init_gop_frames_for_tpl(cpi, frame_params, gf_group, &tpl_gf_group_frames,
-                          &pframe_qindex);
+  int extended_frame_count = init_gop_frames_for_tpl(
+      cpi, frame_params, gf_group, &tpl_gf_group_frames, &pframe_qindex);
 
   cpi->ppi->p_rc.base_layer_qp = pframe_qindex;
 
   av1_init_tpl_stats(tpl_data);
 
+  init_tpl_stats_before_propagation(cpi->common.error, &cpi->tpl_gop_stats,
+                                    tpl_data, tpl_gf_group_frames,
+                                    cpi->common.width, cpi->common.height);
+
   TplBuffers *tpl_tmp_buffers = &cpi->td.tpl_tmp_buffers;
   if (!tpl_alloc_temp_buffers(tpl_tmp_buffers, tpl_data->tpl_bsize_1d)) {
     aom_internal_error(cpi->common.error, AOM_CODEC_MEM_ERROR,
@@ -1962,6 +2081,20 @@
                              num_planes);
   }
 
+  if (cpi->ext_ratectrl.ready &&
+      cpi->ext_ratectrl.funcs.send_tpl_gop_stats != NULL) {
+    // TPL stats has extra frames from next GOP. Trim those extra frames for
+    // external RC.
+    trim_tpl_stats(cpi->common.error, &cpi->tpl_gop_stats,
+                   extended_frame_count);
+    const aom_codec_err_t codec_status =
+        av1_extrc_send_tpl_stats(&cpi->ext_ratectrl, &cpi->tpl_gop_stats);
+    if (codec_status != AOM_CODEC_OK) {
+      aom_internal_error(cpi->common.error, codec_status,
+                         "av1_extrc_send_tpl_stats() failed");
+    }
+  }
+
   for (int frame_idx = tpl_gf_group_frames - 1;
        frame_idx >= cpi->gf_frame_index; --frame_idx) {
     if (skip_tpl_for_frame(gf_group, frame_idx, gop_eval, approx_gop_eval,
diff --git a/av1/encoder/tpl_model.h b/av1/encoder/tpl_model.h
index 3690671..68498aa 100644
--- a/av1/encoder/tpl_model.h
+++ b/av1/encoder/tpl_model.h
@@ -29,6 +29,7 @@
 
 #include "config/aom_config.h"
 
+#include "aom/aom_tpl.h"
 #include "aom_scale/yv12config.h"
 #include "aom_util/aom_pthread.h"
 
@@ -686,6 +687,12 @@
 double av1_tpl_compute_frame_mv_entropy(const TplDepFrame *tpl_frame,
                                         uint8_t right_shift);
 
+/*!\brief Free the memory allocated for cpi->tpl_gop_stats.
+ *
+ * \param[in,out] tpl_gop_stats TPL stats for the GOP used for external RC.
+ */
+void av1_free_tpl_gop_stats(AomTplGopStats *tpl_gop_stats);
+
 #if CONFIG_RATECTRL_LOG
 typedef struct {
   int coding_frame_count;
diff --git a/test/ext_ratectrl_test.cc b/test/ext_ratectrl_test.cc
index a84def7..4027025 100644
--- a/test/ext_ratectrl_test.cc
+++ b/test/ext_ratectrl_test.cc
@@ -36,6 +36,9 @@
 // A flag to indicate if send_firstpass_stats() is called.
 bool is_send_firstpass_stats_called = false;
 
+// A flag to indicate if send_tpl_gop_stats() is called.
+bool is_send_tpl_gop_stats_called = false;
+
 aom_rc_status_t mock_create_model(void *priv,
                                   const aom_rc_config_t *ratectrl_config,
                                   aom_rc_model_t *ratectrl_model) {
@@ -65,6 +68,16 @@
   return AOM_RC_OK;
 }
 
+aom_rc_status_t mock_send_tpl_gop_stats(aom_rc_model_t ratectrl_model,
+                                        const AomTplGopStats *tpl_gop_stats) {
+  EXPECT_NE(ratectrl_model, nullptr);
+  EXPECT_NE(tpl_gop_stats, nullptr);  // NOTE(review): non-fatal; deref below.
+  EXPECT_GT(tpl_gop_stats->size, 0);  // At least one frame is expected.
+  EXPECT_NE(tpl_gop_stats->frame_stats_list, nullptr);
+  is_send_tpl_gop_stats_called = true;  // Read back by the test body.
+  return AOM_RC_OK;
+}
+
 aom_rc_status_t mock_get_encodeframe_decision(
     aom_rc_model_t ratectrl_model, const int frame_gop_index,
     aom_rc_encodeframe_decision_t *frame_decision) {
@@ -91,6 +104,7 @@
     rc_funcs->create_model = mock_create_model;
     rc_funcs->delete_model = mock_delete_model;
     rc_funcs->send_firstpass_stats = mock_send_firstpass_stats;
+    rc_funcs->send_tpl_gop_stats = mock_send_tpl_gop_stats;
     rc_funcs->get_encodeframe_decision = mock_get_encodeframe_decision;
     rc_funcs->update_encodeframe_result = mock_update_encodeframe_result;
   }
@@ -103,6 +117,7 @@
     is_create_model_called = false;
     is_delete_model_called = false;
     is_send_firstpass_stats_called = false;
+    is_send_tpl_gop_stats_called = false;
   }
 
   void PreEncodeFrameHook(::libaom_test::VideoSource *video,
@@ -122,6 +137,7 @@
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
   EXPECT_TRUE(is_create_model_called);
   EXPECT_TRUE(is_send_firstpass_stats_called);
+  EXPECT_TRUE(is_send_tpl_gop_stats_called);
   EXPECT_TRUE(is_delete_model_called);
 }