TPL based qp decisions

Enable TPL-based qp decisions in Q mode. The quantizer step size
scaling is calculated from the temporal dependency. Coding
performance improves as follows (negative values are gains):

         overall PSNR     SSIM
lowres      -0.70%       -0.09%
midres2     -1.26%       -0.75%
ugc480p     -2.48%       -1.29%
hdres2      -1.47%       -1.17%

STATS_CHANGED

Change-Id: Id925ec661c0a00047ad45aad34458620c8a1490c
diff --git a/av1/encoder/encoder_utils.c b/av1/encoder/encoder_utils.c
index 557268f..70edf58 100644
--- a/av1/encoder/encoder_utils.c
+++ b/av1/encoder/encoder_utils.c
@@ -542,6 +542,19 @@
   *q = av1_rc_pick_q_and_bounds(cpi, cm->width, cm->height, cpi->gf_frame_index,
                                 bottom_index, top_index);
 
+#if !CONFIG_REALTIME_ONLY
+  if (cpi->oxcf.rc_cfg.mode == AOM_Q &&
+      cpi->ppi->tpl_data.tpl_frame[cpi->gf_frame_index].is_valid &&
+      is_frame_tpl_eligible(gf_group, cpi->gf_frame_index) &&
+      !frame_is_intra_only(cm)) {
+    *q = av1_get_arf_q_index_q_mode(
+        cpi, &cpi->ppi->tpl_data.tpl_frame[cpi->gf_frame_index]);
+    *top_index = *bottom_index = *q;
+    if (gf_group->update_type[cpi->gf_frame_index] == ARF_UPDATE)
+      cpi->ppi->p_rc.arf_q = *q;
+  }
+#endif
+
   // Configure experimental use of segmentation for enhanced coding of
   // static regions if indicated.
   // Only allowed in the second pass of a two pass encode, as it requires
diff --git a/av1/encoder/lookahead.h b/av1/encoder/lookahead.h
index af79eb4..c9e1c9a 100644
--- a/av1/encoder/lookahead.h
+++ b/av1/encoder/lookahead.h
@@ -25,8 +25,8 @@
 #endif
 
 /*!\cond */
-#define MAX_LAG_BUFFERS 35
-#define MAX_LAP_BUFFERS 35
+#define MAX_LAG_BUFFERS 48
+#define MAX_LAP_BUFFERS 48
 #define MAX_TOTAL_BUFFERS (MAX_LAG_BUFFERS + MAX_LAP_BUFFERS)
 #define LAP_LAG_IN_FRAMES 17
 
diff --git a/av1/encoder/ratectrl.c b/av1/encoder/ratectrl.c
index c0c0501..68f400e 100644
--- a/av1/encoder/ratectrl.c
+++ b/av1/encoder/ratectrl.c
@@ -1667,6 +1667,50 @@
   return min_boost - (int)(boost * arf_boost_factor);
 }
 
+#if !CONFIG_REALTIME_ONLY
+// TODO(jingning): Need further refactoring to reduce the data structure
+// access scope.
+// Q-mode q index from TPL stats: active worst step size scaled by sqrt(r0).
+int av1_get_arf_q_index_q_mode(AV1_COMP *cpi, TplDepFrame *tpl_frame) {
+  AV1_COMMON *cm = &cpi->common;
+  // Step size at the active worst quality index; the result is relative to it.
+  const int leaf_qindex = cpi->rc.active_worst_quality;
+  const double leaf_qstep =
+      av1_dc_quant_QTX(leaf_qindex, 0, cm->seq_params->bit_depth);
+  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
+  const int tpl_stride = tpl_frame->stride;
+  const int mis_log2 = cpi->ppi->tpl_data.tpl_stats_block_mis_log2;
+  const int step = 1 << mis_log2;
+  int64_t intra_cost_base = 0;
+  int64_t mc_dep_cost_base = 0;
+  // Sum recrf_dist alone, and recrf_dist plus the RD cost of the mc_dep stats.
+  for (int row = 0; row < cm->mi_params.mi_rows; row += step) {
+    for (int col = 0; col < cm->mi_params.mi_cols; col += step) {
+      TplDepStats *this_stats =
+          &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, mis_log2)];
+      int64_t mc_dep_delta =
+          RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate,
+                 this_stats->mc_dep_dist);
+      intra_cost_base += (this_stats->recrf_dist << RDDIV_BITS);
+      mc_dep_cost_base += (this_stats->recrf_dist << RDDIV_BITS) + mc_dep_delta;
+    }
+  }
+  // With no accumulated cost r0 is NaN/inf; fall back to the unscaled index.
+  if (mc_dep_cost_base <= 0) return leaf_qindex;
+
+  const double r0 = (double)intra_cost_base / mc_dep_cost_base;
+  const double max_qstep = leaf_qstep * sqrt(r0);
+  int arf_qp;
+  // Highest q index whose step size is at least 0.1 below the scaled target.
+  for (arf_qp = leaf_qindex; arf_qp > 0; --arf_qp) {
+    const double tgt_qstep =
+        av1_dc_quant_QTX(arf_qp, 0, cm->seq_params->bit_depth);
+    if (tgt_qstep + 0.1 <= max_qstep) break;
+  }
+  return arf_qp;
+}
+#endif
+
 static int rc_pick_q_and_bounds_q_mode(const AV1_COMP *cpi, int width,
                                        int height, int gf_index,
                                        int *bottom_index, int *top_index) {
diff --git a/av1/encoder/ratectrl.h b/av1/encoder/ratectrl.h
index ef4fa4f..0f59d94 100644
--- a/av1/encoder/ratectrl.h
+++ b/av1/encoder/ratectrl.h
@@ -19,6 +19,7 @@
 
 #include "av1/common/av1_common_int.h"
 #include "av1/common/blockd.h"
+#include "av1/encoder/tpl_model.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -649,6 +650,17 @@
 int av1_get_arf_q_index(int base_q_index, int gfu_boost, int bit_depth,
                         double arf_boost_factor);
 
+#if !CONFIG_REALTIME_ONLY
+/*!\brief Compute the q index of a frame (e.g. the ARF) of a GOP in Q mode.
+ *
+ * \param[in]       cpi               Top level encoder structure
+ * \param[in]       tpl_frame         Tpl Frame stats of the frame being coded
+ *
+ * \return Returns the q_index chosen for the frame.
+ */
+int av1_get_arf_q_index_q_mode(struct AV1_COMP *cpi,
+                               struct TplDepFrame *tpl_frame);
+#endif
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 0796174..f158cba 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -1390,6 +1390,7 @@
   int extend_frame_length =
       AOMMIN(MAX_TPL_EXTEND,
              cpi->rc.frames_to_key - cpi->ppi->p_rc.baseline_gf_interval);
+
   int frame_display_index = gf_group->cur_frame_idx[gop_length - 1] +
                             gf_group->arf_src_offset[gop_length - 1] + 1;