TPL propagation model kernel

Redesign the core TPL propagation process. Estimate and track each
block's impact on the rate-distortion cost of subsequent blocks.

STATS_CHANGED

Change-Id: I90b0b9aa292116d8d768ff038ba3a1da646a2970
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 31b4d27..60f3e23 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -3595,8 +3595,8 @@
       if (row >= cm->mi_rows || col >= mi_cols_sr) continue;
       TplDepStats *this_stats =
           &tpl_stats[av1_tpl_ptr_pos(cpi, row, col, tpl_stride)];
-      intra_cost += this_stats->intra_cost;
-      mc_dep_cost += this_stats->intra_cost + this_stats->mc_flow;
+      intra_cost += this_stats->recrf_dist;
+      mc_dep_cost += this_stats->recrf_dist + this_stats->mc_dep_delta;
 #if !USE_TPL_CLASSIC_MODEL
       mc_count += this_stats->mc_count;
       mc_saved += this_stats->mc_saved;
@@ -3740,8 +3740,8 @@
       if (row >= cm->mi_rows || col >= mi_cols_sr) continue;
       TplDepStats *this_stats =
           &tpl_stats[av1_tpl_ptr_pos(cpi, row, col, tpl_stride)];
-      intra_cost += this_stats->intra_cost;
-      mc_dep_cost += this_stats->intra_cost + this_stats->mc_flow;
+      intra_cost += this_stats->recrf_dist;
+      mc_dep_cost += this_stats->recrf_dist + this_stats->mc_dep_delta;
 #if !USE_TPL_CLASSIC_MODEL
       mc_count += this_stats->mc_count;
       mc_saved += this_stats->mc_saved;
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index b0bdf65..d5c6858 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -3944,8 +3944,8 @@
       for (int col = 0; col < mi_cols_sr; col += step) {
         TplDepStats *this_stats =
             &tpl_stats[av1_tpl_ptr_pos(cpi, row, col, tpl_stride)];
-        intra_cost_base += this_stats->intra_cost;
-        mc_dep_cost_base += this_stats->intra_cost + this_stats->mc_flow;
+        intra_cost_base += this_stats->recrf_dist;
+        mc_dep_cost_base += this_stats->recrf_dist + this_stats->mc_dep_delta;
 #if !USE_TPL_CLASSIC_MODEL
         mc_count_base += this_stats->mc_count;
         mc_saved_base += this_stats->mc_saved;
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index c8870c9..58422c4 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -166,6 +166,9 @@
   int64_t inter_cost;
   int64_t mc_flow;
   int64_t mc_dep_cost;
+  int64_t srcrf_dist;
+  int64_t recrf_dist;
+  int64_t mc_dep_delta;
   int_mv mv;
   int ref_frame_index;
   double quant_ratio;
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 6686467..cfed647 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -279,6 +279,8 @@
   tpl_stats->inter_cost = best_inter_cost << TPL_DEP_COST_SCALE_LOG2;
   tpl_stats->intra_cost = best_intra_cost << TPL_DEP_COST_SCALE_LOG2;
 
+  tpl_stats->srcrf_dist = *recon_error << TPL_DEP_COST_SCALE_LOG2;
+
   // Final encode
   if (is_inter_mode(best_mode)) {
     ConvolveParams conv_params = get_conv_params(0, 0, xd->bd);
@@ -325,6 +327,12 @@
   av1_inverse_transform_block(xd, dqcoeff, 0, DCT_DCT, tx_size, dst_buffer,
                               dst_buffer_stride, eob, 0);
 
+  tpl_stats->recrf_dist = *recon_error << TPL_DEP_COST_SCALE_LOG2;
+  if (!is_inter_mode(best_mode))
+    tpl_stats->srcrf_dist = *recon_error << TPL_DEP_COST_SCALE_LOG2;
+
+  tpl_stats->recrf_dist = AOMMAX(tpl_stats->srcrf_dist, tpl_stats->recrf_dist);
+
   if (frame_idx && best_rf_idx != -1) {
     tpl_stats->mv.as_int = best_mv.as_int;
     tpl_stats->ref_frame_index =
@@ -423,6 +431,14 @@
       int64_t mc_flow =
           (int64_t)(tpl_stats_ptr->quant_ratio * tpl_stats_ptr->mc_dep_cost *
                     (1.0 - iiratio_nl));
+
+      int64_t cur_dep_cost =
+          tpl_stats_ptr->recrf_dist - tpl_stats_ptr->srcrf_dist;
+      int64_t mc_dep_delta =
+          (tpl_stats_ptr->mc_dep_delta *
+           (tpl_stats_ptr->recrf_dist - tpl_stats_ptr->srcrf_dist)) /
+          tpl_stats_ptr->recrf_dist;
+
 #if !USE_TPL_CLASSIC_MODEL
       int64_t mc_saved = tpl_stats_ptr->intra_cost - tpl_stats_ptr->inter_cost;
 #endif  // #if !USE_TPL_CLASSIC_MODEL
@@ -436,6 +452,9 @@
           des_stats->mc_count += overlap_area << TPL_DEP_COST_SCALE_LOG2;
           des_stats->mc_saved += (mc_saved * overlap_area) / pix_num;
 #endif  // !USE_TPL_CLASSIC_MODEL
+
+          des_stats->mc_dep_delta +=
+              ((cur_dep_cost + mc_dep_delta) * overlap_area) / pix_num;
           assert(overlap_area >= 0);
         }
       }
@@ -472,8 +491,13 @@
 
   int64_t intra_cost = src_stats->intra_cost / (mi_height * mi_width);
   int64_t inter_cost = src_stats->inter_cost / (mi_height * mi_width);
+  int64_t srcrf_dist = src_stats->srcrf_dist / (mi_height * mi_width);
+  int64_t recrf_dist = src_stats->recrf_dist / (mi_height * mi_width);
+
   intra_cost = AOMMAX(1, intra_cost);
   inter_cost = AOMMAX(1, inter_cost);
+  srcrf_dist = AOMMAX(1, srcrf_dist);
+  recrf_dist = AOMMAX(1, recrf_dist);
 
   for (int idy = 0; idy < mi_height; idy += step) {
     TplDepStats *tpl_ptr =
@@ -481,6 +505,8 @@
     for (int idx = 0; idx < mi_width; idx += step) {
       tpl_ptr->intra_cost = intra_cost;
       tpl_ptr->inter_cost = inter_cost;
+      tpl_ptr->srcrf_dist = srcrf_dist;
+      tpl_ptr->recrf_dist = recrf_dist;
       tpl_ptr->quant_ratio = src_stats->quant_ratio;
       tpl_ptr->mv.as_int = src_stats->mv.as_int;
       tpl_ptr->ref_frame_index = src_stats->ref_frame_index;
@@ -1174,8 +1200,9 @@
           if (mi_row >= cm->mi_rows || mi_col >= mi_cols_sr) continue;
           const TplDepStats *this_stats =
               &tpl_stats[av1_tpl_ptr_pos(cpi, mi_row, mi_col, tpl_stride)];
-          intra_cost += (double)this_stats->intra_cost;
-          mc_dep_cost += (double)this_stats->intra_cost + this_stats->mc_flow;
+          intra_cost += (double)this_stats->recrf_dist;
+          mc_dep_cost +=
+              (double)this_stats->recrf_dist + this_stats->mc_dep_delta;
         }
       }
       const double rk = intra_cost / mc_dep_cost;