RTC: Allow mode entropy cost update on some frames

Performance:
| SPD_SET | TESTSET | AVG_PSNR | OVR_PSNR |  SSIM   |  SPD  |
|---------|---------|----------|----------|---------|-------|
|    9    |   rtc   | -0.394%  | -0.408%  | -0.641% | +0.6% |
|    9    |rtc_derf | +0.089%  | +0.063%  | -0.120% | +0.1% |
|---------|---------|----------|----------|---------|-------|
|   10    |   rtc   | -0.021%  | -0.094%  | -0.127% | +0.5% |
|   10    |rtc_derf | -0.029%  | -0.055%  | -0.188% | +0.1% |

STATS_CHANGED

Change-Id: I1cd417a121bff73d8de1a0fe66cd10c968ba6fb0
diff --git a/av1/encoder/encodeframe_utils.c b/av1/encoder/encodeframe_utils.c
index ebe2640..dd45586 100644
--- a/av1/encoder/encodeframe_utils.c
+++ b/av1/encoder/encodeframe_utils.c
@@ -15,9 +15,7 @@
 
 #include "av1/encoder/encoder.h"
 #include "av1/encoder/encodeframe_utils.h"
-#include "av1/encoder/partition_strategy.h"
 #include "av1/encoder/rdopt.h"
-#include "av1/encoder/aq_variance.h"
 
 void av1_set_ssim_rdmult(const AV1_COMP *const cpi, int *errorperbit,
                          const BLOCK_SIZE bsize, const int mi_row,
@@ -1578,6 +1576,10 @@
   MACROBLOCK *const x = &td->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
 
+  if (cm->features.disable_cdf_update) {
+    return;
+  }
+
   switch (cpi->sf.inter_sf.coeff_cost_upd_level) {
     case INTERNAL_COST_UPD_OFF:
     case INTERNAL_COST_UPD_TILE:  // Tile level
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index e528458..ab24261 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -3237,9 +3237,9 @@
 
 // Conditions to disable cdf_update mode in selective mode for real-time.
 // Handle case for layers, scene change, and resizing.
-static int selective_disable_cdf_rtc(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  RATE_CONTROL *const rc = &cpi->rc;
+static AOM_INLINE int selective_disable_cdf_rtc(const AV1_COMP *cpi) {
+  const AV1_COMMON *const cm = &cpi->common;
+  const RATE_CONTROL *const rc = &cpi->rc;
   // For single layer.
   if (cpi->svc.number_spatial_layers == 1 &&
       cpi->svc.number_temporal_layers == 1) {
@@ -3380,6 +3380,10 @@
 
   cpi->last_frame_type = current_frame->frame_type;
 
+  if (frame_is_intra_only(cm)) {
+    cpi->frames_since_last_update = 0;
+  }
+
   if (frame_is_sframe(cm)) {
     GF_GROUP *gf_group = &cpi->ppi->gf_group;
     // S frame will wipe out any previously encoded altref so we cannot place
@@ -3681,6 +3685,12 @@
 
   cpi->last_frame_type = current_frame->frame_type;
 
+  if (cm->features.disable_cdf_update) {
+    cpi->frames_since_last_update++;
+  } else {
+    cpi->frames_since_last_update = 1;
+  }
+
   // Clear the one shot update flags for segmentation map and mode/ref loop
   // filter deltas.
   cm->seg.update_map = 0;
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 888b0f6..d009c53 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -3381,6 +3381,11 @@
    */
   DuckyEncodeInfo ducky_encode_info;
 #endif  // CONFIG_REALTIME_ONLY
+
+  /*!
+   * Frames since last frame with cdf update.
+   */
+  int frames_since_last_update;
 } AV1_COMP;
 
 /*!
@@ -4021,8 +4026,8 @@
          cm->show_frame;
 }
 
-static INLINE int is_frame_resize_pending(AV1_COMP *const cpi) {
-  ResizePendingParams *const resize_pending_params =
+static INLINE int is_frame_resize_pending(const AV1_COMP *const cpi) {
+  const ResizePendingParams *const resize_pending_params =
       &cpi->resize_pending_params;
   return (resize_pending_params->width && resize_pending_params->height &&
           (cpi->common.width != resize_pending_params->width ||
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index 17c7960..665ea48 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -14,8 +14,6 @@
 #include <math.h>
 #include <stdio.h>
 
-#include "config/av1_rtcd.h"
-
 #include "aom_dsp/aom_dsp_common.h"
 #include "aom_mem/aom_mem.h"
 #include "aom_ports/bitops.h"
@@ -25,23 +23,17 @@
 #include "av1/common/common.h"
 #include "av1/common/entropy.h"
 #include "av1/common/entropymode.h"
-#include "av1/common/mvref_common.h"
 #include "av1/common/pred_common.h"
 #include "av1/common/quant_common.h"
 #include "av1/common/reconinter.h"
 #include "av1/common/reconintra.h"
 #include "av1/common/seg_common.h"
 
-#include "av1/encoder/av1_quantize.h"
 #include "av1/encoder/cost.h"
-#include "av1/encoder/encodemb.h"
 #include "av1/encoder/encodemv.h"
 #include "av1/encoder/encoder.h"
-#include "av1/encoder/encodetxb.h"
-#include "av1/encoder/mcomp.h"
 #include "av1/encoder/ratectrl.h"
 #include "av1/encoder/rd.h"
-#include "av1/encoder/tokenize.h"
 
 #define RD_THRESH_POW 1.25
 
@@ -706,6 +698,34 @@
           cost_upd_level == INTERNAL_COST_UPD_TILE || fill_costs);
 }
 
+// Decide whether to force a mode entropy cost update for the current frame.
+// The logic is currently inherited from selective_disable_cdf_rtc().
+static AOM_INLINE int should_force_mode_cost_update(const AV1_COMP *cpi) {
+  const REAL_TIME_SPEED_FEATURES *const rt_sf = &cpi->sf.rt_sf;
+  if (!rt_sf->frame_level_mode_cost_update) {  // Speed feature is off.
+    return false;
+  }
+
+  if (cpi->oxcf.algo_cfg.cdf_update_mode == 2) {
+    return cpi->frames_since_last_update == 1;  // Set to 1 after a CDF update.
+  } else if (cpi->oxcf.algo_cfg.cdf_update_mode == 1) {
+    if (cpi->svc.number_spatial_layers == 1 &&
+        cpi->svc.number_temporal_layers == 1) {  // Single layer.
+      const AV1_COMMON *const cm = &cpi->common;
+      const RATE_CONTROL *const rc = &cpi->rc;
+
+      return frame_is_intra_only(cm) || is_frame_resize_pending(cpi) ||
+             rc->high_source_sad || rc->frames_since_key < 10 ||
+             cpi->cyclic_refresh->counter_encode_maxq_scene_change < 10 ||
+             cm->current_frame.frame_number % 8 == 0;
+    } else if (cpi->svc.number_temporal_layers > 1) {  // All but top layer id.
+      return cpi->svc.temporal_layer_id != cpi->svc.number_temporal_layers - 1;
+    }
+  }
+
+  return false;
+}
+
 void av1_initialize_rd_consts(AV1_COMP *cpi) {
   AV1_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->td.mb;
@@ -744,7 +764,8 @@
     av1_fill_coeff_costs(&x->coeff_costs, cm->fc, av1_num_planes(cm));
 
   // Frame level mode cost update
-  if (is_frame_level_cost_upd_freq_set(cm, inter_sf->mode_cost_upd_level,
+  if (should_force_mode_cost_update(cpi) ||
+      is_frame_level_cost_upd_freq_set(cm, inter_sf->mode_cost_upd_level,
                                        use_nonrd_pick_mode, frames_since_key))
     av1_fill_mode_rates(cm, &x->mode_costs, cm->fc);
 
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 616172e..7154991 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1673,8 +1673,8 @@
     sf->rt_sf.partition_direct_merging = 1;
   }
   if (speed >= 9) {
-    sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_3;
     sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
+    sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_3;
     sf->rt_sf.screen_content_cdef_filter_qindex_thresh = 20;
     sf->rt_sf.estimate_motion_for_var_based_partition = 0;
     sf->rt_sf.force_large_partition_blocks = 1;
@@ -1684,6 +1684,7 @@
       sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
     sf->rt_sf.var_part_based_on_qidx = 0;
     sf->rt_sf.prune_global_globalmv_with_globalmv = true;
+    sf->rt_sf.frame_level_mode_cost_update = true;
   }
   if (speed >= 10) {
     sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_4;
@@ -2022,6 +2023,7 @@
   rt_sf->reduce_zeromv_mvres = false;
   rt_sf->vbp_prune_16x16_split_using_min_max_sub_blk_var = false;
   rt_sf->prune_global_globalmv_with_globalmv = false;
+  rt_sf->frame_level_mode_cost_update = false;
 }
 
 void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 2f431f0..77770b0 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -1569,6 +1569,11 @@
   // Prunes global_globalmv search if its variance is \gt the globalmv's
   // variance.
   bool prune_global_globalmv_with_globalmv;
+
+  // Allow mode cost update at frame level every few frames. This overrides
+  // the command line setting --mode-cost-upd-freq=3 (never update except on
+  // key frame and first delta).
+  bool frame_level_mode_cost_update;
 } REAL_TIME_SPEED_FEATURES;
 
 /*!\endcond */