rtc: Speed 11 for video mode, for resolutions < 720p.

This is a very aggressive, fast speed setting for
video in real-time (RTC) mode.

- force single reference
- aggressive cdef skip
- selective cdf_update
- turn off motion interpolation filter search
- use only DC for intra modes
- increase thresholds for source_sad and the transform
  skipping test
- faster partitioning: fixed partition per superblock

BD-rate and IC change relative to speed 10 on rtc_derf:
~29% IC speedup and ~26% BD-rate loss

To be tuned/adjusted further.

Change-Id: Ia14d35d6d6ba81532e904c136a7ca388ed4674a4
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index d73840c..a18d294 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -535,11 +535,17 @@
   }
 #endif
   // Set the partition
-  if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
+  if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
+      (sf->rt_sf.use_fast_fixed_part &&
+       x->content_state_sb.source_sad_nonrd < kMedSad)) {
     // set a fixed-size partition
     av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
-    const BLOCK_SIZE bsize =
-        seg_skip ? sb_size : sf->part_sf.fixed_partition_size;
+    BLOCK_SIZE bsize_select = sf->part_sf.fixed_partition_size;
+    if (sf->rt_sf.use_fast_fixed_part &&
+        x->content_state_sb.source_sad_nonrd < kLowSad) {
+      bsize_select = BLOCK_64X64;
+    }
+    const BLOCK_SIZE bsize = seg_skip ? sb_size : bsize_select;
     av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
   } else if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
     // set a variance-based partition
diff --git a/av1/encoder/encodeframe_utils.c b/av1/encoder/encodeframe_utils.c
index 94298c8..7654864 100644
--- a/av1/encoder/encodeframe_utils.c
+++ b/av1/encoder/encodeframe_utils.c
@@ -1398,6 +1398,11 @@
                                                36000 };  // ~3*3*(64*64)
 
   uint64_t avg_source_sse_threshold_high = 1000000;  // ~15*15*(64*64)
+  if (cpi->sf.rt_sf.increase_source_sad_thresh) {
+    avg_source_sse_threshold_high = avg_source_sse_threshold_high << 1;
+    avg_source_sse_threshold_low[0] = avg_source_sse_threshold_low[0] << 1;
+    avg_source_sse_threshold_verylow = avg_source_sse_threshold_verylow << 1;
+  }
   uint64_t sum_sq_thresh = 10000;  // sum = sqrt(thresh / 64*64)) ~1.5
   src_y += src_offset;
   last_src_y += last_src_offset;
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 117150f..452a3a4 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -3821,6 +3821,8 @@
       if (cpi->sf.rt_sf.disable_cdf_update_non_reference_frame &&
           cpi->ppi->rtc_ref.non_reference_frame && cpi->rc.frames_since_key > 2)
         features->disable_cdf_update = 1;
+      else if (cpi->sf.rt_sf.selective_cdf_update)
+        features->disable_cdf_update = selective_disable_cdf_rtc(cpi);
       else
         features->disable_cdf_update = 0;
       break;
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 1836869..f939b6d 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -577,7 +577,7 @@
   struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
   const uint32_t dc_quant = p->dequant_QTX[0];
   const uint32_t ac_quant = p->dequant_QTX[1];
-  const int64_t dc_thr = dc_quant * dc_quant >> 6;
+  int64_t dc_thr = dc_quant * dc_quant >> 6;
   int64_t ac_thr = ac_quant * ac_quant >> 6;
   const int bw = b_width_log2_lookup[bsize];
   const int bh = b_height_log2_lookup[bsize];
@@ -597,6 +597,11 @@
 
 #endif
 
+  if (cpi->sf.rt_sf.increase_source_sad_thresh) {
+    dc_thr = dc_thr << 1;
+    ac_thr = ac_thr << 2;
+  }
+
   for (int k = 0; k < num_blk; k++) {
     // Check if all ac coefficients can be quantized to zero.
     if (!(var_tx[k] < ac_thr || var == 0)) {
@@ -626,10 +631,12 @@
         const BLOCK_SIZE uv_bsize = get_plane_block_size(
             bsize, puvd->subsampling_x, puvd->subsampling_y);
         // Adjust these thresholds for UV.
+        const int shift_ac = cpi->sf.rt_sf.increase_source_sad_thresh ? 5 : 3;
+        const int shift_dc = cpi->sf.rt_sf.increase_source_sad_thresh ? 4 : 3;
         const int64_t uv_dc_thr =
-            (puv->dequant_QTX[0] * puv->dequant_QTX[0]) >> 3;
+            (puv->dequant_QTX[0] * puv->dequant_QTX[0]) >> shift_dc;
         const int64_t uv_ac_thr =
-            (puv->dequant_QTX[1] * puv->dequant_QTX[1]) >> 3;
+            (puv->dequant_QTX[1] * puv->dequant_QTX[1]) >> shift_ac;
         av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
                                       plane, plane);
         var_uv[j] = cpi->ppi->fn_ptr[uv_bsize].vf(puv->src.buf, puv->src.stride,
diff --git a/av1/encoder/ratectrl.c b/av1/encoder/ratectrl.c
index dcc0c5a..2000111 100644
--- a/av1/encoder/ratectrl.c
+++ b/av1/encoder/ratectrl.c
@@ -2897,10 +2897,12 @@
   for (int i = 0; i < REF_FRAMES; ++i) rtc_ref->refresh[i] = 0;
   // Set the reference frame flags.
   ext_flags->ref_frame_flags ^= AOM_LAST_FLAG;
-  ext_flags->ref_frame_flags ^= AOM_ALT_FLAG;
-  ext_flags->ref_frame_flags ^= AOM_GOLD_FLAG;
-  if (cpi->sf.rt_sf.ref_frame_comp_nonrd[1])
-    ext_flags->ref_frame_flags ^= AOM_LAST2_FLAG;
+  if (!cpi->sf.rt_sf.force_only_last_ref) {
+    ext_flags->ref_frame_flags ^= AOM_ALT_FLAG;
+    ext_flags->ref_frame_flags ^= AOM_GOLD_FLAG;
+    if (cpi->sf.rt_sf.ref_frame_comp_nonrd[1])
+      ext_flags->ref_frame_flags ^= AOM_LAST2_FLAG;
+  }
   const int sh = 6;
   // Moving index slot for last: 0 - (sh - 1).
   if (frame_number > 1) last_idx = ((frame_number - 1) % sh);
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 6b18775..4c0a3bd 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1453,7 +1453,25 @@
     if (speed >= 9) sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
     if (speed >= 10) sf->rt_sf.nonrd_aggressive_skip = 1;
   }
-
+  // TODO(marpan): Tune settings for speed 11 video mode,
+  // for resolutions below 720p.
+  if (speed >= 11 && !is_720p_or_larger &&
+      cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN) {
+    sf->rt_sf.skip_cdef_sb = 2;
+    sf->rt_sf.force_only_last_ref = 1;
+    sf->rt_sf.selective_cdf_update = 1;
+    sf->rt_sf.use_nonrd_filter_search = 0;
+    if (is_360p_or_larger) {
+      sf->part_sf.fixed_partition_size = BLOCK_32X32;
+      sf->rt_sf.use_fast_fixed_part = 1;
+    }
+    sf->rt_sf.increase_source_sad_thresh = 1;
+    sf->rt_sf.part_early_exit_zeromv = 2;
+    sf->rt_sf.set_zeromv_skip_based_on_source_sad = 2;
+    for (int i = 0; i < BLOCK_SIZES; ++i) {
+      sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
+    }
+  }
   // Setting for SVC, or when the ref_frame_config control is
   // used to set the reference structure.
   if (cpi->ppi->use_svc || cpi->ppi->rtc_ref.set_ref_frame_config) {
@@ -1769,6 +1787,8 @@
                 FLAG_EARLY_TERMINATE;
   sf->rt_sf.var_part_split_threshold_shift = 5;
   if (!frame_is_intra_only(&cpi->common)) sf->rt_sf.var_part_based_on_qidx = 1;
+  sf->rt_sf.use_fast_fixed_part = 0;
+  sf->rt_sf.increase_source_sad_thresh = 0;
 
   if (speed >= 6) {
     sf->mv_sf.use_fullpel_costlist = 1;
@@ -2256,6 +2276,8 @@
   rt_sf->enable_ref_short_signaling = false;
   rt_sf->check_globalmv_on_single_ref = true;
   rt_sf->increase_color_thresh_palette = false;
+  rt_sf->selective_cdf_update = 0;
+  rt_sf->force_only_last_ref = 0;
 }
 
 static fractional_mv_step_fp
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 14cd874..d7e8989 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -1654,10 +1654,24 @@
   // rc->high_source_sad = 0 (non slide-changes), and color sensitivity off.
   int skip_cdef_sb;
 
+  // When set, disable_cdf_update is chosen by selective_disable_cdf_rtc().
+  int selective_cdf_update;
+
+  // Force only single reference (LAST) for prediction.
+  int force_only_last_ref;
+
   // Forces larger partition blocks in variance based partitioning for intra
   // frames
   int force_large_partition_blocks_intra;
 
+  // Use fixed partitioning for superblocks with source_sad_nonrd below
+  // kMedSad (BLOCK_64X64 instead of fixed_partition_size below kLowSad).
+  // 0: disabled
+  // 1: enabled
+
+  // Scale source SSE, transform-skip and zeromv-skip thresholds.
+  int increase_source_sad_thresh;
+
   // Skip evaluation of no split in tx size selection for merge partition
   int skip_tx_no_split_var_based_partition;
 
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index d545600..53abfa0 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -1528,7 +1528,9 @@
     int set_zeromv_skip_based_on_source_sad, SOURCE_SAD source_sad_nonrd) {
   if (set_zeromv_skip_based_on_source_sad == 0) return false;
 
-  if (set_zeromv_skip_based_on_source_sad >= 2)
+  if (set_zeromv_skip_based_on_source_sad >= 3)
+    return source_sad_nonrd <= kLowSad;
+  else if (set_zeromv_skip_based_on_source_sad >= 2)
     return source_sad_nonrd <= kVeryLowSad;
   else if (set_zeromv_skip_based_on_source_sad >= 1)
     return source_sad_nonrd == kZeroSad;
@@ -1545,12 +1547,13 @@
           cpi->sf.rt_sf.set_zeromv_skip_based_on_source_sad,
           x->content_state_sb.source_sad_nonrd))
     return false;
+  int shift = cpi->sf.rt_sf.increase_source_sad_thresh ? 1 : 0;
   const int block_width = mi_size_wide[cm->seq_params->sb_size];
   const int block_height = mi_size_high[cm->seq_params->sb_size];
   const unsigned int thresh_exit_part_y =
-      cpi->zeromv_skip_thresh_exit_part[bsize];
+      cpi->zeromv_skip_thresh_exit_part[bsize] << shift;
   unsigned int thresh_exit_part_uv =
-      CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP(thresh_exit_part_y);
+      CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP(thresh_exit_part_y) << shift;
   // Be more aggressive in UV threshold if source_sad >= VeryLowSad
   // to suppreess visual artifact caused by the speed feature:
   // set_zeromv_skip_based_on_source_sad = 2. For now only for