rtc: Enable use_modeled_non_rd_cost flag for speed 8

Also enable source_metrics_sb_nonrd for speed 8, which
is used to constrain the use of use_modeled_non_rd_cost.
Keep the block-level conditions for using
use_modeled_non_rd_cost conservative for now.
~1.9/2% avg. bdrate loss on rtc/rtc_derf
~5-6% speedup on 720p motion clip, ~3% speedup on lower resolutions.

Change-Id: Ia673c928082c6c0a66cfb1dc120af113ce434ab3
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index a440bf2..ab8d818 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4333,8 +4333,8 @@
   int src_ystride = cpi->source->y_stride;
   uint8_t *last_src_y = cpi->last_source->y_buffer;
   int last_src_ystride = cpi->last_source->y_stride;
-  uint64_t avg_source_sse_threshold = 100000;       // ~5*5*(64*64)
-  uint64_t avg_source_sse_threshold_high = 800000;  // ~14*14*(64*64)
+  uint64_t avg_source_sse_threshold = 100000;        // ~5*5*(64*64)
+  uint64_t avg_source_sse_threshold_high = 1000000;  // ~15*15*(64*64)
   uint64_t sum_sq_thresh = 10000;  // sum = sqrt(thresh / 64*64)) ~1.5
 #if CONFIG_AV1_HIGHBITDEPTH
   MACROBLOCKD *xd = &x->e_mbd;
@@ -4346,7 +4346,7 @@
                                        last_src_ystride, &tmp_sse);
   // Note: tmp_sse - tmp_variance = ((sum * sum) >> 12)
   // Detect large lighting change.
-  if (tmp_variance < (tmp_sse >> 2) && (tmp_sse - tmp_variance) > sum_sq_thresh)
+  if (tmp_variance < (tmp_sse >> 1) && (tmp_sse - tmp_variance) > sum_sq_thresh)
     x->content_state_sb = kLowVarHighSumdiff;
   else if (tmp_sse < avg_source_sse_threshold)
     x->content_state_sb = kLowSad;
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 48ca53c..2f53e21 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -1675,9 +1675,10 @@
          sizeof(xd->tx_type_map[0]) * ctx->num_4x4_blk);
   av1_zero(x->blk_skip);
 
-  if (cpi->sf.rt_sf.use_modeled_non_rd_cost && cm->base_qindex > 140 &&
-      bsize < BLOCK_32X32 && x->source_variance > 100 &&
-      !cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) &&
+  // TODO(marpan): Look into reducing these conditions. For now constrain
+  // it to avoid significant bdrate loss.
+  if (cpi->sf.rt_sf.use_modeled_non_rd_cost && cm->base_qindex > 120 &&
+      x->source_variance > 100 && bsize <= BLOCK_16X16 &&
       x->content_state_sb != kLowVarHighSumdiff &&
       x->content_state_sb != kHighSad)
     use_modeled_non_rd_cost = 1;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 4371333..2c4aa6a 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -865,8 +865,8 @@
     sf->rt_sf.nonrd_prune_ref_frame_search = 2;
     sf->rt_sf.nonrd_check_partition_merge_mode = 0;
     sf->rt_sf.nonrd_check_partition_split = 0;
-    sf->rt_sf.use_modeled_non_rd_cost = 0;
-    sf->rt_sf.source_metrics_sb_nonrd = 0;
+    sf->rt_sf.use_modeled_non_rd_cost = 1;
+    sf->rt_sf.source_metrics_sb_nonrd = 1;
     sf->interp_sf.cb_pred_filter_search = 1;
   }
 }
diff --git a/test/rt_end_to_end_test.cc b/test/rt_end_to_end_test.cc
index 124a719..28fade9 100644
--- a/test/rt_end_to_end_test.cc
+++ b/test/rt_end_to_end_test.cc
@@ -42,7 +42,7 @@
                        { "niklas_1280_720_30.y4m",
                          { { 6, { { 0, 34.2 }, { 3, 34.2 } } },
                            { 7, { { 0, 33.7 }, { 3, 33.7 } } },
-                           { 8, { { 0, 33.7 }, { 3, 33.5 } } } } } };
+                           { 8, { { 0, 33.6 }, { 3, 33.4 } } } } } };
 
 typedef struct {
   const char *filename;