rtc: Somes fixes to nonrd_pickmode, speed >= 7

For spatially flat blocks:
 -reduce intra_cost penalty if best inter mode is
large motion or non-LAST reference
 -forced DC intra to be checked for big blocks

Improves quality for uncovered areas.

Neutral bdrate on rtc_derf set.
No speed change observed on QVGA test clip where
the above change has positive visual effect.

Change-Id: I4aa128961344fffbe6d65af3ba21c94dfa11a1bf
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 3060a36..90a202b 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -1417,9 +1417,9 @@
 #if COLLECT_PICK_MODE_STAT
   aom_usec_timer_start(&ms_stat.timer2);
 #endif
-  const int intra_cost_penalty = av1_get_intra_cost_penalty(
+  int intra_cost_penalty = av1_get_intra_cost_penalty(
       cm->base_qindex, cm->y_dc_delta_q, cm->seq_params.bit_depth);
-  const int64_t inter_mode_thresh = RDCOST(x->rdmult, intra_cost_penalty, 0);
+  int64_t inter_mode_thresh = RDCOST(x->rdmult, intra_cost_penalty, 0);
   const int perform_intra_pred = cpi->sf.check_intra_pred_nonrd;
 
   (void)best_rd_so_far;
@@ -1431,8 +1431,6 @@
   // TODO(kyslov) Move this to Speed Features
   inter_mode_mask[BLOCK_128X128] = INTER_NEAREST_NEAR;
 
-  x->source_variance = UINT_MAX;
-
   struct scale_factors *const sf_last = get_ref_scale_factors(cm, LAST_FRAME);
   struct scale_factors *const sf_golden =
       get_ref_scale_factors(cm, GOLDEN_FRAME);
@@ -1877,7 +1875,21 @@
   mi->angle_delta[PLANE_TYPE_Y] = 0;
   mi->angle_delta[PLANE_TYPE_UV] = 0;
   mi->filter_intra_mode_info.use_filter_intra = 0;
-  // TODO(kyslov@) Need to adjust inter_mode_thresh
+
+  // Some adjustments to checking intra mode based on source variance.
+  if (x->source_variance < 50) {
+    // If the best inter mode is large motion or non-LAST ref reduce intra cost
+    // penalty, so intra mode is more likely tested.
+    if (best_pickmode.best_ref_frame != LAST_FRAME ||
+        abs(mi->mv[0].as_mv.row) > 32 || abs(mi->mv[0].as_mv.col) > 32) {
+      intra_cost_penalty = intra_cost_penalty >> 2;
+      inter_mode_thresh = RDCOST(x->rdmult, intra_cost_penalty, 0);
+    }
+    // For big blocks worth checking intra (since only DC will be checked),
+    // even if best_early_term is set.
+    if (bsize >= BLOCK_32X32) best_early_term = 0;
+  }
+
   if (best_rdc.rdcost == INT64_MAX || (perform_intra_pred && !best_early_term &&
                                        best_rdc.rdcost > inter_mode_thresh &&
                                        bsize <= cpi->sf.max_intra_bsize)) {