AV1 RT: implement reduced mode set for speed9 with zeromv

Only for speed9 lowres: 0.5% BD-rate degradation with a 3-4% speed-up on
speed9 lowres. Also fixes a bug in GLOBALMV usage; this doesn't require
actual global motion code, as GLOBALMV predictors are set to 0 by
default. Also fixes a bug in the threshold skip.

Change-Id: Ib4714c7f3f31eda813e6ee027fab1903aa557d8f
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index b9d5cd9..0891323 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -63,8 +63,10 @@
   { 9, 10, 13, 14 }, { 11, 12, 15, 16 }, { 17, 18, 21, 22 }, { 19, 20, 23, 24 }
 };
 
-#define RT_INTER_MODES 9
-static const REF_MODE ref_mode_set[RT_INTER_MODES] = {
+#define NUM_INTER_MODES_RT 9
+#define NUM_INTER_MODES_REDUCED 8
+
+static const REF_MODE ref_mode_set_rt[NUM_INTER_MODES_RT] = {
   { LAST_FRAME, NEARESTMV },   { LAST_FRAME, NEARMV },
   { LAST_FRAME, NEWMV },       { GOLDEN_FRAME, NEARESTMV },
   { GOLDEN_FRAME, NEARMV },    { GOLDEN_FRAME, NEWMV },
@@ -72,6 +74,15 @@
   { ALTREF_FRAME, NEWMV }
 };
 
+// GLOBALMV in the set below is in fact ZEROMV as we don't do global ME in RT
+// mode
+static const REF_MODE ref_mode_set_reduced[NUM_INTER_MODES_REDUCED] = {
+  { LAST_FRAME, GLOBALMV },   { LAST_FRAME, NEARESTMV },
+  { GOLDEN_FRAME, GLOBALMV }, { LAST_FRAME, NEARMV },
+  { LAST_FRAME, NEWMV },      { GOLDEN_FRAME, NEARESTMV },
+  { GOLDEN_FRAME, NEARMV },   { GOLDEN_FRAME, NEWMV }
+};
+
 static const THR_MODES mode_idx[REF_FRAMES][4] = {
   { THR_DC, THR_V_PRED, THR_H_PRED, THR_SMOOTH },
   { THR_NEARESTMV, THR_NEARMV, THR_GLOBALMV, THR_NEWMV },
@@ -216,6 +227,9 @@
     int i;
     for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
   }
+  // Final MV can not be equal to reference MV as this will trigger assert
+  // later. This can happen if both NEAREST and NEAR modes were skipped
+  rv = (tmp_mv->as_mv.col != ref_mv.col || tmp_mv->as_mv.row != ref_mv.row);
   return rv;
 }
 
@@ -355,6 +369,7 @@
     av1_find_best_ref_mvs_from_stack(
         cm->features.allow_high_precision_mv, mbmi_ext, ref_frame,
         &frame_mv[NEARESTMV][ref_frame], &frame_mv[NEARMV][ref_frame], 0);
+    frame_mv[GLOBALMV][ref_frame] = mbmi_ext->global_mvs[ref_frame];
     // Early exit for non-LAST frame if force_skip_low_temp_var is set.
     if (!av1_is_scaled(sf) && bsize >= BLOCK_8X8 &&
         !(force_skip_low_temp_var && ref_frame != LAST_FRAME)) {
@@ -1254,7 +1269,7 @@
   const BLOCK_SIZE min_size = AOMMAX(bsize - 3, BLOCK_4X4);
   const BLOCK_SIZE max_size = AOMMIN(bsize + 6, BLOCK_128X128);
   for (BLOCK_SIZE bs = min_size; bs <= max_size; bs += 3) {
-    int *freq_fact = &x->thresh_freq_fact[bsize][thr_mode_idx];
+    int *freq_fact = &x->thresh_freq_fact[bs][thr_mode_idx];
     if (thr_mode_idx == best_mode_idx) {
       *freq_fact -= (*freq_fact >> 4);
     } else {
@@ -1785,7 +1800,7 @@
   for (int i = 0; i < 4; ++i) {
     const PREDICTION_MODE this_mode = intra_mode_list[i];
     const THR_MODES mode_index = mode_idx[INTRA_FRAME][mode_offset(this_mode)];
-    const int mode_rd_thresh = rd_threshes[mode_index];
+    const int64_t mode_rd_thresh = rd_threshes[mode_index];
 
     if (!((1 << this_mode) & cpi->sf.rt_sf.intra_y_mode_bsize_mask_nrd[bsize]))
       continue;
@@ -1877,8 +1892,9 @@
     int extra_shift) {
   int skip_this_mode = 0;
   const THR_MODES mode_index = mode_idx[ref_frame][INTER_OFFSET(mode)];
-  int mode_rd_thresh = best_skip ? rd_threshes[mode_index] << (extra_shift + 1)
-                                 : rd_threshes[mode_index] << extra_shift;
+  int64_t mode_rd_thresh =
+      best_skip ? ((int64_t)rd_threshes[mode_index]) << (extra_shift + 1)
+                : ((int64_t)rd_threshes[mode_index]) << extra_shift;
 
   // Increase mode_rd_thresh value for non-LAST for improved encoding
   // speed
@@ -1914,7 +1930,7 @@
 
 static AOM_INLINE int skip_mode_by_bsize_and_ref_frame(
     PREDICTION_MODE mode, MV_REFERENCE_FRAME ref_frame, BLOCK_SIZE bsize,
-    int extra_prune, unsigned int sse_zeromv_norm) {
+    int extra_prune, unsigned int sse_zeromv_norm, int more_prune) {
   const unsigned int thresh_skip_golden = 500;
 
   if (ref_frame != LAST_FRAME && sse_zeromv_norm < thresh_skip_golden &&
@@ -1930,6 +1946,8 @@
       return 1;
 
     if (ref_frame != LAST_FRAME && mode == NEARMV) return 1;
+
+    if (more_prune && bsize >= BLOCK_32X32 && mode == NEARMV) return 1;
   }
   return 0;
 }
@@ -1961,7 +1979,12 @@
   int force_skip_low_temp_var = 0;
   int use_ref_frame_mask[REF_FRAMES] = { 0 };
   unsigned int sse_zeromv_norm = UINT_MAX;
-  int num_inter_modes = RT_INTER_MODES;
+  const int num_inter_modes = cpi->sf.rt_sf.nonrd_agressive_skip
+                                  ? NUM_INTER_MODES_REDUCED
+                                  : NUM_INTER_MODES_RT;
+  const REF_MODE *const ref_mode_set = cpi->sf.rt_sf.nonrd_agressive_skip
+                                           ? ref_mode_set_reduced
+                                           : ref_mode_set_rt;
   PRED_BUFFER tmp[4];
   DECLARE_ALIGNED(16, uint8_t, pred_buf[3 * 128 * 128]);
   PRED_BUFFER *this_mode_pred = NULL;
@@ -2105,9 +2128,9 @@
         get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame)
       continue;
 
-    if (skip_mode_by_bsize_and_ref_frame(this_mode, ref_frame, bsize,
-                                         x->nonrd_prune_ref_frame_search,
-                                         sse_zeromv_norm))
+    if (skip_mode_by_bsize_and_ref_frame(
+            this_mode, ref_frame, bsize, x->nonrd_prune_ref_frame_search,
+            sse_zeromv_norm, cpi->sf.rt_sf.nonrd_agressive_skip))
       continue;
 
     if (skip_mode_by_low_temp(this_mode, ref_frame, bsize, x->content_state_sb,
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h
index 148307d..73c575f 100644
--- a/av1/encoder/rd.h
+++ b/av1/encoder/rd.h
@@ -271,9 +271,9 @@
   }
 }
 
-static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
+static INLINE int rd_less_than_thresh(int64_t best_rd, int64_t thresh,
                                       int thresh_fact) {
-  return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
+  return best_rd < (thresh * thresh_fact >> 5) || thresh == INT_MAX;
 }
 
 void av1_mv_pred(const struct AV1_COMP *cpi, MACROBLOCK *x,
diff --git a/test/svc_datarate_test.cc b/test/svc_datarate_test.cc
index 043c45e..e773511 100644
--- a/test/svc_datarate_test.cc
+++ b/test/svc_datarate_test.cc
@@ -723,7 +723,7 @@
     for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
       ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.70)
           << " The datarate for the file is lower than target by too much!";
-      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.4)
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.45)
           << " The datarate for the file is greater than target by too much!";
     }
   }