rtc-svc: Avoid applying loopfilter on non_reference

Add rt speed feature to skip applying loopfilter and
cdef on encoder for frames that are non_reference/droppable.
This gives a speedup of ~3-4% for 3TL speed 8 vga on x86.
Result is not bitexact but very close, < ~0.01 bdrate on rtc.

Change-Id: I9f256b9549d85e0e2b12065b5fa1fd74ff902750
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c
index 647ce9f..649f694 100644
--- a/av1/encoder/encode_strategy.c
+++ b/av1/encoder/encode_strategy.c
@@ -1294,6 +1294,7 @@
   if (!frame_params.show_existing_frame) {
     cm->quant_params.using_qmatrix = oxcf->q_cfg.using_qm;
   }
+
 #if CONFIG_REALTIME_ONLY
   if (av1_encode(cpi, dest, &frame_input, &frame_params, &frame_results) !=
       AOM_CODEC_OK) {
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index a4e6497..7303eec 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -1955,7 +1955,8 @@
                     cpi->sf.lpf_sf.cdef_pick_method, cpi->td.mb.rdmult);
 
     // Apply the filter
-    av1_cdef_frame(&cm->cur_frame->buf, cm, xd);
+    if (!cpi->sf.rt_sf.skip_loopfilter_non_reference)
+      av1_cdef_frame(&cm->cur_frame->buf, cm, xd);
 #if CONFIG_COLLECT_COMPONENT_TIMING
     end_timing(cpi, cdef_time);
 #endif
@@ -2030,7 +2031,8 @@
     lf->filter_level[1] = 0;
   }
 
-  if (lf->filter_level[0] || lf->filter_level[1]) {
+  if ((lf->filter_level[0] || lf->filter_level[1]) &&
+      !cpi->sf.rt_sf.skip_loopfilter_non_reference) {
     if (num_workers > 1)
       av1_loop_filter_frame_mt(&cm->cur_frame->buf, cm, xd, 0, num_planes, 0,
 #if CONFIG_LPF_MASK
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index e287649..8340bf3 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -702,6 +702,7 @@
   sf->tx_sf.model_based_prune_tx_search_level = 1;
   sf->tx_sf.tx_type_search.use_reduced_intra_txset = 1;
   sf->rt_sf.fullpel_search_step_param = 0;
+  sf->rt_sf.skip_loopfilter_non_reference = 0;
 
   if (speed >= 1) {
     sf->gm_sf.gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2;
@@ -920,6 +921,13 @@
         sf->rt_sf.fullpel_search_step_param = 10;
       }
     }
+    // TODO(marpan): Look into why enabling skip_loopfilter_non_reference is
+    // not bitexact on rtc testset, it's very close (< ~0.01 bdrate), but not
+    // always bitexact.
+    if (cpi->use_svc && cpi->svc.non_reference_frame &&
+        sf->lpf_sf.cdef_pick_method == CDEF_PICK_FROM_Q &&
+        sf->lpf_sf.lpf_pick == LPF_PICK_FROM_Q)
+      sf->rt_sf.skip_loopfilter_non_reference = 1;
   }
 
   if (speed >= 8) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index a59a172..7a6051a 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -1015,6 +1015,10 @@
   // indicates larger window. If set to 0, step_param is set based on internal
   // logic in set_mv_search_params().
   int fullpel_search_step_param;
+
+  // Skip loopfilter (and cdef) in svc real-time mode for
+  // non_reference/droppable frames.
+  int skip_loopfilter_non_reference;
 } REAL_TIME_SPEED_FEATURES;
 
 /*!\endcond */
diff --git a/test/svc_datarate_test.cc b/test/svc_datarate_test.cc
index 774593c..13aaa3c 100644
--- a/test/svc_datarate_test.cc
+++ b/test/svc_datarate_test.cc
@@ -442,6 +442,11 @@
       ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.30)
           << " The datarate for the file is greater than target by too much!";
     }
+    // Top temporal layers are non_reference, so exclude them from
+    // mismatch count, since loopfilter/cdef is not applied for these on
+    // encoder side, but is always applied on decoder.
+    // This means 150 = #frames(300) - #TL2_frames(150).
+    EXPECT_EQ((int)GetMismatchFrames(), 150);
   }
 
   virtual void BasicRateTargetingSVC3TL1SLResizeTest() {