rtc-svc: Allow for second temporal reference for spatial layers

Allow a second (longer-term) temporal reference for the top
spatial layer in SVC mode. ALTREF is used for this reference.
Update the sample encoder, fix nonrd_pickmode, and add unit tests.

Change-Id: I2631de594b855f2219d86728fb163dc2ceccda0c
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 199cb0d..906811b 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -1609,7 +1609,8 @@
   const struct segmentation *const seg = &cm->seg;
   const int is_small_sb = (cm->seq_params.sb_size == BLOCK_64X64);
 
-  int use_alt_ref_frame = cpi->sf.rt_sf.use_nonrd_altref_frame;
+  // For SVC the usage of alt_ref is determined by the ref_frame_flags.
+  int use_alt_ref_frame = cpi->use_svc || cpi->sf.rt_sf.use_nonrd_altref_frame;
   int use_golden_ref_frame = 1;
 
   use_ref_frame[LAST_FRAME] = 1;  // we never skip LAST
@@ -1713,9 +1714,10 @@
   uint32_t spatial_var_thresh = 50;
   int motion_thresh = 32;
   // Adjust thresholds to make intra mode likely tested if the other
-  // references (golden, alt) are skipped/not checked.
-  if (cpi->sf.rt_sf.use_nonrd_altref_frame == 0 &&
-      cpi->sf.rt_sf.nonrd_prune_ref_frame_search > 0) {
+  // references (golden, alt) are skipped/not checked. For now always
+  // adjust for svc mode.
+  if (cpi->use_svc || (cpi->sf.rt_sf.use_nonrd_altref_frame == 0 &&
+                       cpi->sf.rt_sf.nonrd_prune_ref_frame_search > 0)) {
     spatial_var_thresh = 150;
     motion_thresh = 0;
   }
diff --git a/examples/svc_encoder_rtc.c b/examples/svc_encoder_rtc.c
index 2b883cf..401049f 100644
--- a/examples/svc_encoder_rtc.c
+++ b/examples/svc_encoder_rtc.c
@@ -263,6 +263,7 @@
                               int *use_svc_control, int spatial_layer_id,
                               int is_key_frame, int ksvc_mode) {
   int i;
+  int enable_longterm_temporal_ref = 1;
   int shift = (layering_mode == 7) ? 2 : 0;
   *use_svc_control = 1;
   layer_id->spatial_layer_id = spatial_layer_id;
@@ -456,6 +457,14 @@
         ref_frame_config->refresh[2] = 1;
         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
         ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
+        // For the 3 spatial layer case: allow the top spatial layer to use
+        // an additional temporal reference, refreshed every 10 frames.
+        if (enable_longterm_temporal_ref) {
+          ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
+          ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
+          if (base_count % 10 == 0)
+            ref_frame_config->refresh[REF_FRAMES - 1] = 1;
+        }
       }
       break;
     case 7:
@@ -587,7 +596,19 @@
         }
       }
       if (layer_id->spatial_layer_id > 0)
-        ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;  // Reference GOLDEN.
+        // Reference GOLDEN.
+        ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
+      // For the 3 spatial layer case with layering_mode 7 (where there is a
+      // free buffer slot): allow the top spatial layer to use an additional
+      // temporal reference. This reference is only updated on the base
+      // temporal layer, every 10 TL0 frames here.
+      if (enable_longterm_temporal_ref && layer_id->spatial_layer_id == 2 &&
+          layering_mode == 7) {
+        ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
+        ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
+        if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0)
+          ref_frame_config->refresh[REF_FRAMES - 1] = 1;
+      }
       break;
     default: assert(0); die("Error: Unsupported temporal layering mode!\n");
   }
diff --git a/test/svc_datarate_test.cc b/test/svc_datarate_test.cc
index fcd9bc5..7a8a153 100644
--- a/test/svc_datarate_test.cc
+++ b/test/svc_datarate_test.cc
@@ -265,11 +265,18 @@
         for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 1;
         ref_frame_config->ref_idx[0] = 2;
         ref_frame_config->refresh[2] = 1;
+        if (multi_ref) {
+          ref_frame_config->ref_idx[6] = 7;
+          ref_frame_config->reference[6] = 1;
+          if (base_count % 10 == 0) ref_frame_config->refresh[7] = 1;
+        }
       }
       // Reference GOLDEN.
       if (layer_id->spatial_layer_id > 0) ref_frame_config->reference[3] = 1;
     } else if (number_temporal_layers_ == 3 && number_spatial_layers_ == 3) {
       // 3 spatial and 3 temporal layer.
+      // Overlap in the buffer slot updates: slots 3 and 4, updated by the
+      // first TL2, are reused for the update in the TL1 superframe.
       if (superframe_cnt_ % 4 == 0) {
         // Base temporal layer.
         layer_id->temporal_layer_id = 0;
@@ -324,56 +331,65 @@
         if (layer_id->spatial_layer_id == 0) {
           // Reference LAST.
           // Set all buffer_idx to 0.
-          // Set GOLDEN to slot 5 and update slot 5.
+          // Set GOLDEN to slot 3 and update slot 3.
           for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0;
-          ref_frame_config->ref_idx[3] = 5;
-          ref_frame_config->refresh[5] = 1;
+          ref_frame_config->ref_idx[3] = 3;
+          ref_frame_config->refresh[3] = 1;
         } else if (layer_id->spatial_layer_id == 1) {
           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
-          // GOLDEN (and all other refs) to slot 5.
-          // Set LAST2 to slot 6 and update slot 6.
-          for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 5;
+          // GOLDEN (and all other refs) to slot 3.
+          // Set LAST2 to slot 4 and update slot 4.
+          for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 3;
           ref_frame_config->ref_idx[0] = 1;
-          ref_frame_config->ref_idx[2] = 6;
-          ref_frame_config->refresh[6] = 1;
+          ref_frame_config->ref_idx[2] = 4;
+          ref_frame_config->refresh[4] = 1;
         } else if (layer_id->spatial_layer_id == 2) {
           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
-          // GOLDEN (and all other refs) to slot 6.
-          // Set LAST2 to slot 6 and update slot 7.
-          for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 6;
+          // GOLDEN (and all other refs) to slot 4.
+          // Set LAST2 to slot 5 and update slot 5.
+          for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 4;
           ref_frame_config->ref_idx[0] = 2;
-          ref_frame_config->ref_idx[2] = 7;
-          ref_frame_config->refresh[7] = 1;
+          ref_frame_config->ref_idx[2] = 5;
+          ref_frame_config->refresh[5] = 1;
         }
       } else if ((superframe_cnt_ - 3) % 4 == 0) {
         // Second top temporal enhancement layer.
         layer_id->temporal_layer_id = 2;
         if (layer_id->spatial_layer_id == 0) {
-          // Set LAST to slot 5 and reference LAST.
+          // Set LAST to slot 3 and reference LAST.
           // Set GOLDEN to slot 3 and update slot 3.
           // Set all other buffer_idx to 0.
           for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0;
-          ref_frame_config->ref_idx[0] = 5;
+          ref_frame_config->ref_idx[0] = 3;
           ref_frame_config->ref_idx[3] = 3;
           ref_frame_config->refresh[3] = 1;
         } else if (layer_id->spatial_layer_id == 1) {
-          // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
+          // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 4,
           // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4.
           for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0;
-          ref_frame_config->ref_idx[0] = 6;
+          ref_frame_config->ref_idx[0] = 4;
           ref_frame_config->ref_idx[3] = 3;
           ref_frame_config->ref_idx[1] = 4;
           ref_frame_config->refresh[4] = 1;
         } else if (layer_id->spatial_layer_id == 2) {
-          // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 7,
+          // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 5,
           // GOLDEN to slot 4. No update.
           for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0;
-          ref_frame_config->ref_idx[0] = 7;
+          ref_frame_config->ref_idx[0] = 5;
           ref_frame_config->ref_idx[3] = 4;
         }
       }
       // Reference GOLDEN.
       if (layer_id->spatial_layer_id > 0) ref_frame_config->reference[3] = 1;
+      // Allow the top spatial layer to use an additional temporal reference.
+      // This reference is only updated on the base temporal layer, every
+      // 10 TL0 frames here.
+      if (multi_ref && layer_id->spatial_layer_id == 2) {
+        ref_frame_config->ref_idx[6] = 7;
+        ref_frame_config->reference[6] = 1;
+        if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0)
+          ref_frame_config->refresh[7] = 1;
+      }
     }
     return layer_flags;
   }
@@ -458,7 +474,7 @@
     cfg_.rc_max_quantizer = 63;
     cfg_.rc_end_usage = AOM_CBR;
     cfg_.g_lag_in_frames = 0;
-    cfg_.g_error_resilient = 1;
+    cfg_.g_error_resilient = 0;
     cfg_.rc_resize_mode = RESIZE_DYNAMIC;
 
     ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
@@ -506,7 +522,7 @@
     cfg_.rc_max_quantizer = 63;
     cfg_.rc_end_usage = AOM_CBR;
     cfg_.g_lag_in_frames = 0;
-    cfg_.g_error_resilient = 1;
+    cfg_.g_error_resilient = 0;
 
     ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
                                          288, 30, 1, 0, 300);
@@ -535,7 +551,7 @@
     cfg_.rc_max_quantizer = 63;
     cfg_.rc_end_usage = AOM_CBR;
     cfg_.g_lag_in_frames = 0;
-    cfg_.g_error_resilient = 1;
+    cfg_.g_error_resilient = 0;
 
     ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
                                          288, 30, 1, 0, 300);
@@ -556,6 +572,37 @@
     }
   }
 
+  virtual void BasicRateTargetingSVC1TL3SLMultiRefTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 500, 1000 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    multi_ref_ = 1;
+    number_temporal_layers_ = 1;
+    number_spatial_layers_ = 3;
+    target_layer_bitrate_[0] = 1 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[1] = 3 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[2] = 4 * cfg_.rc_target_bitrate / 8;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.38)
+          << " The datarate for the file is greater than target by too much!";
+    }
+  }
+
   virtual void BasicRateTargetingSVC3TL3SLTest() {
     cfg_.rc_buf_initial_sz = 500;
     cfg_.rc_buf_optimal_sz = 500;
@@ -565,7 +612,7 @@
     cfg_.rc_max_quantizer = 63;
     cfg_.rc_end_usage = AOM_CBR;
     cfg_.g_lag_in_frames = 0;
-    cfg_.g_error_resilient = 1;
+    cfg_.g_error_resilient = 0;
 
     ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
                                          288, 30, 1, 0, 300);
@@ -607,7 +654,7 @@
     cfg_.rc_max_quantizer = 63;
     cfg_.rc_end_usage = AOM_CBR;
     cfg_.g_lag_in_frames = 0;
-    cfg_.g_error_resilient = 1;
+    cfg_.g_error_resilient = 0;
 
     ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
     const int bitrate_array[2] = { 600, 1200 };
@@ -639,6 +686,48 @@
     }
   }
 
+  virtual void BasicRateTargetingSVC3TL3SLHDMultiRefTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
+    const int bitrate_array[2] = { 600, 1200 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    multi_ref_ = 1;
+    number_temporal_layers_ = 3;
+    number_spatial_layers_ = 3;
+    // SL0
+    const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100;
+    target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100;
+    target_layer_bitrate_[2] = bitrate_sl0;
+    // SL1
+    const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100;
+    target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100;
+    target_layer_bitrate_[5] = bitrate_sl1;
+    // SL2
+    const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100;
+    target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100;
+    target_layer_bitrate_[8] = bitrate_sl2;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.70)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.4)
+          << " The datarate for the file is greater than target by too much!";
+    }
+  }
+
   virtual void BasicRateTargetingSVC3TL3SLKfTest() {
     cfg_.rc_buf_initial_sz = 500;
     cfg_.rc_buf_optimal_sz = 500;
@@ -648,7 +737,7 @@
     cfg_.rc_max_quantizer = 63;
     cfg_.rc_end_usage = AOM_CBR;
     cfg_.g_lag_in_frames = 0;
-    cfg_.g_error_resilient = 1;
+    cfg_.g_error_resilient = 0;
     cfg_.kf_mode = AOM_KF_AUTO;
     cfg_.kf_min_dist = cfg_.kf_max_dist = 100;
 
@@ -692,7 +781,7 @@
     cfg_.rc_max_quantizer = 63;
     cfg_.rc_end_usage = AOM_CBR;
     cfg_.g_lag_in_frames = 0;
-    cfg_.g_error_resilient = 1;
+    cfg_.g_error_resilient = 0;
     cfg_.g_profile = 1;
 
     ::libaom_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 140);
@@ -949,6 +1038,12 @@
   BasicRateTargetingSVC1TL3SLTest();
 }
 
+// Check basic rate targeting for CBR, for 3 spatial layers, 1 temporal,
+// with additional temporal reference for top spatial layer.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC1TL3SLMultiRef) {
+  BasicRateTargetingSVC1TL3SLMultiRefTest();
+}
+
 // Check basic rate targeting for CBR, for 3 spatial, 3 temporal layers.
 TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SL) {
   BasicRateTargetingSVC3TL3SLTest();
@@ -960,6 +1055,12 @@
 }
 
 // Check basic rate targeting for CBR, for 3 spatial, 3 temporal layers,
+// with additional temporal reference for top spatial layer.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLHDMultiRef) {
+  BasicRateTargetingSVC3TL3SLHDMultiRefTest();
+}
+
+// Check basic rate targeting for CBR, for 3 spatial, 3 temporal layers,
 // for auto key frame mode with short key frame period.
 TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLKf) {
   BasicRateTargetingSVC3TL3SLKfTest();