rtc: Constrain the reset of ref_idx for spatial layers

Constrain the rtc_ref->ref_idx[] reset to the case of a single
spatial layer, as the reset can cause decode failures when LAST is
used as the spatial reference. This is because rtc_ref->ref_idx[]
is mapped to cm->remapped_ref_idx[] at the start of encoding,
and that mapping is used further below (after the reset) in the
function write_frame_size_with_refs() (line ~3151).

An alternate fix would be to reset
cm->remapped_ref_idx[ref_frame - 1] = map_idx
after line 3125. But for now we take the simpler fix
of constraining the reset.

The previous fix, which constrained the reset to the
single-reference case, is not quite correct and can
still fail for spatial layers. The previous fix is here:
https://aomedia-review.googlesource.com/c/aom/+/193761

Added two test cases that trigger the failure.

Change-Id: I86490e3f96ffcea7fb6e972a7d2085675c65fcc4
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index d885de0..bff49cf 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -3102,7 +3102,6 @@
         aom_wb_write_literal(wb, gld_ref, REF_FRAMES_LOG2);
       }
       int first_ref_map_idx = INVALID_IDX;
-      int num_references = 0;
       if (cpi->ppi->rtc_ref.set_ref_frame_config) {
         for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
           if (cpi->ppi->rtc_ref.reference[ref_frame - 1] == 1) {
@@ -3110,15 +3109,13 @@
             break;
           }
         }
-        for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
-          if (cpi->ppi->rtc_ref.reference[ref_frame - 1] == 1) num_references++;
-        }
       }
       for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
         assert(get_ref_frame_map_idx(cm, ref_frame) != INVALID_IDX);
         if (!current_frame->frame_refs_short_signaling) {
           if (cpi->ppi->rtc_ref.set_ref_frame_config &&
-              first_ref_map_idx != INVALID_IDX && num_references == 1 &&
+              first_ref_map_idx != INVALID_IDX &&
+              cpi->svc.number_spatial_layers == 1 &&
               !seq_params->order_hint_info.enable_order_hint) {
             // For the usage of set_ref_frame_config:
             // for any reference not used set their ref_map_idx
diff --git a/test/svc_datarate_test.cc b/test/svc_datarate_test.cc
index 7ba8b19..92fce21 100644
--- a/test/svc_datarate_test.cc
+++ b/test/svc_datarate_test.cc
@@ -97,6 +97,8 @@
     user_define_frame_qp_ = 0;
     set_speed_per_layer_ = false;
     simulcast_mode_ = false;
+    use_last_as_scaled_ = false;
+    use_last_as_scaled_single_ref_ = false;
   }
 
   void PreEncodeFrameHook(::libaom_test::VideoSource *video,
@@ -145,7 +147,8 @@
         video->frame(), &layer_id_, &ref_frame_config_, &ref_frame_comp_pred_,
         spatial_layer_id, multi_ref_, comp_pred_,
         (video->frame() % cfg_.kf_max_dist) == 0, dynamic_enable_disable_mode_,
-        rps_mode_, rps_recovery_frame_, simulcast_mode_);
+        rps_mode_, rps_recovery_frame_, simulcast_mode_, use_last_as_scaled_,
+        use_last_as_scaled_single_ref_);
     if (intra_only_ == 1 && frame_sync_ > 0) {
       // Set an Intra-only frame on SL0 at frame_sync_.
       // In order to allow decoding to start on SL0 in mid-sequence we need to
@@ -654,7 +657,8 @@
       aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int spatial_layer,
       int multi_ref, int comp_pred, int is_key_frame,
       int dynamic_enable_disable_mode, int rps_mode, int rps_recovery_frame,
-      int simulcast_mode) {
+      int simulcast_mode, bool use_last_as_scaled,
+      bool use_last_as_scaled_single_ref) {
     int lag_index = 0;
     int base_count = frame_cnt >> 2;
     layer_id->spatial_layer_id = spatial_layer;
@@ -803,6 +807,11 @@
         // Update slot 1 (LAST).
         for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0;
         ref_frame_config->ref_idx[0] = 1;
+        if (use_last_as_scaled) {
+          for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 1;
+          ref_frame_config->ref_idx[0] = 0;
+          ref_frame_config->ref_idx[3] = 1;
+        }
         ref_frame_config->refresh[1] = 1;
       } else if (layer_id->spatial_layer_id == 2) {
         // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2
@@ -818,7 +827,12 @@
         }
       }
       // Reference GOLDEN.
-      if (layer_id->spatial_layer_id > 0) ref_frame_config->reference[3] = 1;
+      if (layer_id->spatial_layer_id > 0) {
+        if (use_last_as_scaled_single_ref)
+          ref_frame_config->reference[3] = 0;
+        else
+          ref_frame_config->reference[3] = 1;
+      }
     } else if (number_temporal_layers_ == 3 && number_spatial_layers_ == 3) {
       if (simulcast_mode) {
         ref_config_simulcast3SL3TL(ref_frame_config, layer_id, is_key_frame,
@@ -1457,6 +1471,69 @@
     }
   }
 
+  virtual void BasicRateTargetingSVC1TL3SLLastIsScaledTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 500, 1000 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    number_temporal_layers_ = 1;
+    number_spatial_layers_ = 3;
+    use_last_as_scaled_ = true;
+    target_layer_bitrate_[0] = 1 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[1] = 3 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[2] = 4 * cfg_.rc_target_bitrate / 8;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.38)
+          << " The datarate for the file is greater than target by too much!";
+    }
+  }
+
+  virtual void BasicRateTargetingSVC1TL3SLLastIsScaledSingleRefTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 300);
+    const int bitrate_array[2] = { 500, 1000 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    number_temporal_layers_ = 1;
+    number_spatial_layers_ = 3;
+    use_last_as_scaled_ = true;
+    use_last_as_scaled_single_ref_ = true;
+    target_layer_bitrate_[0] = 1 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[1] = 3 * cfg_.rc_target_bitrate / 8;
+    target_layer_bitrate_[2] = 4 * cfg_.rc_target_bitrate / 8;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.38)
+          << " The datarate for the file is greater than target by too much!";
+    }
+  }
+
   virtual void BasicRateTargetingSVC1TL3SLMultiRefTest() {
     cfg_.rc_buf_initial_sz = 500;
     cfg_.rc_buf_optimal_sz = 500;
@@ -2543,6 +2620,8 @@
   int rps_mode_;
   int rps_recovery_frame_;
   int simulcast_mode_;
+  bool use_last_as_scaled_;
+  bool use_last_as_scaled_single_ref_;
 
   int user_define_frame_qp_;
   int frame_qp_;
@@ -2636,6 +2715,19 @@
   BasicRateTargetingSVC1TL3SLTest();
 }
 
+// Check basic rate targeting for CBR, for 3 spatial layers, 1 temporal.
+// Force the spatial reference to be LAST, with a second temporal
+// reference (GOLDEN).
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC1TL3SLLastIsScaled) {
+  BasicRateTargetingSVC1TL3SLLastIsScaledTest();
+}
+
+// Check basic rate targeting for CBR, for 3 spatial layers, 1 temporal.
+// Force the spatial reference to be LAST, and force only 1 reference.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC1TL3SLLastIsScaledSingleRef) {
+  BasicRateTargetingSVC1TL3SLLastIsScaledSingleRefTest();
+}
+
 // Check basic rate targeting for CBR, for 3 spatial layers, 1 temporal,
 // with additional temporal reference for top spatial layer.
 TEST_P(DatarateTestSVC, BasicRateTargetingSVC1TL3SLMultiRef) {