rtc: Fix to screen content coding with temporal layers

In real-time mode there are speed features, namely
scene_detection and source_metrics_sb, that use the
current source vs last_source (source of the previous frame),
but for temporal layers the previous frame does not
always correspond to the prediction reference (LAST).
For example, the base temporal layer (TL0) always predicts
from the previous TL0 frame, which is 2/4 frames away for
number_temporal_layers = 2/3.
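
For illustration only (not part of this change), a minimal
sketch of that reference distance for the standard
one-spatial-layer patterns; the helper name is hypothetical:

  // Hypothetical helper, illustration only: distance (in frames) from a
  // TL0 frame back to its LAST (previous TL0) reference.
  static int tl0_last_ref_distance(int number_temporal_layers) {
    if (number_temporal_layers == 2) return 2;  // TL0 every 2nd frame
    if (number_temporal_layers == 3) return 4;  // TL0 every 4th frame
    return 1;  // single layer: LAST is simply the previous frame
  }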

For screen content: scene_detection and source_metrics_sb
have a big impact on quality, so feeding these features the
wrong last_source can cause a significant loss in
compression efficiency.

This CL fixes this by keeping track of the source frame
corresponding to the LAST reference, and using that as the
last_source.

This fix only applies to number_spatial_layers = 1; a
follow-up will handle multiple spatial layers.
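
As a rough sketch of the mechanism (a simplified model, not
the actual encoder code; names mirror the fields added below):

  // Simplified model of the slot tracking added in this change.
  #define NUM_BUFF_SLOTS 8

  typedef struct {
    unsigned int frame_number_buffslot[NUM_BUFF_SLOTS];
  } SlotTracker;

  // After encoding a frame, record its number in every slot it refreshes.
  static void update_slots(SlotTracker *t, const int refresh[NUM_BUFF_SLOTS],
                           int is_key_frame, unsigned int frame_number) {
    for (int i = 0; i < NUM_BUFF_SLOTS; i++)
      if (is_key_frame || refresh[i])
        t->frame_number_buffslot[i] = frame_number;
  }

  // Before encoding the next frame: does the LAST slot hold the previous
  // frame? If not, last_source is taken from the saved copy
  // (source_last_ref). Caller guards frame_number > 0, as in the patch.
  static int last_is_previous_frame(const SlotTracker *t, int last_slot,
                                    unsigned int frame_number) {
    return t->frame_number_buffslot[last_slot] >= frame_number - 1;
  }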

Added a screen (tune-content) option to svc_encoder_rtc.c, via
the patch attached in the issue below.
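
For reference, the option is passed as
--tune-content=<default|screen|film>, e.g.:

  svc_encoder_rtc ... --tune-content=screen ...

(other arguments elided). For screen, the example app also
enables palette and CFL intra via codec controls (see the
svc_encoder_rtc.c hunk below).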

Bug: aomedia:3346
Change-Id: Ic809feb19b87d0881c95b2749968f9e7931f22f8
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c
index 62083b8..0fa6feb 100644
--- a/av1/encoder/encode_strategy.c
+++ b/av1/encoder/encode_strategy.c
@@ -1388,6 +1388,18 @@
   // Source may be changed if temporal filtered later.
   frame_input.source = &source->img;
   frame_input.last_source = last_source != NULL ? &last_source->img : NULL;
+  // For temporal layers: if the LAST reference is not the previous frame, set
+  // the last_source to the source frame corresponding to the LAST reference.
+  // TODO(marpan): For now only do this for single spatial layer: the case of
+  // number_spatial_layers > 1 will be handled in another change.
+  if (cm->current_frame.frame_number > 0 &&
+      cpi->svc.number_temporal_layers > 1 &&
+      cpi->svc.number_spatial_layers == 1) {
+    const int buffslot_last = cpi->ppi->rtc_ref.ref_idx[0];  // index 0 is LAST
+    if (cpi->svc.frame_number_buffslot[buffslot_last] <
+        cm->current_frame.frame_number - 1)
+      frame_input.last_source = &cpi->svc.source_last_ref;
+  }
   frame_input.ts_duration = source->ts_end - source->ts_start;
   // Save unfiltered source. It is used in av1_get_second_pass_params().
   cpi->unfiltered_source = frame_input.source;
@@ -1677,5 +1689,17 @@
         is_frame_droppable(&cpi->ppi->rtc_ref, &ext_flags->refresh_frame);
   }
 
+  // For temporal layers: keep track of the source corresponding to the
+  // refresh of the LAST reference (index 0). Note that if the temporal filter
+  // or denoising is on, the source will be modified during encoding, but for
+  // now keep this as is.
+  if (cpi->svc.number_temporal_layers > 1 &&
+      cpi->svc.number_spatial_layers == 1 &&
+      cpi->ppi->rtc_ref.refresh[cpi->ppi->rtc_ref.ref_idx[0]]) {
+    aom_yv12_copy_y(cpi->source, &cpi->svc.source_last_ref);
+    aom_yv12_copy_u(cpi->source, &cpi->svc.source_last_ref);
+    aom_yv12_copy_v(cpi->source, &cpi->svc.source_last_ref);
+  }
+
   return AOM_CODEC_OK;
 }
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 88143a8..4c68faf 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2418,6 +2418,19 @@
   av1_set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
   av1_set_mv_search_params(cpi);
 
+  if (cm->current_frame.frame_number == 0 && svc->number_temporal_layers > 1 &&
+      svc->number_spatial_layers == 1) {
+    const SequenceHeader *seq_params = cm->seq_params;
+    if (aom_alloc_frame_buffer(
+            &cpi->svc.source_last_ref, cm->width, cm->height,
+            seq_params->subsampling_x, seq_params->subsampling_y,
+            seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels,
+            cm->features.byte_alignment, 0)) {
+      aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
+                         "Failed to allocate buffer for source_last_ref");
+    }
+  }
+
   if (!cpi->ppi->use_svc) {
     phase_scaler = 8;
     // 2:1 scaling.
@@ -2617,6 +2630,8 @@
       sf->rt_sf.gf_refresh_based_on_qp)
     av1_adjust_gf_refresh_qp_one_pass_rt(cpi);
 
+  av1_svc_update_frame_number_buffslot(cpi);
+
 #if CONFIG_COLLECT_COMPONENT_TIMING
   end_timing(cpi, av1_encode_frame_time);
 #endif
diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h
index fd350f0..9333f7c 100644
--- a/av1/encoder/encoder_alloc.h
+++ b/av1/encoder/encoder_alloc.h
@@ -269,6 +269,7 @@
   aom_free_frame_buffer(&cpi->scaled_source);
   aom_free_frame_buffer(&cpi->scaled_last_source);
   aom_free_frame_buffer(&cpi->orig_source);
+  aom_free_frame_buffer(&cpi->svc.source_last_ref);
 
   free_token_info(token_info);
 
diff --git a/av1/encoder/svc_layercontext.c b/av1/encoder/svc_layercontext.c
index bcc59f6..aff79e3 100644
--- a/av1/encoder/svc_layercontext.c
+++ b/av1/encoder/svc_layercontext.c
@@ -539,3 +539,13 @@
     }
   }
 }
+void av1_svc_update_frame_number_buffslot(AV1_COMP *const cpi) {
+  SVC *const svc = &cpi->svc;
+  const AV1_COMMON *const cm = &cpi->common;
+  const CurrentFrame *const current_frame = &cm->current_frame;
+  const RTC_REF *const rtc_ref = &cpi->ppi->rtc_ref;
+  for (int i = 0; i < 8; i++) {
+    if (current_frame->frame_type == KEY_FRAME || rtc_ref->refresh[i] == 1)
+      svc->frame_number_buffslot[i] = current_frame->frame_number;
+  }
+}
diff --git a/av1/encoder/svc_layercontext.h b/av1/encoder/svc_layercontext.h
index a27d768..d4b3d1a 100644
--- a/av1/encoder/svc_layercontext.h
+++ b/av1/encoder/svc_layercontext.h
@@ -107,6 +107,7 @@
   int num_encoded_top_layer;
   int first_layer_denoise;
   int high_source_sad_superframe;
+  YV12_BUFFER_CONFIG source_last_ref;
   /*!\endcond */
 
   /*!
@@ -134,6 +135,11 @@
    * Force zero-mv in mode search for the spatial/inter-layer reference.
    */
   int force_zero_mode_spatial_ref;
+
+  /*!
+   * Frame numbers corresponding to each of the 8 reference buffer slots.
+   */
+  unsigned int frame_number_buffslot[8];
 } SVC;
 
 struct AV1_COMP;
@@ -277,6 +283,8 @@
 
 void av1_svc_check_reset_layer_rc_flag(struct AV1_COMP *const cpi);
 
+void av1_svc_update_frame_number_buffslot(struct AV1_COMP *const cpi);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/examples/svc_encoder_rtc.c b/examples/svc_encoder_rtc.c
index 9e607ea..4e6c437 100644
--- a/examples/svc_encoder_rtc.c
+++ b/examples/svc_encoder_rtc.c
@@ -38,6 +38,7 @@
   int layering_mode;
   int output_obu;
   int decode;
+  int tune_content;
 } AppInput;
 
 typedef enum {
@@ -91,6 +92,14 @@
 static const arg_def_t test_decode_arg =
     ARG_DEF(NULL, "test-decode", 1,
             "Attempt to test decoding the output when set to 1. Default is 1.");
+static const struct arg_enum_list tune_content_enum[] = {
+  { "default", AOM_CONTENT_DEFAULT },
+  { "screen", AOM_CONTENT_SCREEN },
+  { "film", AOM_CONTENT_FILM },
+  { NULL, 0 }
+};
+static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
+    NULL, "tune-content", 1, "Tune content type", tune_content_enum);
 
 #if CONFIG_AV1_HIGHBITDEPTH
 static const struct arg_enum_list bitdepth_enum[] = {
@@ -125,6 +134,7 @@
                                        &error_resilient_arg,
                                        &output_obu_arg,
                                        &test_decode_arg,
+                                       &tune_content_arg,
                                        NULL };
 
 #define zero(Dest) memset(&(Dest), 0, sizeof(Dest))
@@ -365,6 +375,9 @@
       if (app_input->decode != 0 && app_input->decode != 1)
         die("Invalid value for test decode flag (0, 1): %d.",
             app_input->decode);
+    } else if (arg_match(&arg, &tune_content_arg, argi)) {
+      app_input->tune_content = arg_parse_enum_or_int(&arg);
+      printf("tune content %d\n", app_input->tune_content);
     } else {
       ++argj;
     }
@@ -1357,6 +1370,12 @@
                     cfg.g_threads ? get_msb(cfg.g_threads) : 0);
   if (cfg.g_threads > 1) aom_codec_control(&codec, AV1E_SET_ROW_MT, 1);
 
+  aom_codec_control(&codec, AV1E_SET_TUNE_CONTENT, app_input.tune_content);
+  if (app_input.tune_content == AOM_CONTENT_SCREEN) {
+    aom_codec_control(&codec, AV1E_SET_ENABLE_PALETTE, 1);
+    aom_codec_control(&codec, AV1E_SET_ENABLE_CFL_INTRA, 1);
+  }
+
   svc_params.number_spatial_layers = ss_number_layers;
   svc_params.number_temporal_layers = ts_number_layers;
   for (i = 0; i < ss_number_layers * ts_number_layers; ++i) {