rtc-svc: Fix to avoid scaling references

Avoid scaling the references, which speeds up the encoder
(currently by ~40%), since the scaling in scale_references is
very slow (C code). Added two svc flags needed for this
(lc->is_key_frame and force_zero_mode_spatial_ref).

Change-Id: Iff191a5a3eaf213865245b481e02b8b29e1f5a7a
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 90684f0..69b861f 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -2055,9 +2055,15 @@
 
 static aom_codec_frame_flags_t get_frame_pkt_flags(const AV1_COMP *cpi,
                                                    unsigned int lib_flags) {
+  const SVC *const svc = &cpi->svc;
   aom_codec_frame_flags_t flags = lib_flags << 16;
 
-  if (lib_flags & FRAMEFLAGS_KEY) flags |= AOM_FRAME_IS_KEY;
+  if (lib_flags & FRAMEFLAGS_KEY ||
+      (cpi->use_svc &&
+       svc->layer_context[svc->spatial_layer_id * svc->number_temporal_layers +
+                          svc->temporal_layer_id]
+           .is_key_frame))
+    flags |= AOM_FRAME_IS_KEY;
   if (lib_flags & FRAMEFLAGS_INTRAONLY) flags |= AOM_FRAME_IS_INTRAONLY;
   if (lib_flags & FRAMEFLAGS_SWITCH) flags |= AOM_FRAME_IS_SWITCH;
   if (lib_flags & FRAMEFLAGS_ERROR_RESILIENT)
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index cefe418..76109a3 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2869,7 +2869,13 @@
                                              &cpi->scaled_last_source,
                                              downsample_filter, phase_scaler);
   }
-  if (!frame_is_intra_only(cm)) scale_references(cpi);
+
+  // For SVC the inter-layer/spatial prediction is not done for newmv
+  // (zero_mode is forced), and since the scaled references are only
+  // used for newmv search, we can avoid scaling here.
+  if (!frame_is_intra_only(cm) &&
+      !(cpi->use_svc && cpi->svc.force_zero_mode_spatial_ref))
+    scale_references(cpi);
 
   av1_set_quantizer(cm, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q,
                     q_cfg->enable_chroma_deltaq);
diff --git a/av1/encoder/ratectrl.c b/av1/encoder/ratectrl.c
index b0edce3..43311f1 100644
--- a/av1/encoder/ratectrl.c
+++ b/av1/encoder/ratectrl.c
@@ -2304,8 +2304,12 @@
   RATE_CONTROL *const rc = &cpi->rc;
   AV1_COMMON *const cm = &cpi->common;
   GF_GROUP *const gf_group = &cpi->gf_group;
+  SVC *const svc = &cpi->svc;
   int gf_update = 0;
   int target;
+  const int layer =
+      LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id,
+                       svc->number_temporal_layers);
   // Turn this on to explicitly set the reference structure rather than
   // relying on internal/default structure.
   const int set_reference_structure = 1;
@@ -2315,8 +2319,8 @@
   }
   // Set frame type.
   if ((!cpi->use_svc && rc->frames_to_key == 0) ||
-      (cpi->use_svc && cpi->svc.spatial_layer_id == 0 &&
-       cpi->svc.current_superframe % cpi->oxcf.kf_cfg.key_freq_max == 0) ||
+      (cpi->use_svc && svc->spatial_layer_id == 0 &&
+       svc->current_superframe % cpi->oxcf.kf_cfg.key_freq_max == 0) ||
       (frame_flags & FRAMEFLAGS_KEY)) {
     frame_params->frame_type = KEY_FRAME;
     rc->this_key_frame_forced =
@@ -2325,11 +2329,21 @@
     rc->kf_boost = DEFAULT_KF_BOOST_RT;
     rc->source_alt_ref_active = 0;
     gf_group->update_type[gf_group->index] = KF_UPDATE;
-    if (cpi->use_svc && cm->current_frame.frame_number > 0)
-      av1_svc_reset_temporal_layers(cpi, 1);
+    if (cpi->use_svc) {
+      if (cm->current_frame.frame_number > 0)
+        av1_svc_reset_temporal_layers(cpi, 1);
+      svc->layer_context[layer].is_key_frame = 1;
+    }
   } else {
     frame_params->frame_type = INTER_FRAME;
     gf_group->update_type[gf_group->index] = LF_UPDATE;
+    if (cpi->use_svc) {
+      LAYER_CONTEXT *lc = &svc->layer_context[layer];
+      lc->is_key_frame =
+          svc->spatial_layer_id == 0
+              ? 0
+              : svc->layer_context[svc->temporal_layer_id].is_key_frame;
+    }
   }
   // Check for scene change, for non-SVC for now.
   if (!cpi->use_svc && cpi->sf.rt_sf.check_scene_detection)
diff --git a/av1/encoder/svc_layercontext.c b/av1/encoder/svc_layercontext.c
index afc7933..b57b85a 100644
--- a/av1/encoder/svc_layercontext.c
+++ b/av1/encoder/svc_layercontext.c
@@ -28,6 +28,7 @@
   int mi_cols = cpi->common.mi_params.mi_cols;
   svc->base_framerate = 30.0;
   svc->current_superframe = 0;
+  svc->force_zero_mode_spatial_ref = 1;
 
   for (int sl = 0; sl < svc->number_spatial_layers; ++sl) {
     for (int tl = 0; tl < svc->number_temporal_layers; ++tl) {
@@ -182,15 +183,15 @@
   // For each reference (LAST/GOLDEN) set the skip_nonzero_last/gf frame flags.
   // This is to skip testing nonzero-mv for that reference if it was last
   // refreshed (i.e., buffer slot holding that reference was refreshed) on the
-  // previous spatial layer at the same time (current_superframe).
-  if (svc->external_ref_frame_config) {
+  // previous spatial layer(s) at the same time (current_superframe).
+  if (svc->external_ref_frame_config && svc->force_zero_mode_spatial_ref) {
     int ref_frame_idx = svc->ref_idx[LAST_FRAME - 1];
     if (svc->buffer_time_index[ref_frame_idx] == svc->current_superframe &&
-        svc->buffer_spatial_layer[ref_frame_idx] == svc->spatial_layer_id - 1)
+        svc->buffer_spatial_layer[ref_frame_idx] <= svc->spatial_layer_id - 1)
       svc->skip_nonzeromv_last = 1;
     ref_frame_idx = svc->ref_idx[GOLDEN_FRAME - 1];
     if (svc->buffer_time_index[ref_frame_idx] == svc->current_superframe &&
-        svc->buffer_spatial_layer[ref_frame_idx] == svc->spatial_layer_id - 1)
+        svc->buffer_spatial_layer[ref_frame_idx] <= svc->spatial_layer_id - 1)
       svc->skip_nonzeromv_gf = 1;
   }
 }
diff --git a/av1/encoder/svc_layercontext.h b/av1/encoder/svc_layercontext.h
index 538a949..07d8148 100644
--- a/av1/encoder/svc_layercontext.h
+++ b/av1/encoder/svc_layercontext.h
@@ -41,6 +41,7 @@
   int counter_encode_maxq_scene_change;
   uint8_t speed;
   unsigned char group_index;
+  int is_key_frame;
 } LAYER_CONTEXT;
 
 typedef struct SVC {
@@ -70,6 +71,8 @@
   // downsample_filter_phase: = 0 will do sub-sampling (no weighted average),
   // = 8 will center the target pixel and get a symmetric averaging filter.
   int downsample_filter_phase[AOM_MAX_SS_LAYERS];
+  // Force zero-mv in mode search for the spatial/inter-layer reference.
+  int force_zero_mode_spatial_ref;
 } SVC;
 
 struct AV1_COMP;
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index 082bc31..d8fd4f4 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -823,7 +823,12 @@
   const uint8_t *d;
   int sp;
   int dp;
-  const int is_key_frame = frame_is_intra_only(cm);
+
+  const int is_key_frame =
+      (frame_is_intra_only(cm) ||
+       (cpi->use_svc &&
+        cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));
+
   assert(cm->seq_params.sb_size == BLOCK_64X64 ||
          cm->seq_params.sb_size == BLOCK_128X128);
   const int is_small_sb = (cm->seq_params.sb_size == BLOCK_64X64);