Use variable QPs in low delay mode

With a sub-GOP length of 32, this improves coding performance
over flat QPs for the P frames: -4.0% on the a3 set and -3.9%
on hdres2.

BUG=aomedia:3079

Change-Id: Ifcd5f49e6b674fbe17dcc71fee882d76b9d6902c
diff --git a/av1/encoder/encoder_utils.c b/av1/encoder/encoder_utils.c
index 4fb9ce5..6d1056f 100644
--- a/av1/encoder/encoder_utils.c
+++ b/av1/encoder/encoder_utils.c
@@ -553,15 +553,26 @@
       cpi->ppi->p_rc.arf_q = *q;
   }
 
-  if (cpi->oxcf.q_cfg.use_fixed_qp_offsets && cpi->oxcf.rc_cfg.mode == AOM_Q &&
-      is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) {
-    const double qstep_ratio =
-        0.2 + (1.0 - (double)cpi->rc.active_worst_quality / MAXQ) * 0.3;
-    *q = av1_get_q_index_from_qstep_ratio(
-        cpi->rc.active_worst_quality, qstep_ratio, cm->seq_params->bit_depth);
-    *top_index = *bottom_index = *q;
-    if (gf_group->update_type[cpi->gf_frame_index] == ARF_UPDATE)
-      cpi->ppi->p_rc.arf_q = *q;
+  if (cpi->oxcf.q_cfg.use_fixed_qp_offsets && cpi->oxcf.rc_cfg.mode == AOM_Q) {
+    if (is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) {
+      const double qstep_ratio =
+          0.2 + (1.0 - (double)cpi->rc.active_worst_quality / MAXQ) * 0.3;
+      *q = av1_get_q_index_from_qstep_ratio(
+          cpi->rc.active_worst_quality, qstep_ratio, cm->seq_params->bit_depth);
+      *top_index = *bottom_index = *q;
+      if (gf_group->update_type[cpi->gf_frame_index] == ARF_UPDATE ||
+          gf_group->update_type[cpi->gf_frame_index] == KF_UPDATE ||
+          gf_group->update_type[cpi->gf_frame_index] == GF_UPDATE)
+        cpi->ppi->p_rc.arf_q = *q;
+    } else {
+      int this_height = gf_group->layer_depth[cpi->gf_frame_index];
+      int arf_q = cpi->ppi->p_rc.arf_q;
+      while (this_height > 1) {
+        arf_q = (arf_q + cpi->oxcf.rc_cfg.cq_level + 1) / 2;
+        --this_height;
+      }
+      *top_index = *bottom_index = *q = arf_q;
+    }
   }
 #endif