Merge "Use zero motion vector in choose_partitioning"
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index b5663fc..0e112f2 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -127,6 +127,8 @@
   unsigned int rc_max_intra_bitrate_pct;
   // maximum allowed bitrate for any inter frame in % of bitrate target.
   unsigned int rc_max_inter_bitrate_pct;
+  // percent of rate boost for golden frame in CBR mode; 0 means no boost.
+  unsigned int gf_cbr_boost_pct;
 
   MODE mode;
   int pass;
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 5a67ef4..c8c784b 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -2478,7 +2478,6 @@
 }
 
 #define MINQ_ADJ_LIMIT 32
-#define Q_LIMIT_STEP 1
 void vp9_twopass_postencode_update(VP9_COMP *cpi) {
   TWO_PASS *const twopass = &cpi->twopass;
   RATE_CONTROL *const rc = &cpi->rc;
@@ -2523,16 +2522,22 @@
     if (rc->rate_error_estimate > cpi->oxcf.under_shoot_pct) {
       --twopass->extend_maxq;
       if (rc->rolling_target_bits >= rc->rolling_actual_bits)
-        twopass->extend_minq += Q_LIMIT_STEP;
+        ++twopass->extend_minq;
     // Overshoot.
     } else if (rc->rate_error_estimate < -cpi->oxcf.over_shoot_pct) {
       --twopass->extend_minq;
       if (rc->rolling_target_bits < rc->rolling_actual_bits)
-        twopass->extend_maxq += Q_LIMIT_STEP;
+        ++twopass->extend_maxq;
     } else {
+      // Adjustment for extreme local overshoot.
+      if (rc->projected_frame_size > (2 * rc->base_frame_target) &&
+          rc->projected_frame_size > (2 * rc->avg_frame_bandwidth))
+        ++twopass->extend_maxq;
+
+      // Unwind undershoot or overshoot adjustment.
       if (rc->rolling_target_bits < rc->rolling_actual_bits)
         --twopass->extend_minq;
-      if (rc->rolling_target_bits > rc->rolling_actual_bits)
+      else if (rc->rolling_target_bits > rc->rolling_actual_bits)
         --twopass->extend_maxq;
     }
     twopass->extend_minq = clamp(twopass->extend_minq, 0, MINQ_ADJ_LIMIT);
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index a1e431d..1e4c9b1 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -617,6 +617,8 @@
         continue;
 
       if (this_mode == NEWMV) {
+        if (ref_frame > LAST_FRAME)
+          continue;
         if (cpi->sf.partition_search_type != VAR_BASED_PARTITION &&
             this_rdc.rdcost < (int64_t)(1 << num_pels_log2_lookup[bsize]))
           continue;
@@ -757,7 +759,7 @@
     }
     // If the current reference frame is valid and we found a usable mode,
     // we are done.
-    if (best_rdc.rdcost < INT64_MAX)
+    if (best_rdc.rdcost < INT64_MAX && ref_frame == GOLDEN_FRAME)
       break;
   }
 
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 58168d6..8a5b611 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -1339,7 +1339,18 @@
   const int64_t diff = rc->optimal_buffer_level - rc->buffer_level;
   const int64_t one_pct_bits = 1 + rc->optimal_buffer_level / 100;
   int min_frame_target = MAX(rc->avg_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS);
-  int target = rc->avg_frame_bandwidth;
+  int target;
+
+  if (oxcf->gf_cbr_boost_pct) {
+    const int af_ratio_pct = oxcf->gf_cbr_boost_pct + 100;
+    target =  cpi->refresh_golden_frame ?
+      (rc->avg_frame_bandwidth * rc->baseline_gf_interval * af_ratio_pct) /
+      (rc->baseline_gf_interval * 100 + af_ratio_pct - 100) :
+      (rc->avg_frame_bandwidth * rc->baseline_gf_interval * 100) /
+      (rc->baseline_gf_interval * 100 + af_ratio_pct - 100);
+  } else {
+    target = rc->avg_frame_bandwidth;
+  }
   if (svc->number_temporal_layers > 1 &&
       oxcf->rc_mode == VPX_CBR) {
     // Note that for layers, avg_frame_bandwidth is the cumulative
@@ -1453,15 +1464,25 @@
     rc->frames_to_key = cpi->oxcf.key_freq;
     rc->kf_boost = DEFAULT_KF_BOOST;
     rc->source_alt_ref_active = 0;
-    target = calc_iframe_target_size_one_pass_cbr(cpi);
   } else {
     cm->frame_type = INTER_FRAME;
-    target = calc_pframe_target_size_one_pass_cbr(cpi);
   }
+  if (rc->frames_till_gf_update_due == 0) {
+    rc->baseline_gf_interval = DEFAULT_GF_INTERVAL;
+    rc->frames_till_gf_update_due = rc->baseline_gf_interval;
+    // NOTE: frames_till_gf_update_due must be <= frames_to_key.
+    if (rc->frames_till_gf_update_due > rc->frames_to_key)
+      rc->frames_till_gf_update_due = rc->frames_to_key;
+    cpi->refresh_golden_frame = 1;
+    rc->gfu_boost = DEFAULT_GF_BOOST;
+  }
+
+  if (cm->frame_type == KEY_FRAME)
+    target = calc_iframe_target_size_one_pass_cbr(cpi);
+  else
+    target = calc_pframe_target_size_one_pass_cbr(cpi);
+
   vp9_rc_set_frame_target(cpi, target);
-  // Don't use gf_update by default in CBR mode.
-  rc->frames_till_gf_update_due = INT_MAX;
-  rc->baseline_gf_interval = INT_MAX;
 }
 
 int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget,
diff --git a/vp9/encoder/x86/vp9_denoiser_sse2.c b/vp9/encoder/x86/vp9_denoiser_sse2.c
index 09969b0..4ddee7b 100644
--- a/vp9/encoder/x86/vp9_denoiser_sse2.c
+++ b/vp9/encoder/x86/vp9_denoiser_sse2.c
@@ -125,18 +125,16 @@
   return acc_diff;
 }
 
-static int vp9_denoiser_4xM_sse2(const uint8_t *sig, int sig_stride,
-                                 const uint8_t *mc_running_avg_y,
-                                 int mc_avg_y_stride,
-                                 uint8_t *running_avg_y, int avg_y_stride,
-                                 int increase_denoising,
-                                 BLOCK_SIZE bs,
-                                 int motion_magnitude) {
+// Denoiser for 4xM and 8xM blocks.
+static int vp9_denoiser_NxM_sse2_small(
+    const uint8_t *sig, int sig_stride, const uint8_t *mc_running_avg_y,
+    int mc_avg_y_stride, uint8_t *running_avg_y, int avg_y_stride,
+    int increase_denoising, BLOCK_SIZE bs, int motion_magnitude, int width) {
   int sum_diff_thresh, r, sum_diff = 0;
   const int shift_inc  = (increase_denoising &&
                           motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ?
                          1 : 0;
-  uint8_t sig_buffer[2][16], mc_running_buffer[2][16], running_buffer[2][16];
+  uint8_t sig_buffer[8][16], mc_running_buffer[8][16], running_buffer[8][16];
   __m128i acc_diff = _mm_setzero_si128();
   const __m128i k_0 = _mm_setzero_si128();
   const __m128i k_4 = _mm_set1_epi8(4 + shift_inc);
@@ -149,41 +147,46 @@
   const __m128i l32 = _mm_set1_epi8(2);
   // Difference between level 2 and level 1 is 1.
   const __m128i l21 = _mm_set1_epi8(1);
+  const uint8_t shift = (width == 4) ? 2 : 1;
 
-  for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> 2); ++r) {
-    vpx_memcpy(sig_buffer[r], sig, 4);
-    vpx_memcpy(sig_buffer[r] + 4, sig + sig_stride, 4);
-    vpx_memcpy(sig_buffer[r] + 8, sig + sig_stride * 2, 4);
-    vpx_memcpy(sig_buffer[r] + 12, sig + sig_stride * 3, 4);
-    vpx_memcpy(mc_running_buffer[r], mc_running_avg_y, 4);
-    vpx_memcpy(mc_running_buffer[r] + 4, mc_running_avg_y +
-               mc_avg_y_stride, 4);
-    vpx_memcpy(mc_running_buffer[r] + 8, mc_running_avg_y +
-               mc_avg_y_stride * 2, 4);
-    vpx_memcpy(mc_running_buffer[r] + 12, mc_running_avg_y +
-               mc_avg_y_stride * 3, 4);
-    vpx_memcpy(running_buffer[r], running_avg_y, 4);
-    vpx_memcpy(running_buffer[r] + 4, running_avg_y +
-               avg_y_stride, 4);
-    vpx_memcpy(running_buffer[r] + 8, running_avg_y +
-               avg_y_stride * 2, 4);
-    vpx_memcpy(running_buffer[r] + 12, running_avg_y +
-               avg_y_stride * 3, 4);
+  for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> shift); ++r) {
+    vpx_memcpy(sig_buffer[r], sig, width);
+    vpx_memcpy(sig_buffer[r] + width, sig + sig_stride, width);
+    vpx_memcpy(mc_running_buffer[r], mc_running_avg_y, width);
+    vpx_memcpy(mc_running_buffer[r] + width,
+               mc_running_avg_y + mc_avg_y_stride, width);
+    vpx_memcpy(running_buffer[r], running_avg_y, width);
+    vpx_memcpy(running_buffer[r] + width,
+               running_avg_y + avg_y_stride, width);
+    if (width == 4) {
+      vpx_memcpy(sig_buffer[r] + width * 2, sig + sig_stride * 2, width);
+      vpx_memcpy(sig_buffer[r] + width * 3, sig + sig_stride * 3, width);
+      vpx_memcpy(mc_running_buffer[r] + width * 2,
+                 mc_running_avg_y + mc_avg_y_stride * 2, width);
+      vpx_memcpy(mc_running_buffer[r] + width * 3,
+                 mc_running_avg_y + mc_avg_y_stride * 3, width);
+      vpx_memcpy(running_buffer[r] + width * 2,
+                 running_avg_y + avg_y_stride * 2, width);
+      vpx_memcpy(running_buffer[r] + width * 3,
+                 running_avg_y + avg_y_stride * 3, width);
+    }
     acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r],
                                       mc_running_buffer[r],
                                       running_buffer[r],
                                       &k_0, &k_4, &k_8, &k_16,
                                       &l3, &l32, &l21, acc_diff);
-    vpx_memcpy(running_avg_y, running_buffer[r], 4);
-    vpx_memcpy(running_avg_y + avg_y_stride, running_buffer[r] + 4, 4);
-    vpx_memcpy(running_avg_y + avg_y_stride * 2,
-               running_buffer[r] + 8, 4);
-    vpx_memcpy(running_avg_y + avg_y_stride * 3,
-               running_buffer[r] + 12, 4);
+    vpx_memcpy(running_avg_y, running_buffer[r], width);
+    vpx_memcpy(running_avg_y + avg_y_stride, running_buffer[r] + width, width);
+    if (width == 4) {
+      vpx_memcpy(running_avg_y + avg_y_stride * 2,
+                 running_buffer[r] + width * 2, width);
+      vpx_memcpy(running_avg_y + avg_y_stride * 3,
+                 running_buffer[r] + width * 3, width);
+    }
     // Update pointers for next iteration.
-    sig += (sig_stride << 2);
-    mc_running_avg_y += (mc_avg_y_stride << 2);
-    running_avg_y += (avg_y_stride << 2);
+    sig += (sig_stride << shift);
+    mc_running_avg_y += (mc_avg_y_stride << shift);
+    running_avg_y += (avg_y_stride << shift);
   }
 
   {
@@ -206,19 +209,21 @@
       if (delta < 4) {
         const __m128i k_delta = _mm_set1_epi8(delta);
         running_avg_y -= avg_y_stride * (4 << b_height_log2_lookup[bs]);
-        sum_diff = 0;
-        for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> 2); ++r) {
+        for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> shift); ++r) {
           acc_diff = vp9_denoiser_adj_16x1_sse2(
               sig_buffer[r], mc_running_buffer[r], running_buffer[r],
               k_0, k_delta, acc_diff);
-          vpx_memcpy(running_avg_y, running_buffer[r], 4);
-          vpx_memcpy(running_avg_y + avg_y_stride, running_buffer[r] + 4, 4);
-          vpx_memcpy(running_avg_y + avg_y_stride * 2,
-                     running_buffer[r] + 8, 4);
-          vpx_memcpy(running_avg_y + avg_y_stride * 3,
-                     running_buffer[r] + 12, 4);
+          vpx_memcpy(running_avg_y, running_buffer[r], width);
+          vpx_memcpy(running_avg_y + avg_y_stride,
+                     running_buffer[r] + width, width);
+          if (width == 4) {
+            vpx_memcpy(running_avg_y + avg_y_stride * 2,
+                       running_buffer[r] + width * 2, width);
+            vpx_memcpy(running_avg_y + avg_y_stride * 3,
+                       running_buffer[r] + width * 3, width);
+          }
           // Update pointers for next iteration.
-          running_avg_y += (avg_y_stride << 2);
+          running_avg_y += (avg_y_stride << shift);
         }
         sum_diff = sum_diff_16x1(acc_diff);
         if (abs(sum_diff) > sum_diff_thresh) {
@@ -232,101 +237,14 @@
   return FILTER_BLOCK;
 }
 
-static int vp9_denoiser_8xM_sse2(const uint8_t *sig, int sig_stride,
-                                 const uint8_t *mc_running_avg_y,
-                                 int mc_avg_y_stride,
-                                 uint8_t *running_avg_y, int avg_y_stride,
-                                 int increase_denoising,
-                                 BLOCK_SIZE bs,
-                                 int motion_magnitude) {
-  int sum_diff_thresh, r, sum_diff = 0;
-  const int shift_inc  = (increase_denoising &&
-                          motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ?
-                         1 : 0;
-  uint8_t sig_buffer[8][16], mc_running_buffer[8][16], running_buffer[8][16];
-  __m128i acc_diff = _mm_setzero_si128();
-  const __m128i k_0 = _mm_setzero_si128();
-  const __m128i k_4 = _mm_set1_epi8(4 + shift_inc);
-  const __m128i k_8 = _mm_set1_epi8(8);
-  const __m128i k_16 = _mm_set1_epi8(16);
-  // Modify each level's adjustment according to motion_magnitude.
-  const __m128i l3 = _mm_set1_epi8(
-      (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 + shift_inc : 6);
-  // Difference between level 3 and level 2 is 2.
-  const __m128i l32 = _mm_set1_epi8(2);
-  // Difference between level 2 and level 1 is 1.
-  const __m128i l21 = _mm_set1_epi8(1);
-
-  for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> 1); ++r) {
-    vpx_memcpy(sig_buffer[r], sig, 8);
-    vpx_memcpy(sig_buffer[r] + 8, sig + sig_stride, 8);
-    vpx_memcpy(mc_running_buffer[r], mc_running_avg_y, 8);
-    vpx_memcpy(mc_running_buffer[r] + 8, mc_running_avg_y +
-               mc_avg_y_stride, 8);
-    vpx_memcpy(running_buffer[r], running_avg_y, 8);
-    vpx_memcpy(running_buffer[r] + 8, running_avg_y +
-               avg_y_stride, 8);
-    acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r],
-                                      mc_running_buffer[r],
-                                      running_buffer[r],
-                                      &k_0, &k_4, &k_8, &k_16,
-                                      &l3, &l32, &l21, acc_diff);
-    vpx_memcpy(running_avg_y, running_buffer[r], 8);
-    vpx_memcpy(running_avg_y + avg_y_stride, running_buffer[r] + 8, 8);
-    // Update pointers for next iteration.
-    sig += (sig_stride << 1);
-    mc_running_avg_y += (mc_avg_y_stride << 1);
-    running_avg_y += (avg_y_stride << 1);
-  }
-
-  {
-    sum_diff = sum_diff_16x1(acc_diff);
-    sum_diff_thresh = total_adj_strong_thresh(bs, increase_denoising);
-    if (abs(sum_diff) > sum_diff_thresh) {
-      // Before returning to copy the block (i.e., apply no denoising),
-      // check if we can still apply some (weaker) temporal filtering to
-      // this block, that would otherwise not be denoised at all. Simplest
-      // is to apply an additional adjustment to running_avg_y to bring it
-      // closer to sig. The adjustment is capped by a maximum delta, and
-      // chosen such that in most cases the resulting sum_diff will be
-      // within the acceptable range given by sum_diff_thresh.
-
-      // The delta is set by the excess of absolute pixel diff over the
-      // threshold.
-      const int delta = ((abs(sum_diff) - sum_diff_thresh)
-                         >> num_pels_log2_lookup[bs]) + 1;
-      // Only apply the adjustment for max delta up to 3.
-      if (delta < 4) {
-        const __m128i k_delta = _mm_set1_epi8(delta);
-        running_avg_y -= avg_y_stride * (4 << b_height_log2_lookup[bs]);
-        for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> 1); ++r) {
-          acc_diff = vp9_denoiser_adj_16x1_sse2(
-              sig_buffer[r], mc_running_buffer[r], running_buffer[r],
-              k_0, k_delta, acc_diff);
-          vpx_memcpy(running_avg_y, running_buffer[r], 8);
-          vpx_memcpy(running_avg_y + avg_y_stride, running_buffer[r] + 8, 8);
-          // Update pointers for next iteration.
-          running_avg_y += (avg_y_stride << 1);
-        }
-        sum_diff = sum_diff_16x1(acc_diff);
-        if (abs(sum_diff) > sum_diff_thresh) {
-          return COPY_BLOCK;
-        }
-      } else {
-        return COPY_BLOCK;
-      }
-    }
-  }
-  return FILTER_BLOCK;
-}
-
-static int vp9_denoiser_64_32_16xM_sse2(const uint8_t *sig, int sig_stride,
-                                        const uint8_t *mc_running_avg_y,
-                                        int mc_avg_y_stride,
-                                        uint8_t *running_avg_y,
-                                        int avg_y_stride,
-                                        int increase_denoising, BLOCK_SIZE bs,
-                                        int motion_magnitude) {
+// Denoiser for 16xM, 32xM and 64xM blocks.
+static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
+                                     const uint8_t *mc_running_avg_y,
+                                     int mc_avg_y_stride,
+                                     uint8_t *running_avg_y,
+                                     int avg_y_stride,
+                                     int increase_denoising, BLOCK_SIZE bs,
+                                     int motion_magnitude) {
   int sum_diff_thresh, r, c, sum_diff = 0;
   const int shift_inc  = (increase_denoising &&
                           motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ?
@@ -433,25 +351,25 @@
                              BLOCK_SIZE bs,
                              int motion_magnitude) {
   if (bs == BLOCK_4X4 || bs == BLOCK_4X8) {
-    return vp9_denoiser_4xM_sse2(sig, sig_stride,
-                                 mc_avg, mc_avg_stride,
-                                 avg, avg_stride,
-                                 increase_denoising,
-                                 bs, motion_magnitude);
+    return vp9_denoiser_NxM_sse2_small(sig, sig_stride,
+                                       mc_avg, mc_avg_stride,
+                                       avg, avg_stride,
+                                       increase_denoising,
+                                       bs, motion_magnitude, 4);
   } else if (bs == BLOCK_8X4 || bs == BLOCK_8X8 || bs == BLOCK_8X16) {
-    return vp9_denoiser_8xM_sse2(sig, sig_stride,
-                                 mc_avg, mc_avg_stride,
-                                 avg, avg_stride,
-                                 increase_denoising,
-                                 bs, motion_magnitude);
+    return vp9_denoiser_NxM_sse2_small(sig, sig_stride,
+                                       mc_avg, mc_avg_stride,
+                                       avg, avg_stride,
+                                       increase_denoising,
+                                       bs, motion_magnitude, 8);
   } else if (bs == BLOCK_16X8 || bs == BLOCK_16X16 || bs == BLOCK_16X32 ||
              bs == BLOCK_32X16|| bs == BLOCK_32X32 || bs == BLOCK_32X64 ||
              bs == BLOCK_64X32 || bs == BLOCK_64X64) {
-    return vp9_denoiser_64_32_16xM_sse2(sig, sig_stride,
-                                        mc_avg, mc_avg_stride,
-                                        avg, avg_stride,
-                                        increase_denoising,
-                                        bs, motion_magnitude);
+    return vp9_denoiser_NxM_sse2_big(sig, sig_stride,
+                                     mc_avg, mc_avg_stride,
+                                     avg, avg_stride,
+                                     increase_denoising,
+                                     bs, motion_magnitude);
   } else {
     return COPY_BLOCK;
   }
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 3684bf0..d3c2a13 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -34,6 +34,7 @@
   unsigned int                cq_level;  // constrained quality level
   unsigned int                rc_max_intra_bitrate_pct;
   unsigned int                rc_max_inter_bitrate_pct;
+  unsigned int                gf_cbr_boost_pct;
   unsigned int                lossless;
   unsigned int                frame_parallel_decoding_mode;
   AQ_MODE                     aq_mode;
@@ -56,6 +57,7 @@
   10,                         // cq_level
   0,                          // rc_max_intra_bitrate_pct
   0,                          // rc_max_inter_bitrate_pct
+  0,                          // gf_cbr_boost_pct
   0,                          // lossless
   0,                          // frame_parallel_decoding_mode
   NO_AQ,                      // aq_mode
@@ -383,6 +385,7 @@
   oxcf->target_bandwidth = 1000 * cfg->rc_target_bitrate;
   oxcf->rc_max_intra_bitrate_pct = extra_cfg->rc_max_intra_bitrate_pct;
   oxcf->rc_max_inter_bitrate_pct = extra_cfg->rc_max_inter_bitrate_pct;
+  oxcf->gf_cbr_boost_pct = extra_cfg->gf_cbr_boost_pct;
 
   oxcf->best_allowed_q =
       extra_cfg->lossless ? 0 : vp9_quantizer_to_qindex(cfg->rc_min_quantizer);
@@ -660,6 +663,14 @@
   return update_extra_cfg(ctx, &extra_cfg);
 }
 
+static vpx_codec_err_t ctrl_set_rc_gf_cbr_boost_pct(
+    vpx_codec_alg_priv_t *ctx, va_list args) {
+  struct vp9_extracfg extra_cfg = ctx->extra_cfg;  // Edit a local copy.
+  extra_cfg.gf_cbr_boost_pct =
+      CAST(VP8E_SET_GF_CBR_BOOST_PCT, args);  // Percent; 0 = boost off.
+  return update_extra_cfg(ctx, &extra_cfg);
+}
+
 static vpx_codec_err_t ctrl_set_lossless(vpx_codec_alg_priv_t *ctx,
                                          va_list args) {
   struct vp9_extracfg extra_cfg = ctx->extra_cfg;
@@ -1278,6 +1289,7 @@
   {VP8E_SET_CQ_LEVEL,                 ctrl_set_cq_level},
   {VP8E_SET_MAX_INTRA_BITRATE_PCT,    ctrl_set_rc_max_intra_bitrate_pct},
   {VP8E_SET_MAX_INTER_BITRATE_PCT,    ctrl_set_rc_max_inter_bitrate_pct},
+  {VP8E_SET_GF_CBR_BOOST_PCT,         ctrl_set_rc_gf_cbr_boost_pct},
   {VP9E_SET_LOSSLESS,                 ctrl_set_lossless},
   {VP9E_SET_FRAME_PARALLEL_DECODING,  ctrl_set_frame_parallel_decoding_mode},
   {VP9E_SET_AQ_MODE,                  ctrl_set_aq_mode},
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index 406a0d2..67e00fb 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -208,6 +208,19 @@
    */
   VP8E_SET_MAX_INTER_BITRATE_PCT,
 
+  /*!\brief Boost percentage for Golden Frame in CBR mode
+   *
+   * This value controls the amount of boost given to Golden Frame in
+   * CBR mode. It is expressed as a percentage of the average
+   * per-frame bitrate, with the special (and default) value 0 meaning
+   * the feature is off, i.e., no golden frame boost in CBR mode and
+   * average bitrate target is used.
+   *
+   * For example, to allow 100% more bits, i.e., 2X, in a golden frame
+   * than average frame, set this to 100.
+   *
+   */
+  VP8E_SET_GF_CBR_BOOST_PCT,
 
   /* TODO(jkoleszar): Move to vp9cx.h */
   VP9E_SET_LOSSLESS,
@@ -376,6 +389,7 @@
 VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTRA_BITRATE_PCT, unsigned int)
 VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTER_BITRATE_PCT, unsigned int)
 
+VPX_CTRL_USE_TYPE(VP8E_SET_GF_CBR_BOOST_PCT, unsigned int)
 VPX_CTRL_USE_TYPE(VP9E_SET_LOSSLESS, unsigned int)
 
 VPX_CTRL_USE_TYPE(VP9E_SET_FRAME_PARALLEL_DECODING, unsigned int)
diff --git a/vpxenc.c b/vpxenc.c
index e88dceb..2b89fc1 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -351,6 +351,8 @@
     NULL, "max-intra-rate", 1, "Max I-frame bitrate (pct)");
 static const arg_def_t max_inter_rate_pct = ARG_DEF(
     NULL, "max-inter-rate", 1, "Max P-frame bitrate (pct)");
+static const arg_def_t gf_cbr_boost_pct = ARG_DEF(
+    NULL, "gf-cbr-boost", 1, "Boost for Golden Frame in CBR mode (pct)");
 
 #if CONFIG_VP8_ENCODER
 static const arg_def_t token_parts = ARG_DEF(
@@ -416,7 +418,8 @@
 static const arg_def_t *vp9_args[] = {
   &cpu_used, &auto_altref, &sharpness, &static_thresh,
   &tile_cols, &tile_rows, &arnr_maxframes, &arnr_strength, &arnr_type,
-  &tune_ssim, &cq_level, &max_intra_rate_pct, &max_inter_rate_pct, &lossless,
+  &tune_ssim, &cq_level, &max_intra_rate_pct, &max_inter_rate_pct,
+  &gf_cbr_boost_pct, &lossless,
   &frame_parallel_decoding, &aq_mode, &frame_periodic_boost,
   &noise_sens, &tune_content,
 #if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH
@@ -430,7 +433,7 @@
   VP9E_SET_TILE_COLUMNS, VP9E_SET_TILE_ROWS,
   VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH, VP8E_SET_ARNR_TYPE,
   VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT,
-  VP8E_SET_MAX_INTER_BITRATE_PCT,
+  VP8E_SET_MAX_INTER_BITRATE_PCT, VP8E_SET_GF_CBR_BOOST_PCT,
   VP9E_SET_LOSSLESS, VP9E_SET_FRAME_PARALLEL_DECODING, VP9E_SET_AQ_MODE,
   VP9E_SET_FRAME_PERIODIC_BOOST, VP9E_SET_NOISE_SENSITIVITY,
   VP9E_SET_TUNE_CONTENT,