Merge "Code cleanup." into experimental
diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c
index 36857bc..35f53ef 100644
--- a/vp9/common/vp9_idctllm.c
+++ b/vp9/common/vp9_idctllm.c
@@ -24,12 +24,20 @@
  **************************************************************************/
 #include <assert.h>
 #include <math.h>
+
 #include "./vpx_config.h"
 #include "vp9/common/vp9_systemdependent.h"
 #include "vp9/common/vp9_blockd.h"
 #include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_idct.h"
 
+#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))
+
+/* If we don't want to use ROUND_POWER_OF_TWO macro
+static INLINE int16_t round_power_of_two(int16_t value, int n) {
+  return (value + (1 << (n - 1))) >> n;
+}*/
+
 void vp9_short_inv_walsh4x4_x8_c(int16_t *input, int16_t *output, int pitch) {
   int i;
   int a1, b1, c1, d1;
@@ -38,10 +46,10 @@
   int shortpitch = pitch >> 1;
 
   for (i = 0; i < 4; i++) {
-    a1 = ((ip[0] + ip[3])) >> WHT_UPSCALE_FACTOR;
-    b1 = ((ip[1] + ip[2])) >> WHT_UPSCALE_FACTOR;
-    c1 = ((ip[1] - ip[2])) >> WHT_UPSCALE_FACTOR;
-    d1 = ((ip[0] - ip[3])) >> WHT_UPSCALE_FACTOR;
+    a1 = (ip[0] + ip[3]) >> WHT_UPSCALE_FACTOR;
+    b1 = (ip[1] + ip[2]) >> WHT_UPSCALE_FACTOR;
+    c1 = (ip[1] - ip[2]) >> WHT_UPSCALE_FACTOR;
+    d1 = (ip[0] - ip[3]) >> WHT_UPSCALE_FACTOR;
 
     op[0] = (a1 + b1 + 1) >> 1;
     op[1] = (c1 + d1) >> 1;
@@ -79,7 +87,7 @@
   int shortpitch = pitch >> 1;
 
   op[0] = ((ip[0] >> WHT_UPSCALE_FACTOR) + 1) >> 1;
-  op[1] = op[2] = op[3] = ((ip[0] >> WHT_UPSCALE_FACTOR) >> 1);
+  op[1] = op[2] = op[3] = (ip[0] >> WHT_UPSCALE_FACTOR) >> 1;
 
 
   ip = tmp;
@@ -101,9 +109,8 @@
   vp9_short_inv_walsh4x4_1_x8_c(&dc, tmp, 4 << 1);
 
   for (r = 0; r < 4; r++) {
-    for (c = 0; c < 4; c++) {
+    for (c = 0; c < 4; c++)
       dst_ptr[c] = clip_pixel(tmp[r * 4 + c] + pred_ptr[c]);
-    }
 
     dst_ptr += stride;
     pred_ptr += pitch;
@@ -150,7 +157,7 @@
       temp_in[j] = out[j * 4 + i];
     idct4_1d(temp_in, temp_out);
     for (j = 0; j < 4; ++j)
-      output[j * short_pitch + i] = (temp_out[j] + 8) >> 4;
+      output[j * short_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 4);
   }
 }
 
@@ -159,19 +166,12 @@
   int a1;
   int16_t *op = output;
   int shortpitch = pitch >> 1;
-  int tmp;
-  int16_t out;
-  tmp = input[0] * cospi_16_64;
-  out = dct_const_round_shift(tmp);
-  tmp = out * cospi_16_64;
-  out = dct_const_round_shift(tmp);
-  a1 = (out + 8) >> 4;
+  int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
+  out = dct_const_round_shift(out * cospi_16_64);
+  a1 = ROUND_POWER_OF_TWO(out, 4);
 
   for (i = 0; i < 4; i++) {
-    op[0] = a1;
-    op[1] = a1;
-    op[2] = a1;
-    op[3] = a1;
+    op[0] = op[1] = op[2] = op[3] = a1;
     op += shortpitch;
   }
 }
@@ -180,18 +180,14 @@
                             uint8_t *dst_ptr, int pitch, int stride) {
   int a1;
   int r, c;
-  int tmp;
-  int16_t out;
-  tmp = input_dc * cospi_16_64;
-  out = dct_const_round_shift(tmp);
-  tmp = out * cospi_16_64;
-  out = dct_const_round_shift(tmp);
-  a1 = (out + 8) >> 4;
+  int16_t out = dct_const_round_shift(input_dc * cospi_16_64);
+  out = dct_const_round_shift(out * cospi_16_64);
+  a1 = ROUND_POWER_OF_TWO(out, 4);
 
   for (r = 0; r < 4; r++) {
-    for (c = 0; c < 4; c++) {
+    for (c = 0; c < 4; c++)
       dst_ptr[c] = clip_pixel(a1 + pred_ptr[c]);
-    }
+
     dst_ptr += stride;
     pred_ptr += pitch;
   }
@@ -262,7 +258,7 @@
       temp_in[j] = out[j * 8 + i];
     idct8_1d(temp_in, temp_out);
     for (j = 0; j < 8; ++j)
-      output[j * short_pitch + i] = (temp_out[j] + 16) >> 5;
+      output[j * short_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 5);
   }
 }
 
@@ -354,7 +350,7 @@
       temp_in[j] = out[j * 4 + i];
     invc(temp_in, temp_out);
     for (j = 0; j < 4; ++j)
-      output[j * short_pitch + i] = (temp_out[j] + 8) >> 4;
+      output[j * short_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 4);
   }
 }
 
@@ -373,7 +369,7 @@
 
   if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) {
     output[0] = output[1] = output[2] = output[3] = output[4]
-                    = output[5] = output[6] = output[7] = 0;
+              = output[5] = output[6] = output[7] = 0;
     return;
   }
 
@@ -483,7 +479,7 @@
       temp_in[j] = out[j * 8 + i];
     invc(temp_in, temp_out);
     for (j = 0; j < 8; ++j)
-      output[j * short_pitch + i] = (temp_out[j] + 16) >> 5;
+      output[j * short_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 5);
   }
 }
 
@@ -509,14 +505,14 @@
       temp_in[j] = out[j * 8 + i];
     idct8_1d(temp_in, temp_out);
     for (j = 0; j < 8; ++j)
-      output[j * short_pitch + i] = (temp_out[j] + 16) >> 5;
+      output[j * short_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 5);
   }
 }
 
 void vp9_short_idct1_8x8_c(int16_t *input, int16_t *output) {
   int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
   out = dct_const_round_shift(out * cospi_16_64);
-  output[0] = (out + 16) >> 5;
+  output[0] = ROUND_POWER_OF_TWO(out, 5);
 }
 
 void idct16_1d(int16_t *input, int16_t *output) {
@@ -703,7 +699,7 @@
       temp_in[j] = out[j * 16 + i];
     idct16_1d(temp_in, temp_out);
     for (j = 0; j < 16; ++j)
-      output[j * 16 + i] = (temp_out[j] + 32) >> 6;
+      output[j * 16 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
   }
 }
 
@@ -924,7 +920,7 @@
       temp_in[j] = out[j * 16 + i];
     invc(temp_in, temp_out);
     for (j = 0; j < 16; ++j)
-      output[j * 16 + i] = (temp_out[j] + 32) >> 6;
+      output[j * 16 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
   }
 }
 
@@ -951,19 +947,15 @@
         temp_in[j] = out[j*16 + i];
       idct16_1d(temp_in, temp_out);
       for (j = 0; j < 16; ++j)
-        output[j*16 + i] = (temp_out[j] + 32) >> 6;
+        output[j*16 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
     }
 }
 
 
 void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output) {
-  int tmp;
-  int16_t out;
-  tmp = input[0] * cospi_16_64;
-  out = dct_const_round_shift(tmp);
-  tmp = out * cospi_16_64;
-  out = dct_const_round_shift(tmp);
-  *output = (out + 32) >> 6;
+  int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
+  out = dct_const_round_shift(out * cospi_16_64);
+  output[0] = ROUND_POWER_OF_TWO(out, 6);
 }
 
 void idct32_1d(int16_t *input, int16_t *output) {
@@ -1352,12 +1344,12 @@
       temp_in[j] = out[j * 32 + i];
     idct32_1d(temp_in, temp_out);
     for (j = 0; j < 32; ++j)
-      output[j * 32 + i] = (temp_out[j] + 32) >> 6;
+      output[j * 32 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
   }
 }
 
 void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output) {
   int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
   out = dct_const_round_shift(out * cospi_16_64);
-  output[0] = (out + 32) >> 6;
+  output[0] = ROUND_POWER_OF_TWO(out, 6);
 }
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index a371eeb..e2f3e26 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -728,10 +728,10 @@
   // column transform
   for (i = 0; i < 16; ++i) {
     for (j = 0; j < 16; ++j)
-      temp_in[j] = input[j * short_pitch + i];
+      temp_in[j] = input[j * short_pitch + i] << 2;
     fwdc(temp_in, temp_out);
     for (j = 0; j < 16; ++j)
-      outptr[j * 16 + i] = temp_out[j];
+      outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
   }
 
   // row transform
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 0a407df..a492487 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -844,16 +844,13 @@
   power_term = (vp9_convert_qindex_to_q(Q) * 0.01) + pt_low;
   power_term = (power_term > pt_high) ? pt_high : power_term;
 
-  // Adjustments to error term
-  // TBD
-
   // Calculate correction factor
   correction_factor = pow(error_term, power_term);
 
   // Clip range
   correction_factor =
     (correction_factor < 0.05)
-    ? 0.05 : (correction_factor > 2.0) ? 2.0 : correction_factor;
+    ? 0.05 : (correction_factor > 5.0) ? 5.0 : correction_factor;
 
   return correction_factor;
 }
@@ -887,8 +884,7 @@
 
 static int estimate_max_q(VP9_COMP *cpi,
                           FIRSTPASS_STATS *fpstats,
-                          int section_target_bandwitdh,
-                          int overhead_bits) {
+                          int section_target_bandwitdh) {
   int Q;
   int num_mbs = cpi->common.MBs;
   int target_norm_bits_per_mb;
@@ -899,7 +895,6 @@
   double err_per_mb = section_err / num_mbs;
   double err_correction_factor;
   double speed_correction = 1.0;
-  double overhead_bits_per_mb;
 
   if (section_target_bandwitdh <= 0)
     return cpi->twopass.maxq_max_limit;          // Highest value allowed
@@ -951,13 +946,6 @@
       speed_correction = 1.25;
   }
 
-  // Estimate of overhead bits per mb
-  // Correction to overhead bits for min allowed Q.
-  // PGW TODO.. This code is broken for the extended Q range
-  //            for now overhead set to 0.
-  overhead_bits_per_mb = overhead_bits / num_mbs;
-  overhead_bits_per_mb *= pow(0.98, (double)cpi->twopass.maxq_min_limit);
-
   // Try and pick a max Q that will be high enough to encode the
   // content at the given rate.
   for (Q = cpi->twopass.maxq_min_limit; Q < cpi->twopass.maxq_max_limit; Q++) {
@@ -968,23 +956,9 @@
       sr_correction * speed_correction *
       cpi->twopass.est_max_qcorrection_factor;
 
-    if (err_correction_factor < 0.05)
-      err_correction_factor = 0.05;
-    else if (err_correction_factor > 5.0)
-      err_correction_factor = 5.0;
 
     bits_per_mb_at_this_q =
-      vp9_bits_per_mb(INTER_FRAME, Q) + (int)overhead_bits_per_mb;
-
-    bits_per_mb_at_this_q = (int)(.5 + err_correction_factor *
-                                  (double)bits_per_mb_at_this_q);
-
-    // Mode and motion overhead
-    // As Q rises in real encode loop rd code will force overhead down
-    // We make a crude adjustment for this here as *.98 per Q step.
-    // PGW TODO.. This code is broken for the extended Q range
-    //            for now overhead set to 0.
-    // overhead_bits_per_mb = (int)((double)overhead_bits_per_mb * 0.98);
+      vp9_bits_per_mb(INTER_FRAME, Q, err_correction_factor);
 
     if (bits_per_mb_at_this_q <= target_norm_bits_per_mb)
       break;
@@ -1002,7 +976,7 @@
   // PGW TODO.. This code is broken for the extended Q range
   if ((cpi->ni_frames >
        ((int)cpi->twopass.total_stats->count >> 8)) &&
-      (cpi->ni_frames > 150)) {
+      (cpi->ni_frames > 25)) {
     adjust_maxq_qrange(cpi);
   }
 
@@ -1013,8 +987,7 @@
 // complexity and data rate.
 static int estimate_cq(VP9_COMP *cpi,
                        FIRSTPASS_STATS *fpstats,
-                       int section_target_bandwitdh,
-                       int overhead_bits) {
+                       int section_target_bandwitdh) {
   int Q;
   int num_mbs = cpi->common.MBs;
   int target_norm_bits_per_mb;
@@ -1027,15 +1000,11 @@
   double speed_correction = 1.0;
   double clip_iiratio;
   double clip_iifactor;
-  double overhead_bits_per_mb;
-
 
   target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20))
                             ? (512 * section_target_bandwitdh) / num_mbs
                             : 512 * (section_target_bandwitdh / num_mbs);
 
-  // Estimate of overhead bits per mb
-  overhead_bits_per_mb = overhead_bits / num_mbs;
 
   // Corrections for higher compression speed settings
   // (reduced compression expected)
@@ -1074,23 +1043,8 @@
       calc_correction_factor(err_per_mb, 100.0, 0.4, 0.90, Q) *
       sr_correction * speed_correction * clip_iifactor;
 
-    if (err_correction_factor < 0.05)
-      err_correction_factor = 0.05;
-    else if (err_correction_factor > 5.0)
-      err_correction_factor = 5.0;
-
     bits_per_mb_at_this_q =
-      vp9_bits_per_mb(INTER_FRAME, Q) + (int)overhead_bits_per_mb;
-
-    bits_per_mb_at_this_q = (int)(.5 + err_correction_factor *
-                                  (double)bits_per_mb_at_this_q);
-
-    // Mode and motion overhead
-    // As Q rises in real encode loop rd code will force overhead down
-    // We make a crude adjustment for this here as *.98 per Q step.
-    // PGW TODO.. This code is broken for the extended Q range
-    //            for now overhead set to 0.
-    overhead_bits_per_mb = (int)((double)overhead_bits_per_mb * 0.98);
+      vp9_bits_per_mb(INTER_FRAME, Q, err_correction_factor);
 
     if (bits_per_mb_at_this_q <= target_norm_bits_per_mb)
       break;
@@ -1953,8 +1907,6 @@
   double this_frame_intra_error;
   double this_frame_coded_error;
 
-  int overhead_bits;
-
   if (!cpi->twopass.stats_in) {
     return;
   }
@@ -2018,11 +1970,6 @@
   if (cpi->target_bandwidth < 0)
     cpi->target_bandwidth = 0;
 
-
-  // Account for mv, mode and other overheads.
-  overhead_bits = (int)estimate_modemvcost(
-                        cpi, cpi->twopass.total_left_stats);
-
   // Special case code for first frame.
   if (cpi->common.current_video_frame == 0) {
     cpi->twopass.est_max_qcorrection_factor = 1.0;
@@ -2034,8 +1981,7 @@
       est_cq =
         estimate_cq(cpi,
                     cpi->twopass.total_left_stats,
-                    (int)(cpi->twopass.bits_left / frames_left),
-                    overhead_bits);
+                    (int)(cpi->twopass.bits_left / frames_left));
 
       cpi->cq_target_quality = cpi->oxcf.cq_level;
       if (est_cq > cpi->cq_target_quality)
@@ -2049,21 +1995,23 @@
     tmp_q = estimate_max_q(
               cpi,
               cpi->twopass.total_left_stats,
-              (int)(cpi->twopass.bits_left / frames_left),
-              overhead_bits);
+              (int)(cpi->twopass.bits_left / frames_left));
 
     cpi->active_worst_quality         = tmp_q;
     cpi->ni_av_qi                     = tmp_q;
     cpi->avg_q                        = vp9_convert_qindex_to_q(tmp_q);
 
+#ifndef ONE_SHOT_Q_ESTIMATE
     // Limit the maxq value returned subsequently.
     // This increases the risk of overspend or underspend if the initial
     // estimate for the clip is bad, but helps prevent excessive
     // variation in Q, especially near the end of a clip
     // where for example a small overspend may cause Q to crash
     adjust_maxq_qrange(cpi);
+#endif
   }
 
+#ifndef ONE_SHOT_Q_ESTIMATE
   // The last few frames of a clip almost always have to few or too many
   // bits and for the sake of over exact rate control we dont want to make
   // radical adjustments to the allowed quantizer range just to use up a
@@ -2078,13 +2026,13 @@
     tmp_q = estimate_max_q(
               cpi,
               cpi->twopass.total_left_stats,
-              (int)(cpi->twopass.bits_left / frames_left),
-              overhead_bits);
+              (int)(cpi->twopass.bits_left / frames_left));
 
     // Make a damped adjustment to active max Q
     cpi->active_worst_quality =
       adjust_active_maxq(cpi->active_worst_quality, tmp_q);
   }
+#endif
 
   cpi->twopass.frames_to_key--;
 
@@ -2092,7 +2040,6 @@
   subtract_stats(cpi->twopass.total_left_stats, &this_frame);
 }
 
-
 static int test_candidate_kf(VP9_COMP *cpi,
                              FIRSTPASS_STATS *last_frame,
                              FIRSTPASS_STATS *this_frame,
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 40a1263..feb1e36 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -3321,11 +3321,19 @@
   if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size)
     cpi->bits_off_target = cpi->oxcf.maximum_buffer_size;
 
-  // Rolling monitors of whether we are over or underspending used to help regulate min and Max Q in two pass.
-  cpi->rolling_target_bits = ((cpi->rolling_target_bits * 3) + cpi->this_frame_target + 2) / 4;
-  cpi->rolling_actual_bits = ((cpi->rolling_actual_bits * 3) + cpi->projected_frame_size + 2) / 4;
-  cpi->long_rolling_target_bits = ((cpi->long_rolling_target_bits * 31) + cpi->this_frame_target + 16) / 32;
-  cpi->long_rolling_actual_bits = ((cpi->long_rolling_actual_bits * 31) + cpi->projected_frame_size + 16) / 32;
+  // Rolling monitors of whether we are over or underspending used to help
+  // regulate min and Max Q in two pass.
+  if (cm->frame_type != KEY_FRAME) {
+    cpi->rolling_target_bits =
+      ((cpi->rolling_target_bits * 3) + cpi->this_frame_target + 2) / 4;
+    cpi->rolling_actual_bits =
+      ((cpi->rolling_actual_bits * 3) + cpi->projected_frame_size + 2) / 4;
+    cpi->long_rolling_target_bits =
+      ((cpi->long_rolling_target_bits * 31) + cpi->this_frame_target + 16) / 32;
+    cpi->long_rolling_actual_bits =
+      ((cpi->long_rolling_actual_bits * 31) +
+       cpi->projected_frame_size + 16) / 32;
+  }
 
   // Actual bits spent
   cpi->total_actual_bits    += cpi->projected_frame_size;
@@ -3551,7 +3559,12 @@
     vp9_second_pass(cpi);
 
   encode_frame_to_data_rate(cpi, size, dest, frame_flags);
+
+#ifdef DISABLE_RC_LONG_TERM_MEM
+  cpi->twopass.bits_left -=  cpi->this_frame_target;
+#else
   cpi->twopass.bits_left -= 8 * *size;
+#endif
 
   if (!cpi->refresh_alt_ref_frame) {
     double lower_bounds_min_rate = FRAME_OVERHEAD_BITS * cpi->oxcf.frame_rate;
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 6b9bf87..9b509ea 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -29,6 +29,10 @@
 #include "vp9/common/vp9_findnearmv.h"
 #include "vp9/encoder/vp9_lookahead.h"
 
+// Experimental rate control switches
+// #define ONE_SHOT_Q_ESTIMATE 1
+// #define DISABLE_RC_LONG_TERM_MEM 1
+
 // #define SPEEDSTATS 1
 #define MIN_GF_INTERVAL             4
 #define DEFAULT_GF_INTERVAL         7
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 53d931c..a2a7957 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -114,13 +114,19 @@
   return retval;
 }
 
-int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex) {
-  if (frame_type == KEY_FRAME)
-    return (int)(4500000 / vp9_convert_qindex_to_q(qindex));
-  else
-    return (int)(2850000 / vp9_convert_qindex_to_q(qindex));
-}
+int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex,
+                    double correction_factor) {
+  int enumerator;
+  double q = vp9_convert_qindex_to_q(qindex);
 
+  if (frame_type == KEY_FRAME) {
+    enumerator = 4500000;
+  } else {
+    enumerator = 2850000;
+  }
+
+  return (int)(0.5 + (enumerator * correction_factor / q));
+}
 
 void vp9_save_coding_context(VP9_COMP *cpi) {
   CODING_CONTEXT *const cc = &cpi->coding_context;
@@ -259,7 +265,7 @@
 
 static int estimate_bits_at_q(int frame_kind, int Q, int MBs,
                               double correction_factor) {
-  int Bpm = (int)(.5 + correction_factor * vp9_bits_per_mb(frame_kind, Q));
+  int Bpm = (int)(vp9_bits_per_mb(frame_kind, Q, correction_factor));
 
   /* Attempt to retain reasonable accuracy without overflow. The cutoff is
    * chosen such that the maximum product of Bpm and MBs fits 31 bits. The
@@ -397,12 +403,12 @@
       rate_correction_factor = cpi->rate_correction_factor;
   }
 
-  // Work out how big we would have expected the frame to be at this Q given the current correction factor.
+  // Work out how big we would have expected the frame to be at this Q given
+  // the current correction factor.
   // Stay in double to avoid int overflow when values are large
   projected_size_based_on_q =
-    (int)(((.5 + rate_correction_factor *
-            vp9_bits_per_mb(cpi->common.frame_type, Q)) *
-           cpi->common.MBs) / (1 << BPER_MB_NORMBITS));
+    estimate_bits_at_q(cpi->common.frame_type, Q,
+                       cpi->common.MBs, rate_correction_factor);
 
   // Work out a size correction factor.
   // if ( cpi->this_frame_target > 0 )
@@ -485,8 +491,7 @@
 
   do {
     bits_per_mb_at_this_q =
-      (int)(.5 + correction_factor *
-            vp9_bits_per_mb(cpi->common.frame_type, i));
+      (int)(vp9_bits_per_mb(cpi->common.frame_type, i, correction_factor));
 
     if (bits_per_mb_at_this_q <= target_bits_per_mb) {
       if ((target_bits_per_mb - bits_per_mb_at_this_q) <= last_error)
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index c5817d7..4733176 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -32,7 +32,8 @@
 
 double vp9_convert_qindex_to_q(int qindex);
 int vp9_gfboost_qadjust(int qindex);
-int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex);
+extern int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex,
+                           double correction_factor);
 void vp9_setup_inter_frame(VP9_COMP *cpi);
 
 #endif  // VP9_ENCODER_VP9_RATECTRL_H_