Merge "Removed bmi copy to/from BLOCKD"
diff --git a/vp8/common/x86/idctllm_sse2.asm b/vp8/common/x86/idctllm_sse2.asm
index 34a7e18..83d3765 100644
--- a/vp8/common/x86/idctllm_sse2.asm
+++ b/vp8/common/x86/idctllm_sse2.asm
@@ -11,7 +11,7 @@
 
 %include "vpx_ports/x86_abi_support.asm"
 
-;void idct_dequant_0_2x_sse2
+;void vp8_idct_dequant_0_2x_sse2
 ; (
 ;   short *qcoeff       - 0
 ;   short *dequant      - 1
@@ -21,8 +21,8 @@
 ;   int blk_stride      - 5
 ; )
 
-global sym(idct_dequant_0_2x_sse2)
-sym(idct_dequant_0_2x_sse2):
+global sym(vp8_idct_dequant_0_2x_sse2)
+sym(vp8_idct_dequant_0_2x_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -97,8 +97,8 @@
     pop         rbp
     ret
 
-global sym(idct_dequant_full_2x_sse2)
-sym(idct_dequant_full_2x_sse2):
+global sym(vp8_idct_dequant_full_2x_sse2)
+sym(vp8_idct_dequant_full_2x_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -353,7 +353,7 @@
     pop         rbp
     ret
 
-;void idct_dequant_dc_0_2x_sse2
+;void vp8_idct_dequant_dc_0_2x_sse2
 ; (
 ;   short *qcoeff       - 0
 ;   short *dequant      - 1
@@ -362,8 +362,8 @@
 ;   int dst_stride      - 4
 ;   short *dc           - 5
 ; )
-global sym(idct_dequant_dc_0_2x_sse2)
-sym(idct_dequant_dc_0_2x_sse2):
+global sym(vp8_idct_dequant_dc_0_2x_sse2)
+sym(vp8_idct_dequant_dc_0_2x_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -438,8 +438,8 @@
     pop         rbp
     ret
 
-global sym(idct_dequant_dc_full_2x_sse2)
-sym(idct_dequant_dc_full_2x_sse2):
+global sym(vp8_idct_dequant_dc_full_2x_sse2)
+sym(vp8_idct_dequant_dc_full_2x_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
diff --git a/vp8/decoder/x86/idct_blk_sse2.c b/vp8/decoder/x86/idct_blk_sse2.c
index 4c88db4..3a48068 100644
--- a/vp8/decoder/x86/idct_blk_sse2.c
+++ b/vp8/decoder/x86/idct_blk_sse2.c
@@ -12,17 +12,17 @@
 #include "vp8/common/idct.h"
 #include "vp8/decoder/dequantize.h"
 
-void idct_dequant_dc_0_2x_sse2
+void vp8_idct_dequant_dc_0_2x_sse2
             (short *q, short *dq, unsigned char *pre,
              unsigned char *dst, int dst_stride, short *dc);
-void idct_dequant_dc_full_2x_sse2
+void vp8_idct_dequant_dc_full_2x_sse2
             (short *q, short *dq, unsigned char *pre,
              unsigned char *dst, int dst_stride, short *dc);
 
-void idct_dequant_0_2x_sse2
+void vp8_idct_dequant_0_2x_sse2
             (short *q, short *dq ,unsigned char *pre,
              unsigned char *dst, int dst_stride, int blk_stride);
-void idct_dequant_full_2x_sse2
+void vp8_idct_dequant_full_2x_sse2
             (short *q, short *dq ,unsigned char *pre,
              unsigned char *dst, int dst_stride, int blk_stride);
 
@@ -35,14 +35,14 @@
     for (i = 0; i < 4; i++)
     {
         if (((short *)(eobs))[0] & 0xfefe)
-            idct_dequant_dc_full_2x_sse2 (q, dq, pre, dst, stride, dc);
+            vp8_idct_dequant_dc_full_2x_sse2 (q, dq, pre, dst, stride, dc);
         else
-            idct_dequant_dc_0_2x_sse2 (q, dq, pre, dst, stride, dc);
+            vp8_idct_dequant_dc_0_2x_sse2 (q, dq, pre, dst, stride, dc);
 
         if (((short *)(eobs))[1] & 0xfefe)
-            idct_dequant_dc_full_2x_sse2 (q+32, dq, pre+8, dst+8, stride, dc+2);
+            vp8_idct_dequant_dc_full_2x_sse2 (q+32, dq, pre+8, dst+8, stride, dc+2);
         else
-            idct_dequant_dc_0_2x_sse2 (q+32, dq, pre+8, dst+8, stride, dc+2);
+            vp8_idct_dequant_dc_0_2x_sse2 (q+32, dq, pre+8, dst+8, stride, dc+2);
 
         q    += 64;
         dc   += 4;
@@ -61,14 +61,14 @@
     for (i = 0; i < 4; i++)
     {
         if (((short *)(eobs))[0] & 0xfefe)
-            idct_dequant_full_2x_sse2 (q, dq, pre, dst, stride, 16);
+            vp8_idct_dequant_full_2x_sse2 (q, dq, pre, dst, stride, 16);
         else
-            idct_dequant_0_2x_sse2 (q, dq, pre, dst, stride, 16);
+            vp8_idct_dequant_0_2x_sse2 (q, dq, pre, dst, stride, 16);
 
         if (((short *)(eobs))[1] & 0xfefe)
-            idct_dequant_full_2x_sse2 (q+32, dq, pre+8, dst+8, stride, 16);
+            vp8_idct_dequant_full_2x_sse2 (q+32, dq, pre+8, dst+8, stride, 16);
         else
-            idct_dequant_0_2x_sse2 (q+32, dq, pre+8, dst+8, stride, 16);
+            vp8_idct_dequant_0_2x_sse2 (q+32, dq, pre+8, dst+8, stride, 16);
 
         q    += 64;
         pre  += 64;
@@ -82,33 +82,33 @@
              unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
 {
     if (((short *)(eobs))[0] & 0xfefe)
-        idct_dequant_full_2x_sse2 (q, dq, pre, dstu, stride, 8);
+        vp8_idct_dequant_full_2x_sse2 (q, dq, pre, dstu, stride, 8);
     else
-        idct_dequant_0_2x_sse2 (q, dq, pre, dstu, stride, 8);
+        vp8_idct_dequant_0_2x_sse2 (q, dq, pre, dstu, stride, 8);
 
     q    += 32;
     pre  += 32;
     dstu += stride*4;
 
     if (((short *)(eobs))[1] & 0xfefe)
-        idct_dequant_full_2x_sse2 (q, dq, pre, dstu, stride, 8);
+        vp8_idct_dequant_full_2x_sse2 (q, dq, pre, dstu, stride, 8);
     else
-        idct_dequant_0_2x_sse2 (q, dq, pre, dstu, stride, 8);
+        vp8_idct_dequant_0_2x_sse2 (q, dq, pre, dstu, stride, 8);
 
     q    += 32;
     pre  += 32;
 
     if (((short *)(eobs))[2] & 0xfefe)
-        idct_dequant_full_2x_sse2 (q, dq, pre, dstv, stride, 8);
+        vp8_idct_dequant_full_2x_sse2 (q, dq, pre, dstv, stride, 8);
     else
-        idct_dequant_0_2x_sse2 (q, dq, pre, dstv, stride, 8);
+        vp8_idct_dequant_0_2x_sse2 (q, dq, pre, dstv, stride, 8);
 
     q    += 32;
     pre  += 32;
     dstv += stride*4;
 
     if (((short *)(eobs))[3] & 0xfefe)
-        idct_dequant_full_2x_sse2 (q, dq, pre, dstv, stride, 8);
+        vp8_idct_dequant_full_2x_sse2 (q, dq, pre, dstv, stride, 8);
     else
-        idct_dequant_0_2x_sse2 (q, dq, pre, dstv, stride, 8);
+        vp8_idct_dequant_0_2x_sse2 (q, dq, pre, dstv, stride, 8);
 }
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 3f14124..19b52a3 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -1255,7 +1255,11 @@
                     cpi->zbin_mode_boost = MV_ZBIN_BOOST;
             }
         }
-        vp8_update_zbin_extra(cpi, x);
+
+        /* The fast quantizer doesn't use zbin_extra, so only update it
+         * when the regular quantizer is selected. */
+        if (cpi->sf.improved_quant)
+            vp8_update_zbin_extra(cpi, x);
     }
 
     cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 9cdc1e5..8559142 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -1473,7 +1473,6 @@
 
     int i;
     double boost_score = 0.0;
-    double fwd_boost_score = 0.0;
     double mv_ratio_accumulator = 0.0;
     double decay_accumulator = 1.0;
     double this_frame_mv_in_out = 0.0;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index ff9a641..35e187e 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -3237,16 +3237,17 @@
     // Test code for segmentation of gf/arf (0,0)
     //segmentation_test_function((VP8_PTR) cpi);
 
-#if CONFIG_REALTIME_ONLY
-    if(cpi->oxcf.auto_key && cm->frame_type != KEY_FRAME)
+    if (cpi->compressor_speed == 2)
     {
-        if(cpi->force_next_frame_intra)
+        if(cpi->oxcf.auto_key && cm->frame_type != KEY_FRAME)
         {
-            cm->frame_type = KEY_FRAME;  /* delayed intra frame */
+            if(cpi->force_next_frame_intra)
+            {
+                cm->frame_type = KEY_FRAME;  /* delayed intra frame */
+            }
         }
+        cpi->force_next_frame_intra = 0;
     }
-    cpi->force_next_frame_intra = 0;
-#endif
 
     // For an alt ref frame in 2 pass we skip the call to the second pass function that sets the target bandwidth
 #if !(CONFIG_REALTIME_ONLY)
@@ -3775,15 +3776,15 @@
         // (assuming that we didn't)!
         if (cpi->pass != 2 && cpi->oxcf.auto_key && cm->frame_type != KEY_FRAME)
         {
+            int key_frame_decision = decide_key_frame(cpi);
 
-#if CONFIG_REALTIME_ONLY
+            if (cpi->compressor_speed == 2)
             {
                 /* we don't do re-encoding in realtime mode
                  * if key frame is decided than we force it on next frame */
-                cpi->force_next_frame_intra = decide_key_frame(cpi);
+                cpi->force_next_frame_intra = key_frame_decision;
             }
-#else
-            if (decide_key_frame(cpi))
+            else if (key_frame_decision)
             {
                 // Reset all our sizing numbers and recode
                 cm->frame_type = KEY_FRAME;
@@ -3820,7 +3821,6 @@
 
                 continue;
             }
-#endif
         }
 
         vp8_clear_system_state();
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index f75f6cb..aead2fb 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -609,9 +609,8 @@
     int *lf_ref_frame_sign_bias;
     int *lf_ref_frame;
 
-#if CONFIG_REALTIME_ONLY
     int force_next_frame_intra; /* force next frame to intra when kf_auto says so */
-#endif
+
     int droppable;
 } VP8_COMP;