Merge "mips msa configuration fix"
diff --git a/build/make/Android.mk b/build/make/Android.mk
index d897b44..3d3f57d 100644
--- a/build/make/Android.mk
+++ b/build/make/Android.mk
@@ -184,7 +184,11 @@
 	@$(RM) -r $(ASM_CNV_PATH)
 	@$(RM) $(CLEAN-OBJS)
 
-include $(BUILD_SHARED_LIBRARY)
+ifeq ($(ENABLE_SHARED),1)
+  include $(BUILD_SHARED_LIBRARY)
+else
+  include $(BUILD_STATIC_LIBRARY)
+endif
 
 ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
 $(call import-module,cpufeatures)
diff --git a/test/test_libvpx.cc b/test/test_libvpx.cc
index fc035af..dcf5fc5 100644
--- a/test/test_libvpx.cc
+++ b/test/test_libvpx.cc
@@ -14,11 +14,12 @@
 #endif
 extern "C" {
 #if CONFIG_VP8
-extern void vp8_rtcd();
-#endif
+#include "./vp8_rtcd.h"
+#endif  // CONFIG_VP8
 #if CONFIG_VP9
-extern void vp9_rtcd();
-#endif
+#include "./vp9_rtcd.h"
+#endif  // CONFIG_VP9
+#include "./vpx_scale_rtcd.h"
 }
 #include "third_party/googletest/src/include/gtest/gtest.h"
 
@@ -59,11 +60,12 @@
 
 #if CONFIG_VP8
   vp8_rtcd();
-#endif
+#endif  // CONFIG_VP8
 #if CONFIG_VP9
   vp9_rtcd();
-#endif
-#endif
+#endif  // CONFIG_VP9
+  vpx_scale_rtcd();
+#endif  // !CONFIG_SHARED
 
   return RUN_ALL_TESTS();
 }
diff --git a/third_party/x86inc/README.libvpx b/third_party/x86inc/README.libvpx
index 02cd9ab..343bcf9 100644
--- a/third_party/x86inc/README.libvpx
+++ b/third_party/x86inc/README.libvpx
@@ -9,3 +9,4 @@
 
 Local Modifications:
 Some modifications to allow PIC to work with x86inc.
+Conditionally define program_name to allow overriding.
diff --git a/third_party/x86inc/x86inc.asm b/third_party/x86inc/x86inc.asm
index 9273fc9..bc81169 100644
--- a/third_party/x86inc/x86inc.asm
+++ b/third_party/x86inc/x86inc.asm
@@ -36,7 +36,9 @@
 
 %include "vpx_config.asm"
 
+%ifndef program_name
 %define program_name vp9
+%endif
 
 
 %define UNIX64 0
diff --git a/vp8/common/rtcd.c b/vp8/common/rtcd.c
index 0b371b0..ab0e9b4 100644
--- a/vp8/common/rtcd.c
+++ b/vp8/common/rtcd.c
@@ -7,15 +7,13 @@
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
-#include "vpx_config.h"
+#include "./vpx_config.h"
 #define RTCD_C
-#include "vp8_rtcd.h"
+#include "./vp8_rtcd.h"
 #include "vpx_ports/vpx_once.h"
 
-extern void vpx_scale_rtcd(void);
 
 void vp8_rtcd()
 {
-    vpx_scale_rtcd();
     once(setup_rtcd_internal);
 }
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index bf8a853..a5dfd07 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -10,7 +10,8 @@
 
 
 #include "./vpx_config.h"
-#include "vp8_rtcd.h"
+#include "./vp8_rtcd.h"
+#include "./vpx_scale_rtcd.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/internal/vpx_codec_internal.h"
 #include "vpx_version.h"
@@ -649,6 +650,7 @@
 
 
     vp8_rtcd();
+    vpx_scale_rtcd();
 
     if (!ctx->priv)
     {
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 67a0fef..e0eb30a 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -11,7 +11,8 @@
 
 #include <stdlib.h>
 #include <string.h>
-#include "vp8_rtcd.h"
+#include "./vp8_rtcd.h"
+#include "./vpx_scale_rtcd.h"
 #include "vpx/vpx_decoder.h"
 #include "vpx/vp8dx.h"
 #include "vpx/internal/vpx_codec_internal.h"
@@ -106,6 +107,7 @@
     (void) data;
 
     vp8_rtcd();
+    vpx_scale_rtcd();
 
     /* This function only allocates space for the vpx_codec_alg_priv_t
      * structure. More memory may be required at the time the stream
diff --git a/vp9/common/vp9_rtcd.c b/vp9/common/vp9_rtcd.c
index c777bc8..2dfa09f 100644
--- a/vp9/common/vp9_rtcd.c
+++ b/vp9/common/vp9_rtcd.c
@@ -12,10 +12,7 @@
 #include "./vp9_rtcd.h"
 #include "vpx_ports/vpx_once.h"
 
-void vpx_scale_rtcd(void);
-
 void vp9_rtcd() {
-    vpx_scale_rtcd();
     // TODO(JBB): Remove this once, by insuring that both the encoder and
     // decoder setup functions are protected by once();
     once(setup_rtcd_internal);
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 358f22a..5480222 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -12,6 +12,7 @@
 #include <limits.h>
 #include <stdio.h>
 
+#include "./vp9_rtcd.h"
 #include "./vpx_scale_rtcd.h"
 
 #include "vpx_mem/vpx_mem.h"
@@ -39,6 +40,7 @@
 
   if (!init_done) {
     vp9_rtcd();
+    vpx_scale_rtcd();
     vp9_init_intra_predictors();
     init_done = 1;
   }
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 367ab3c..e59d2c2 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -458,8 +458,7 @@
   return 0;
 }
 
-
-void vp9_set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) {
+void vp9_set_vbp_thresholds(VP9_COMP *cpi, int q) {
   SPEED_FEATURES *const sf = &cpi->sf;
   if (sf->partition_search_type != VAR_BASED_PARTITION &&
       sf->partition_search_type != REFERENCE_PARTITION) {
@@ -480,25 +479,41 @@
     // Array index: 0 - threshold_64x64; 1 - threshold_32x32;
     // 2 - threshold_16x16; 3 - vbp_threshold_8x8;
     if (is_key_frame) {
-      thresholds[0] = threshold_base;
-      thresholds[1] = threshold_base >> 2;
-      thresholds[2] = threshold_base >> 2;
-      thresholds[3] = threshold_base << 2;
+      cpi->vbp_thresholds[0] = threshold_base;
+      cpi->vbp_thresholds[1] = threshold_base >> 2;
+      cpi->vbp_thresholds[2] = threshold_base >> 2;
+      cpi->vbp_thresholds[3] = threshold_base << 2;
       cpi->vbp_bsize_min = BLOCK_8X8;
     } else {
-      thresholds[1] = threshold_base;
+      cpi->vbp_thresholds[1] = threshold_base;
       if (cm->width <= 352 && cm->height <= 288) {
-        thresholds[0] = threshold_base >> 2;
-        thresholds[2] = threshold_base << 3;
+        cpi->vbp_thresholds[0] = threshold_base >> 2;
+        cpi->vbp_thresholds[2] = threshold_base << 3;
       } else {
-        thresholds[0] = threshold_base;
-        thresholds[2] = threshold_base << cpi->oxcf.speed;
+        cpi->vbp_thresholds[0] = threshold_base;
+        cpi->vbp_thresholds[2] = threshold_base << cpi->oxcf.speed;
       }
       cpi->vbp_bsize_min = BLOCK_16X16;
     }
   }
 }
 
+// Overwrites thresholds[0..2] from cpi->y_dequant[q][1]; thresholds[3] is
+// left as is. Index: 0 - 64x64; 1 - 32x32; 2 - 16x16; 3 - 8x8.
+static void modify_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) {
+  const int64_t base = (int64_t)(cpi->y_dequant[q][1]);
+  const int low_res = cpi->common.width <= 352 && cpi->common.height <= 288;
+
+  thresholds[1] = base;
+  if (low_res) {
+    thresholds[0] = base >> 2;
+    thresholds[2] = base << 3;
+  } else {
+    thresholds[0] = base;
+    thresholds[2] = base << cpi->oxcf.speed;
+  }
+}
+
 static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d,
                                  int dp, int x8_idx, int y8_idx, v8x8 *vst,
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -611,7 +626,7 @@
 
     if (cyclic_refresh_segment_id_boosted(segment_id)) {
       int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
-      vp9_set_vbp_thresholds(cpi, thresholds, q);
+      modify_vbp_thresholds(cpi, thresholds, q);
     }
   }
 
@@ -3854,6 +3869,9 @@
     }
     vp9_zero(x->zcoeff_blk);
 
+    if (cm->frame_type != KEY_FRAME && cpi->rc.frames_since_golden == 0)
+      cpi->ref_frame_flags &= (~VP9_GOLD_FLAG);
+
     if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION)
       source_var_based_partition_search_method(cpi);
   }
diff --git a/vp9/encoder/vp9_encodeframe.h b/vp9/encoder/vp9_encodeframe.h
index 1027130..1acde02 100644
--- a/vp9/encoder/vp9_encodeframe.h
+++ b/vp9/encoder/vp9_encodeframe.h
@@ -40,7 +40,7 @@
 void vp9_encode_tile(struct VP9_COMP *cpi, struct ThreadData *td,
                      int tile_row, int tile_col);
 
-void vp9_set_vbp_thresholds(struct VP9_COMP *cpi, int64_t thresholds[], int q);
+void vp9_set_vbp_thresholds(struct VP9_COMP *cpi, int q);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 82f99b3..8a7ae8e 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -13,6 +13,7 @@
 #include <limits.h>
 
 #include "./vpx_config.h"
+#include "./vp9_rtcd.h"
 #include "./vpx_scale_rtcd.h"
 #include "vpx/internal/vpx_psnr.h"
 #include "vpx_ports/vpx_timer.h"
@@ -293,6 +294,7 @@
 
   if (!init_done) {
     vp9_rtcd();
+    vpx_scale_rtcd();
     vp9_init_intra_predictors();
     vp9_init_me_luts();
     vp9_rc_init_minq_luts();
@@ -2958,7 +2960,7 @@
   set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
 
   vp9_set_quantizer(cm, q);
-  vp9_set_vbp_thresholds(cpi, cpi->vbp_thresholds, q);
+  vp9_set_vbp_thresholds(cpi, q);
 
   setup_frame(cpi);
 
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 0ad3249..416f679 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -1125,7 +1125,6 @@
 #endif
 
   if (cpi->rc.frames_since_golden == 0) {
-    cpi->ref_frame_flags &= (~VP9_GOLD_FLAG);
     usable_ref_frame = LAST_FRAME;
   } else {
     usable_ref_frame = GOLDEN_FRAME;
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index c2f782b..e9c58cc 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -462,7 +462,6 @@
 static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
                                   const uint8_t **data, unsigned int data_sz,
                                   void *user_priv, int64_t deadline) {
-  vp9_ppflags_t flags = {0, 0, 0};
   const VP9WorkerInterface *const winterface = vp9_get_worker_interface();
   (void)deadline;
 
@@ -547,9 +546,6 @@
     winterface->launch(worker);
   }
 
-  if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)
-    set_ppflags(ctx, &flags);
-
   return VPX_CODEC_OK;
 }
 
diff --git a/vpx_ports/vpx_once.h b/vpx_ports/vpx_once.h
index bd9eebd..f1df394 100644
--- a/vpx_ports/vpx_once.h
+++ b/vpx_ports/vpx_once.h
@@ -110,7 +110,7 @@
 
 
 #else
-/* No-op version that performs no synchronization. vp8_rtcd() is idempotent,
+/* No-op version that performs no synchronization. *_rtcd() is idempotent,
  * so as long as your platform provides atomic loads/stores of pointers
  * no synchronization is strictly necessary.
  */
diff --git a/vpx_scale/vpx_scale_rtcd.c b/vpx_scale/vpx_scale_rtcd.c
index 656a22f..bea603f 100644
--- a/vpx_scale/vpx_scale_rtcd.c
+++ b/vpx_scale/vpx_scale_rtcd.c
@@ -7,9 +7,9 @@
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
-#include "vpx_config.h"
+#include "./vpx_config.h"
 #define RTCD_C
-#include "vpx_scale_rtcd.h"
+#include "./vpx_scale_rtcd.h"
 #include "vpx_ports/vpx_once.h"
 
 void vpx_scale_rtcd()
diff --git a/vpx_scale/win32/scaleopt.c b/vpx_scale/win32/scaleopt.c
deleted file mode 100644
index 4336ece..0000000
--- a/vpx_scale/win32/scaleopt.c
+++ /dev/null
@@ -1,525 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/****************************************************************************
-*
-*   Module Title :     scaleopt.cpp
-*
-*   Description  :     Optimized scaling functions
-*
-****************************************************************************/
-#include "pragmas.h"
-
-/****************************************************************************
-*  Module Statics
-****************************************************************************/
-__declspec(align(16)) const static unsigned short round_values[] = { 128, 128, 128, 128 };
-
-#include "vpx_scale/vpx_scale.h"
-#include "vpx_mem/vpx_mem.h"
-
-__declspec(align(16)) const static unsigned short const54_2[] = {  0,  64, 128, 192 };
-__declspec(align(16)) const static unsigned short const54_1[] = {256, 192, 128,  64 };
-
-
-/****************************************************************************
- *
- *  ROUTINE       : horizontal_line_5_4_scale_mmx
- *
- *  INPUTS        : const unsigned char *source : Pointer to source data.
- *                  unsigned int source_width    : Stride of source.
- *                  unsigned char *dest         : Pointer to destination data.
- *                  unsigned int dest_width      : Stride of destination (NOT USED).
- *
- *  OUTPUTS       : None.
- *
- *  RETURNS       : void
- *
- *  FUNCTION      : Copies horizontal line of pixels from source to
- *                  destination scaling up by 4 to 5.
- *
- *  SPECIAL NOTES : None.
- *
- ****************************************************************************/
-static
-void horizontal_line_5_4_scale_mmx
-(
-  const unsigned char *source,
-  unsigned int source_width,
-  unsigned char *dest,
-  unsigned int dest_width
-) {
-  /*
-  unsigned i;
-  unsigned int a, b, c, d, e;
-  unsigned char *des = dest;
-  const unsigned char *src = source;
-
-  (void) dest_width;
-
-  for ( i=0; i<source_width; i+=5 )
-  {
-      a = src[0];
-      b = src[1];
-      c = src[2];
-      d = src[3];
-      e = src[4];
-
-      des[0] = a;
-      des[1] = ((b*192 + c* 64 + 128)>>8);
-      des[2] = ((c*128 + d*128 + 128)>>8);
-      des[3] = ((d* 64 + e*192 + 128)>>8);
-
-      src += 5;
-      des += 4;
-  }
-  */
-  (void) dest_width;
-
-  __asm {
-
-    mov         esi,        source;
-    mov         edi,        dest;
-
-    mov         ecx,        source_width;
-    movq        mm5,        const54_1;
-
-    pxor        mm7,        mm7;
-    movq        mm6,        const54_2;
-
-    movq        mm4,        round_values;
-    lea         edx,        [esi+ecx];
-    horizontal_line_5_4_loop:
-
-    movq        mm0,        QWORD PTR  [esi];
-    00 01 02 03 04 05 06 07
-    movq        mm1,        mm0;
-    00 01 02 03 04 05 06 07
-
-    psrlq       mm0,        8;
-    01 02 03 04 05 06 07 xx
-    punpcklbw   mm1,        mm7;
-    xx 00 xx 01 xx 02 xx 03
-
-    punpcklbw   mm0,        mm7;
-    xx 01 xx 02 xx 03 xx 04
-    pmullw      mm1,        mm5
-
-    pmullw      mm0,        mm6
-    add         esi,        5
-
-    add         edi,        4
-    paddw       mm1,        mm0
-
-    paddw       mm1,        mm4
-    psrlw       mm1,        8
-
-    cmp         esi,        edx
-    packuswb    mm1,        mm7
-
-    movd        DWORD PTR [edi-4], mm1
-
-    jl          horizontal_line_5_4_loop
-
-  }
-
-}
-__declspec(align(16)) const static unsigned short one_fourths[]   = {  64,  64,  64, 64  };
-__declspec(align(16)) const static unsigned short two_fourths[]   = { 128, 128, 128, 128 };
-__declspec(align(16)) const static unsigned short three_fourths[] = { 192, 192, 192, 192 };
-
-static
-void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) {
-
-  __asm {
-    push        ebx
-
-    mov         esi,    source                    // Get the source and destination pointer
-    mov         ecx,    src_pitch               // Get the pitch size
-
-    mov         edi,    dest                    // tow lines below
-    pxor        mm7,    mm7                     // clear out mm7
-
-    mov         edx,    dest_pitch               // Loop counter
-    mov         ebx,    dest_width
-
-    vs_5_4_loop:
-
-    movd        mm0,    DWORD ptr [esi]         // src[0];
-    movd        mm1,    DWORD ptr [esi+ecx]     // src[1];
-
-    movd        mm2,    DWORD ptr [esi+ecx*2]
-    lea         eax,    [esi+ecx*2]             //
-
-    punpcklbw   mm1,    mm7
-    punpcklbw   mm2,    mm7
-
-    movq        mm3,    mm2
-    pmullw      mm1,    three_fourths
-
-    pmullw      mm2,    one_fourths
-    movd        mm4,    [eax+ecx]
-
-    pmullw      mm3,    two_fourths
-    punpcklbw   mm4,    mm7
-
-    movq        mm5,    mm4
-    pmullw      mm4,    two_fourths
-
-    paddw       mm1,    mm2
-    movd        mm6,    [eax+ecx*2]
-
-    pmullw      mm5,    one_fourths
-    paddw       mm1,    round_values;
-
-    paddw       mm3,    mm4
-    psrlw       mm1,    8
-
-    punpcklbw   mm6,    mm7
-    paddw       mm3,    round_values
-
-    pmullw      mm6,    three_fourths
-    psrlw       mm3,    8
-
-    packuswb    mm1,    mm7
-    packuswb    mm3,    mm7
-
-    movd        DWORD PTR [edi], mm0
-    movd        DWORD PTR [edi+edx], mm1
-
-
-    paddw       mm5,    mm6
-    movd        DWORD PTR [edi+edx*2], mm3
-
-    lea         eax,    [edi+edx*2]
-    paddw       mm5,    round_values
-
-    psrlw       mm5,    8
-    add         edi,    4
-
-    packuswb    mm5,    mm7
-    movd        DWORD PTR [eax+edx], mm5
-
-    add         esi,    4
-    sub         ebx,    4
-
-    jg         vs_5_4_loop
-
-    pop         ebx
-  }
-}
-
-
-__declspec(align(16)) const static unsigned short const53_1[] = {  0,  85, 171, 0 };
-__declspec(align(16)) const static unsigned short const53_2[] = {256, 171,  85, 0 };
-
-
-static
-void horizontal_line_5_3_scale_mmx
-(
-  const unsigned char *source,
-  unsigned int source_width,
-  unsigned char *dest,
-  unsigned int dest_width
-) {
-
-  (void) dest_width;
-  __asm {
-
-    mov         esi,        source;
-    mov         edi,        dest;
-
-    mov         ecx,        source_width;
-    movq        mm5,        const53_1;
-
-    pxor        mm7,        mm7;
-    movq        mm6,        const53_2;
-
-    movq        mm4,        round_values;
-    lea         edx,        [esi+ecx-5];
-    horizontal_line_5_3_loop:
-
-    movq        mm0,        QWORD PTR  [esi];
-    00 01 02 03 04 05 06 07
-    movq        mm1,        mm0;
-    00 01 02 03 04 05 06 07
-
-    psllw       mm0,        8;
-    xx 00 xx 02 xx 04 xx 06
-    psrlw       mm1,        8;
-    01 xx 03 xx 05 xx 07 xx
-
-    psrlw       mm0,        8;
-    00 xx 02 xx 04 xx 06 xx
-    psllq       mm1,        16;
-    xx xx 01 xx 03 xx 05 xx
-
-    pmullw      mm0,        mm6
-
-    pmullw      mm1,        mm5
-    add         esi,        5
-
-    add         edi,        3
-    paddw       mm1,        mm0
-
-    paddw       mm1,        mm4
-    psrlw       mm1,        8
-
-    cmp         esi,        edx
-    packuswb    mm1,        mm7
-
-    movd        DWORD PTR [edi-3], mm1
-    jl          horizontal_line_5_3_loop
-
-// exit condition
-    movq        mm0,        QWORD PTR  [esi];
-    00 01 02 03 04 05 06 07
-    movq        mm1,        mm0;
-    00 01 02 03 04 05 06 07
-
-    psllw       mm0,        8;
-    xx 00 xx 02 xx 04 xx 06
-    psrlw       mm1,        8;
-    01 xx 03 xx 05 xx 07 xx
-
-    psrlw       mm0,        8;
-    00 xx 02 xx 04 xx 06 xx
-    psllq       mm1,        16;
-    xx xx 01 xx 03 xx 05 xx
-
-    pmullw      mm0,        mm6
-
-    pmullw      mm1,        mm5
-    paddw       mm1,        mm0
-
-    paddw       mm1,        mm4
-    psrlw       mm1,        8
-
-    packuswb    mm1,        mm7
-    movd        eax,        mm1
-
-    mov         edx,        eax
-    shr         edx,        16
-
-    mov         WORD PTR[edi],   ax
-    mov         BYTE PTR[edi+2], dl
-
-  }
-
-}
-
-__declspec(align(16)) const static unsigned short one_thirds[] = {  85,  85,  85,  85 };
-__declspec(align(16)) const static unsigned short two_thirds[] = { 171, 171, 171, 171 };
-
-static
-void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) {
-
-  __asm {
-    push        ebx
-
-    mov         esi,    source                    // Get the source and destination pointer
-    mov         ecx,    src_pitch               // Get the pitch size
-
-    mov         edi,    dest                    // tow lines below
-    pxor        mm7,    mm7                     // clear out mm7
-
-    mov         edx,    dest_pitch               // Loop counter
-    movq        mm5,    one_thirds
-
-    movq        mm6,    two_thirds
-    mov         ebx,    dest_width;
-
-    vs_5_3_loop:
-
-    movd        mm0,    DWORD ptr [esi]         // src[0];
-    movd        mm1,    DWORD ptr [esi+ecx]     // src[1];
-
-    movd        mm2,    DWORD ptr [esi+ecx*2]
-    lea         eax,    [esi+ecx*2]             //
-
-    punpcklbw   mm1,    mm7
-    punpcklbw   mm2,    mm7
-
-    pmullw      mm1,    mm5
-    pmullw      mm2,    mm6
-
-    movd        mm3,    DWORD ptr [eax+ecx]
-    movd        mm4,    DWORD ptr [eax+ecx*2]
-
-    punpcklbw   mm3,    mm7
-    punpcklbw   mm4,    mm7
-
-    pmullw      mm3,    mm6
-    pmullw      mm4,    mm5
-
-
-    movd        DWORD PTR [edi], mm0
-    paddw       mm1,    mm2
-
-    paddw       mm1,    round_values
-    psrlw       mm1,    8
-
-    packuswb    mm1,    mm7
-    paddw       mm3,    mm4
-
-    paddw       mm3,    round_values
-    movd        DWORD PTR [edi+edx], mm1
-
-    psrlw       mm3,    8
-    packuswb    mm3,    mm7
-
-    movd        DWORD PTR [edi+edx*2], mm3
-
-
-    add         edi,    4
-    add         esi,    4
-
-    sub         ebx,    4
-    jg          vs_5_3_loop
-
-    pop         ebx
-  }
-}
-
-
-
-
-/****************************************************************************
- *
- *  ROUTINE       : horizontal_line_2_1_scale
- *
- *  INPUTS        : const unsigned char *source :
- *                  unsigned int source_width    :
- *                  unsigned char *dest         :
- *                  unsigned int dest_width      :
- *
- *  OUTPUTS       : None.
- *
- *  RETURNS       : void
- *
- *  FUNCTION      : 1 to 2 up-scaling of a horizontal line of pixels.
- *
- *  SPECIAL NOTES : None.
- *
- ****************************************************************************/
-static
-void horizontal_line_2_1_scale_mmx
-(
-  const unsigned char *source,
-  unsigned int source_width,
-  unsigned char *dest,
-  unsigned int dest_width
-) {
-  (void) dest_width;
-  (void) source_width;
-  __asm {
-    mov         esi,    source
-    mov         edi,    dest
-
-    pxor        mm7,    mm7
-    mov         ecx,    dest_width
-
-    xor         edx,    edx
-    hs_2_1_loop:
-
-    movq        mm0,    [esi+edx*2]
-    psllw       mm0,    8
-
-    psrlw       mm0,    8
-    packuswb    mm0,    mm7
-
-    movd        DWORD Ptr [edi+edx], mm0;
-    add         edx,    4
-
-    cmp         edx,    ecx
-    jl          hs_2_1_loop
-
-  }
-}
-
-
-
-static
-void vertical_band_2_1_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) {
-  (void) dest_pitch;
-  (void) src_pitch;
-  vpx_memcpy(dest, source, dest_width);
-}
-
-
-__declspec(align(16)) const static unsigned short three_sixteenths[] = {  48,  48,  48,  48 };
-__declspec(align(16)) const static unsigned short ten_sixteenths[]   = { 160, 160, 160, 160 };
-
-static
-void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) {
-
-  (void) dest_pitch;
-  __asm {
-    mov         esi,        source
-    mov         edi,        dest
-
-    mov         eax,        src_pitch
-    mov         edx,        dest_width
-
-    pxor        mm7,        mm7
-    sub         esi,        eax             // back one line
-
-
-    lea         ecx,        [esi+edx];
-    movq        mm6,        round_values;
-
-    movq        mm5,        three_sixteenths;
-    movq        mm4,        ten_sixteenths;
-
-    vs_2_1_i_loop:
-    movd        mm0,        [esi]           //
-    movd        mm1,        [esi+eax]       //
-
-    movd        mm2,        [esi+eax*2]     //
-    punpcklbw   mm0,        mm7
-
-    pmullw      mm0,        mm5
-    punpcklbw   mm1,        mm7
-
-    pmullw      mm1,        mm4
-    punpcklbw   mm2,        mm7
-
-    pmullw      mm2,        mm5
-    paddw       mm0,        round_values
-
-    paddw       mm1,        mm2
-    paddw       mm0,        mm1
-
-    psrlw       mm0,        8
-    packuswb    mm0,        mm7
-
-    movd        DWORD PTR [edi],        mm0
-    add         esi,        4
-
-    add         edi,        4;
-    cmp         esi,        ecx
-    jl          vs_2_1_i_loop
-
-  }
-}
-
-
-
-void
-register_mmxscalers(void) {
-  vp8_vertical_band_5_4_scale           = vertical_band_5_4_scale_mmx;
-  vp8_vertical_band_5_3_scale           = vertical_band_5_3_scale_mmx;
-  vp8_vertical_band_2_1_scale           = vertical_band_2_1_scale_mmx;
-  vp8_vertical_band_2_1_scale_i         = vertical_band_2_1_scale_i_mmx;
-  vp8_horizontal_line_2_1_scale         = horizontal_line_2_1_scale_mmx;
-  vp8_horizontal_line_5_3_scale         = horizontal_line_5_3_scale_mmx;
-  vp8_horizontal_line_5_4_scale         = horizontal_line_5_4_scale_mmx;
-}