Merge "Removed unused diff buffer"

diff --git a/args.c b/args.c
index 7b2cc3a..37ba778 100644
--- a/args.c
+++ b/args.c

@@ -57,7 +57,7 @@
     }
     else if (def->long_name)
     {
-        int name_len = strlen(def->long_name);
+        const size_t name_len = strlen(def->long_name);
 
         if (strlen(arg.argv[0]) >= name_len + 2
             && arg.argv[0][1] == '-'

diff --git a/build/make/ads2gas_apple.pl b/build/make/ads2gas_apple.pl
index 78f4a97..81280bf 100755
--- a/build/make/ads2gas_apple.pl
+++ b/build/make/ads2gas_apple.pl

@@ -30,6 +30,8 @@
 
 my @incoming_array;
 
+my @imported_functions;
+
 # Perl trim function to remove whitespace from the start and end of the string
 sub trim($)
 {
@@ -132,7 +134,18 @@
     # Make function visible to linker, and make additional symbol with
     # prepended underscore
     s/EXPORT\s+\|([\$\w]*)\|/.globl _$1\n\t.globl $1/;
-    s/IMPORT\s+\|([\$\w]*)\|/.globl $1/;
+
+    # Prepend imported functions with _
+    if (s/IMPORT\s+\|([\$\w]*)\|/.globl $1/)
+    {
+        $function = trim($1);
+        push(@imported_functions, $function);
+    }
+
+    foreach $function (@imported_functions)
+    {
+        s/$function/_$function/;
+    }
 
     # No vertical bars required; make additional symbol with prepended
     # underscore
@@ -157,8 +170,8 @@
     s/\sPRESERVE8/@ PRESERVE8/g;
 
     # Strip PROC and ENDPROC
-    s/PROC/@/g;
-    s/ENDP/@/g;
+    s/\bPROC\b/@/g;
+    s/\bENDP\b/@/g;
 
     # EQU directive
     s/(.*)EQU(.*)/.set $1, $2/;

diff --git a/build/make/configure.sh b/build/make/configure.sh
index 1279f78..0426f92 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh

@@ -561,6 +561,10 @@
                 tgt_isa=x86_64
                 tgt_os=darwin10
                 ;;
+            *darwin11*)
+                tgt_isa=x86_64
+                tgt_os=darwin11
+                ;;
             *mingw32*|*cygwin*)
                 [ -z "$tgt_isa" ] && tgt_isa=x86
                 tgt_os=win32
@@ -617,6 +621,9 @@
     if [ -d "/Developer/SDKs/MacOSX10.6.sdk" ]; then
         osx_sdk_dir="/Developer/SDKs/MacOSX10.6.sdk"
     fi
+    if [ -d "/Developer/SDKs/MacOSX10.7.sdk" ]; then
+        osx_sdk_dir="/Developer/SDKs/MacOSX10.7.sdk"
+    fi
 
     case ${toolchain} in
         *-darwin8-*)
@@ -637,6 +644,12 @@
             add_ldflags "-isysroot ${osx_sdk_dir}"
             add_ldflags "-mmacosx-version-min=10.6"
             ;;
+        *-darwin11-*)
+            add_cflags  "-isysroot ${osx_sdk_dir}"
+            add_cflags  "-mmacosx-version-min=10.7"
+            add_ldflags "-isysroot ${osx_sdk_dir}"
+            add_ldflags "-mmacosx-version-min=10.7"
+            ;;
     esac
 
     # Handle Solaris variants. Solaris 10 needs -lposix4
@@ -732,7 +745,7 @@
             TOOLCHAIN_PATH=${SDK_PATH}/usr/bin
             CC=${TOOLCHAIN_PATH}/gcc
             AR=${TOOLCHAIN_PATH}/ar
-            LD=${TOOLCHAIN_PATH}/arm-apple-darwin10-gcc-4.2.1
+            LD=${TOOLCHAIN_PATH}/arm-apple-darwin10-llvm-gcc-4.2
             AS=${TOOLCHAIN_PATH}/as
             STRIP=${TOOLCHAIN_PATH}/strip
             NM=${TOOLCHAIN_PATH}/nm
@@ -746,13 +759,13 @@
             add_cflags -arch ${tgt_isa}
             add_ldflags -arch_only ${tgt_isa}
 
-            add_cflags  "-isysroot ${SDK_PATH}/SDKs/iPhoneOS4.3.sdk"
+            add_cflags  "-isysroot ${SDK_PATH}/SDKs/iPhoneOS5.0.sdk"
 
             # This should be overridable
-            alt_libc=${SDK_PATH}/SDKs/iPhoneOS4.3.sdk
+            alt_libc=${SDK_PATH}/SDKs/iPhoneOS5.0.sdk
 
             # Add the paths for the alternate libc
-            for d in usr/include usr/include/gcc/darwin/4.2/ usr/lib/gcc/arm-apple-darwin10/4.2.1/include/; do
+            for d in usr/include; do
                 try_dir="${alt_libc}/${d}"
                 [ -d "${try_dir}" ] && add_cflags -I"${try_dir}"
             done

diff --git a/configure b/configure
index 363687a..6f20c6b 100755
--- a/configure
+++ b/configure

@@ -119,6 +119,7 @@
 all_platforms="${all_platforms} x86-win32-vs9"
 all_platforms="${all_platforms} x86_64-darwin9-gcc"
 all_platforms="${all_platforms} x86_64-darwin10-gcc"
+all_platforms="${all_platforms} x86_64-darwin11-gcc"
 all_platforms="${all_platforms} x86_64-linux-gcc"
 all_platforms="${all_platforms} x86_64-linux-icc"
 all_platforms="${all_platforms} x86_64-solaris-gcc"

diff --git a/examples.mk b/examples.mk
index 1f7dcc1..f6c9045 100644
--- a/examples.mk
+++ b/examples.mk

@@ -102,6 +102,7 @@
                          += third_party/libyuv/include/libyuv/basic_types.h  \
                             third_party/libyuv/include/libyuv/cpu_id.h  \
                             third_party/libyuv/include/libyuv/scale.h  \
+                            third_party/libyuv/source/row.h \
                             third_party/libyuv/source/scale.c  \
                             third_party/libyuv/source/cpu_id.c
 vp8_multi_resolution_encoder.GUID         = 04f8738e-63c8-423b-90fa-7c2703a374de

diff --git a/third_party/libyuv/README.webm b/third_party/libyuv/README.webm
index 32766be..d3495ca 100644
--- a/third_party/libyuv/README.webm
+++ b/third_party/libyuv/README.webm

@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 90
+Version: 102
 License: BSD
 License File: LICENSE
 

diff --git a/third_party/libyuv/include/libyuv/basic_types.h b/third_party/libyuv/include/libyuv/basic_types.h
index 87f8bd2..30504ce 100644
--- a/third_party/libyuv/include/libyuv/basic_types.h
+++ b/third_party/libyuv/include/libyuv/basic_types.h

@@ -13,22 +13,13 @@
 
 #include <stddef.h>  // for NULL, size_t
 
-#ifndef WIN32
+#if !(defined(_MSC_VER) && (_MSC_VER < 1600))
 #include <stdint.h>  // for uintptr_t
 #endif
 
 #ifndef INT_TYPES_DEFINED
 #define INT_TYPES_DEFINED
 #ifdef COMPILER_MSVC
-typedef __int64 int64;
-#else
-typedef long long int64;
-#endif /* COMPILER_MSVC */
-typedef int int32;
-typedef short int16;
-typedef char int8;
-
-#ifdef COMPILER_MSVC
 typedef unsigned __int64 uint64;
 typedef __int64 int64;
 #ifndef INT64_C
@@ -38,9 +29,20 @@
 #define UINT64_C(x) x ## UI64
 #endif
 #define INT64_F "I64"
-#else
+#else  // COMPILER_MSVC
+#ifdef __LP64__
+typedef unsigned long uint64;
+typedef long int64;
+#ifndef INT64_C
+#define INT64_C(x) x ## L
+#endif
+#ifndef UINT64_C
+#define UINT64_C(x) x ## UL
+#endif
+#define INT64_F "l"
+#else  // __LP64__
 typedef unsigned long long uint64;
-//typedef long long int64;
+typedef long long int64;
 #ifndef INT64_C
 #define INT64_C(x) x ## LL
 #endif
@@ -48,10 +50,14 @@
 #define UINT64_C(x) x ## ULL
 #endif
 #define INT64_F "ll"
-#endif /* COMPILER_MSVC */
+#endif  // __LP64__
+#endif  // COMPILER_MSVC
 typedef unsigned int uint32;
+typedef int int32;
 typedef unsigned short uint16;
+typedef short int16;
 typedef unsigned char uint8;
+typedef char int8;
 #endif  // INT_TYPES_DEFINED
 
 // Detect compiler is for x86 or x64.
@@ -60,7 +66,6 @@
 #define CPU_X86 1
 #endif
 
-#define IS_ALIGNED(p, a) (0==((uintptr_t)(p) & ((a)-1)))
 #define ALIGNP(p, t) \
   ((uint8*)((((uintptr_t)(p) + \
   ((t)-1)) & ~((t)-1))))

diff --git a/third_party/libyuv/include/libyuv/cpu_id.h b/third_party/libyuv/include/libyuv/cpu_id.h
index 8ebafe9..4a53b5b 100644
--- a/third_party/libyuv/include/libyuv/cpu_id.h
+++ b/third_party/libyuv/include/libyuv/cpu_id.h

@@ -11,21 +11,39 @@
 #ifndef INCLUDE_LIBYUV_CPU_ID_H_
 #define INCLUDE_LIBYUV_CPU_ID_H_
 
-//namespace libyuv {
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
 
 // These flags are only valid on x86 processors
 static const int kCpuHasSSE2 = 1;
 static const int kCpuHasSSSE3 = 2;
 
-// SIMD support on ARM processors
+// These flags are only valid on ARM processors
 static const int kCpuHasNEON = 4;
 
+// Internal flag to indicate cpuid is initialized.
+static const int kCpuInitialized = 8;
+
 // Detect CPU has SSE2 etc.
-int TestCpuFlag(int flag);
+// test_flag parameter should be one of kCpuHas constants above
+// returns non-zero if instruction set is detected
+static __inline int TestCpuFlag(int test_flag) {
+  extern int cpu_info_;
+  extern int InitCpuFlags();
+  return (cpu_info_ ? cpu_info_ : InitCpuFlags()) & test_flag;
+}
 
 // For testing, allow CPU flags to be disabled.
-void MaskCpuFlagsForTest(int enable_flags);
+// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
+// -1 to enable all cpu specific optimizations.
+// 0 to disable all cpu specific optimizations.
+void MaskCpuFlags(int enable_flags);
 
-//}  // namespace libyuv
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
 
 #endif  // INCLUDE_LIBYUV_CPU_ID_H_

diff --git a/third_party/libyuv/include/libyuv/scale.h b/third_party/libyuv/include/libyuv/scale.h
index 5b2d364..21fe360 100644
--- a/third_party/libyuv/include/libyuv/scale.h
+++ b/third_party/libyuv/include/libyuv/scale.h

@@ -13,7 +13,10 @@
 
 #include "third_party/libyuv/include/libyuv/basic_types.h"
 
-//namespace libyuv {
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
 
 // Supported filtering
 typedef enum {
@@ -42,16 +45,8 @@
               int dst_width, int dst_height,
               FilterMode filtering);
 
-// Legacy API
-// If dst_height_offset is non-zero, the image is offset by that many pixels
-// and stretched to (dst_height - dst_height_offset * 2) pixels high,
-// instead of dst_height.
-int Scale_1(const uint8* src, int src_width, int src_height,
-          uint8* dst, int dst_width, int dst_height, int dst_height_offset,
-          int interpolate);
-
-// Same, but specified src terms of each plane location and stride.
-int Scale_2(const uint8* src_y, const uint8* src_u, const uint8* src_v,
+// Legacy API.  Deprecated
+int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
           int src_stride_y, int src_stride_u, int src_stride_v,
           int src_width, int src_height,
           uint8* dst_y, uint8* dst_u, uint8* dst_v,
@@ -59,9 +54,17 @@
           int dst_width, int dst_height,
           int interpolate);
 
+// Legacy API.  Deprecated
+int ScaleOffset(const uint8* src, int src_width, int src_height,
+                uint8* dst, int dst_width, int dst_height, int dst_yoffset,
+                int interpolate);
+
 // For testing, allow disabling of optimizations.
 void SetUseReferenceImpl(int use);
 
-//} // namespace libyuv
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
 
 #endif // INCLUDE_LIBYUV_SCALE_H_

diff --git a/third_party/libyuv/source/cpu_id.c b/third_party/libyuv/source/cpu_id.c
index e3b66f2..fccf3dd 100644
--- a/third_party/libyuv/source/cpu_id.c
+++ b/third_party/libyuv/source/cpu_id.c

@@ -9,66 +9,73 @@
  */
 
 #include "third_party/libyuv/include/libyuv/cpu_id.h"
-#include "third_party/libyuv/include/libyuv/basic_types.h"  // for CPU_X86
 
 #ifdef _MSC_VER
 #include <intrin.h>
 #endif
+#ifdef __ANDROID__
+#include <cpu-features.h>
+#endif
+
+#include "third_party/libyuv/include/libyuv/basic_types.h"  // for CPU_X86
 
 // TODO(fbarchard): Use cpuid.h when gcc 4.4 is used on OSX and Linux.
 #if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__)
 static inline void __cpuid(int cpu_info[4], int info_type) {
-  __asm__ volatile (
-    "mov %%ebx, %%edi\n"
-    "cpuid\n"
-    "xchg %%edi, %%ebx\n"
+  asm volatile (
+    "mov %%ebx, %%edi                          \n"
+    "cpuid                                     \n"
+    "xchg %%edi, %%ebx                         \n"
     : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
     : "a"(info_type)
   );
 }
 #elif defined(__i386__) || defined(__x86_64__)
 static inline void __cpuid(int cpu_info[4], int info_type) {
-  __asm__ volatile (
-    "cpuid\n"
+  asm volatile (
+    "cpuid                                     \n"
     : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
     : "a"(info_type)
   );
 }
 #endif
 
-//namespace libyuv {
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
 
 // CPU detect function for SIMD instruction sets.
-static int cpu_info_initialized_ = 0;
-static int cpu_info_ = 0;
+int cpu_info_ = 0;
 
-// Global lock for cpu initialization.
-static void InitCpuFlags() {
+int InitCpuFlags() {
 #ifdef CPU_X86
   int cpu_info[4];
   __cpuid(cpu_info, 1);
-  cpu_info_ = (cpu_info[2] & 0x00000200 ? kCpuHasSSSE3 : 0) |
-              (cpu_info[3] & 0x04000000 ? kCpuHasSSE2 : 0);
+  cpu_info_ = (cpu_info[3] & 0x04000000 ? kCpuHasSSE2 : 0) |
+              (cpu_info[2] & 0x00000200 ? kCpuHasSSSE3 : 0) |
+              kCpuInitialized;
+#elif defined(__ANDROID__) && defined(__ARM_NEON__)
+  uint64_t features = android_getCpuFeatures();
+  cpu_info_ = ((features & ANDROID_CPU_ARM_FEATURE_NEON) ? kCpuHasNEON : 0) |
+              kCpuInitialized;
 #elif defined(__ARM_NEON__)
   // gcc -mfpu=neon defines __ARM_NEON__
-  // if code is specifically built for Neon-only, enable the flag.
-  cpu_info_ |= kCpuHasNEON;
+  // Enable Neon if you want support for Neon and Arm, and use MaskCpuFlags
+  // to disable Neon on devices that do not have it.
+  cpu_info_ = kCpuHasNEON | kCpuInitialized;
 #else
-  cpu_info_ = 0;
+  cpu_info_ = kCpuInitialized;
 #endif
-  cpu_info_initialized_ = 1;
+  return cpu_info_;
 }
 
-void MaskCpuFlagsForTest(int enable_flags) {
+void MaskCpuFlags(int enable_flags) {
   InitCpuFlags();
-  cpu_info_ &= enable_flags;
+  cpu_info_ = (cpu_info_ & enable_flags) | kCpuInitialized;
 }
 
-int TestCpuFlag(int flag) {
-  if (!cpu_info_initialized_) {
-    InitCpuFlags();
-  }
-  return cpu_info_ & flag ? 1 : 0;
-}
-
-//}  // namespace libyuv
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif

diff --git a/third_party/libyuv/source/row.h b/third_party/libyuv/source/row.h
index 0486fe2..eabe180 100644
--- a/third_party/libyuv/source/row.h
+++ b/third_party/libyuv/source/row.h

@@ -14,7 +14,7 @@
 #include "third_party/libyuv/include/libyuv/basic_types.h"
 
 #define kMaxStride (2048 * 4)
-//#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
+#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
 
 #if defined(COVERAGE_ENABLED) || defined(TARGET_IPHONE_SIMULATOR)
 #define YUV_DISABLE_ASM
@@ -72,7 +72,10 @@
 #define HAS_REVERSE_ROW_NEON
 #endif
 
-//extern "C" {
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
 
 #ifdef HAS_ARGBTOYROW_SSSE3
 void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
@@ -253,6 +256,9 @@
 
 #endif
 
-//}  // extern "C"
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
 
 #endif  // LIBYUV_SOURCE_ROW_H_

diff --git a/third_party/libyuv/source/scale.c b/third_party/libyuv/source/scale.c
index 02ffdac..930a7ae 100644
--- a/third_party/libyuv/source/scale.c
+++ b/third_party/libyuv/source/scale.c

@@ -15,6 +15,17 @@
 
 #include "third_party/libyuv/include/libyuv/cpu_id.h"
 #include "third_party/libyuv/source/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+/*
+ * Note: Defining YUV_DISABLE_ASM allows to use c version.
+ */
+//#define YUV_DISABLE_ASM
+
 #if defined(_MSC_VER)
 #define ALIGN16(var) __declspec(align(16)) var
 #else
@@ -26,8 +37,6 @@
 // Note: Some SSE2 reference manuals
 // cpuvol1.pdf agner_instruction_tables.pdf 253666.pdf 253667.pdf
 
-//namespace libyuv {
-
 // Set the following flag to true to revert to only
 // using the reference implementation ScalePlaneBox(), and
 // NOT the optimized versions. Useful for debugging and
@@ -40,9 +49,7 @@
   use_reference_impl_ = use;
 }
 
-// TODO: The preprocessor definitions for Win64 are not right in build system.
-// Disable optimized code for now.
-#define YUV_DISABLE_ASM
+// ScaleRowDown2Int also used by planar functions
 
 /**
  * NEON downscalers with interpolation.
@@ -511,83 +518,116 @@
     !defined(YUV_DISABLE_ASM)
 #if defined(_MSC_VER)
 #define TALIGN16(t, var) __declspec(align(16)) t _ ## var
-#elif defined(OSX) && defined(__i386__)
+#elif (defined(__APPLE__) || defined(__MINGW32__) || defined(__CYGWIN__)) && defined(__i386__)
 #define TALIGN16(t, var) t var __attribute__((aligned(16)))
 #else
 #define TALIGN16(t, var) t _ ## var __attribute__((aligned(16)))
 #endif
 
+#if (defined(__APPLE__) || defined(__MINGW32__) || defined(__CYGWIN__)) && \
+    defined(__i386__)
+#define DECLARE_FUNCTION(name)                                                 \
+    ".text                                     \n"                             \
+    ".globl _" #name "                         \n"                             \
+"_" #name ":                                   \n"
+#else
+#define DECLARE_FUNCTION(name)                                                 \
+    ".text                                     \n"                             \
+    ".global " #name "                         \n"                             \
+#name ":                                       \n"
+#endif
+
+
 // Offsets for source bytes 0 to 9
+//extern "C"
 TALIGN16(const uint8, shuf0[16]) =
   { 0, 1, 3, 4, 5, 7, 8, 9, 128, 128, 128, 128, 128, 128, 128, 128 };
 
 // Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12.
+//extern "C"
 TALIGN16(const uint8, shuf1[16]) =
   { 3, 4, 5, 7, 8, 9, 11, 12, 128, 128, 128, 128, 128, 128, 128, 128 };
 
 // Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
+//extern "C"
 TALIGN16(const uint8, shuf2[16]) =
   { 5, 7, 8, 9, 11, 12, 13, 15, 128, 128, 128, 128, 128, 128, 128, 128 };
 
 // Offsets for source bytes 0 to 10
+//extern "C"
 TALIGN16(const uint8, shuf01[16]) =
   { 0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10 };
 
 // Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13.
+//extern "C"
 TALIGN16(const uint8, shuf11[16]) =
   { 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13 };
 
 // Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
+//extern "C"
 TALIGN16(const uint8, shuf21[16]) =
   { 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15 };
 
 // Coefficients for source bytes 0 to 10
+//extern "C"
 TALIGN16(const uint8, madd01[16]) =
   { 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2 };
 
 // Coefficients for source bytes 10 to 21
+//extern "C"
 TALIGN16(const uint8, madd11[16]) =
   { 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1 };
 
 // Coefficients for source bytes 21 to 31
+//extern "C"
 TALIGN16(const uint8, madd21[16]) =
   { 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3 };
 
 // Coefficients for source bytes 21 to 31
+//extern "C"
 TALIGN16(const int16, round34[8]) =
   { 2, 2, 2, 2, 2, 2, 2, 2 };
 
+//extern "C"
 TALIGN16(const uint8, shuf38a[16]) =
   { 0, 3, 6, 8, 11, 14, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
 
+//extern "C"
 TALIGN16(const uint8, shuf38b[16]) =
   { 128, 128, 128, 128, 128, 128, 0, 3, 6, 8, 11, 14, 128, 128, 128, 128 };
 
 // Arrange words 0,3,6 into 0,1,2
+//extern "C"
 TALIGN16(const uint8, shufac0[16]) =
   { 0, 1, 6, 7, 12, 13, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
 
 // Arrange words 0,3,6 into 3,4,5
+//extern "C"
 TALIGN16(const uint8, shufac3[16]) =
   { 128, 128, 128, 128, 128, 128, 0, 1, 6, 7, 12, 13, 128, 128, 128, 128 };
 
 // Scaling values for boxes of 3x3 and 2x3
+//extern "C"
 TALIGN16(const uint16, scaleac3[8]) =
   { 65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, 65536 / 9, 65536 / 6, 0, 0 };
 
 // Arrange first value for pixels 0,1,2,3,4,5
+//extern "C"
 TALIGN16(const uint8, shufab0[16]) =
   { 0, 128, 3, 128, 6, 128, 8, 128, 11, 128, 14, 128, 128, 128, 128, 128 };
 
 // Arrange second value for pixels 0,1,2,3,4,5
+//extern "C"
 TALIGN16(const uint8, shufab1[16]) =
   { 1, 128, 4, 128, 7, 128, 9, 128, 12, 128, 15, 128, 128, 128, 128, 128 };
 
 // Arrange third value for pixels 0,1,2,3,4,5
+//extern "C"
 TALIGN16(const uint8, shufab2[16]) =
   { 2, 128, 5, 128, 128, 128, 10, 128, 13, 128, 128, 128, 128, 128, 128, 128 };
 
 // Scaling values for boxes of 3x2 and 2x2
+//extern "C"
 TALIGN16(const uint16, scaleab2[8]) =
   { 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 };
 #endif
@@ -1620,14 +1660,7 @@
 void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride,
                                       uint8* dst_ptr, int dst_width);
   asm(
-    ".text                                     \n"
-#if defined(OSX)
-    ".globl _ScaleRowDown8Int_SSE2             \n"
-"_ScaleRowDown8Int_SSE2:                       \n"
-#else
-    ".global ScaleRowDown8Int_SSE2             \n"
-"ScaleRowDown8Int_SSE2:                        \n"
-#endif
+    DECLARE_FUNCTION(ScaleRowDown8Int_SSE2)
     "pusha                                     \n"
     "mov    0x24(%esp),%esi                    \n"
     "mov    0x28(%esp),%ebx                    \n"
@@ -1691,14 +1724,7 @@
 void ScaleRowDown34_SSSE3(const uint8* src_ptr, int src_stride,
                                      uint8* dst_ptr, int dst_width);
   asm(
-    ".text                                     \n"
-#if defined(OSX)
-    ".globl _ScaleRowDown34_SSSE3              \n"
-"_ScaleRowDown34_SSSE3:                        \n"
-#else
-    ".global ScaleRowDown34_SSSE3              \n"
-"ScaleRowDown34_SSSE3:                         \n"
-#endif
+    DECLARE_FUNCTION(ScaleRowDown34_SSSE3)
     "pusha                                     \n"
     "mov    0x24(%esp),%esi                    \n"
     "mov    0x2c(%esp),%edi                    \n"
@@ -1729,14 +1755,7 @@
 void ScaleRowDown34_1_Int_SSSE3(const uint8* src_ptr, int src_stride,
                                            uint8* dst_ptr, int dst_width);
   asm(
-    ".text                                     \n"
-#if defined(OSX)
-    ".globl _ScaleRowDown34_1_Int_SSSE3        \n"
-"_ScaleRowDown34_1_Int_SSSE3:                  \n"
-#else
-    ".global ScaleRowDown34_1_Int_SSSE3        \n"
-"ScaleRowDown34_1_Int_SSSE3:                   \n"
-#endif
+    DECLARE_FUNCTION(ScaleRowDown34_1_Int_SSSE3)
     "pusha                                     \n"
     "mov    0x24(%esp),%esi                    \n"
     "mov    0x28(%esp),%ebp                    \n"
@@ -1790,14 +1809,7 @@
 void ScaleRowDown34_0_Int_SSSE3(const uint8* src_ptr, int src_stride,
                                            uint8* dst_ptr, int dst_width);
   asm(
-    ".text                                     \n"
-#if defined(OSX)
-    ".globl _ScaleRowDown34_0_Int_SSSE3        \n"
-"_ScaleRowDown34_0_Int_SSSE3:                  \n"
-#else
-    ".global ScaleRowDown34_0_Int_SSSE3        \n"
-"ScaleRowDown34_0_Int_SSSE3:                   \n"
-#endif
+    DECLARE_FUNCTION(ScaleRowDown34_0_Int_SSSE3)
     "pusha                                     \n"
     "mov    0x24(%esp),%esi                    \n"
     "mov    0x28(%esp),%ebp                    \n"
@@ -1854,14 +1866,7 @@
 void ScaleRowDown38_SSSE3(const uint8* src_ptr, int src_stride,
                                      uint8* dst_ptr, int dst_width);
   asm(
-    ".text                                     \n"
-#if defined(OSX)
-    ".globl _ScaleRowDown38_SSSE3              \n"
-"_ScaleRowDown38_SSSE3:                        \n"
-#else
-    ".global ScaleRowDown38_SSSE3              \n"
-"ScaleRowDown38_SSSE3:                         \n"
-#endif
+    DECLARE_FUNCTION(ScaleRowDown38_SSSE3)
     "pusha                                     \n"
     "mov    0x24(%esp),%esi                    \n"
     "mov    0x28(%esp),%edx                    \n"
@@ -1890,14 +1895,7 @@
 void ScaleRowDown38_3_Int_SSSE3(const uint8* src_ptr, int src_stride,
                                            uint8* dst_ptr, int dst_width);
   asm(
-    ".text                                     \n"
-#if defined(OSX)
-    ".globl _ScaleRowDown38_3_Int_SSSE3        \n"
-"_ScaleRowDown38_3_Int_SSSE3:                  \n"
-#else
-    ".global ScaleRowDown38_3_Int_SSSE3        \n"
-"ScaleRowDown38_3_Int_SSSE3:                   \n"
-#endif
+    DECLARE_FUNCTION(ScaleRowDown38_3_Int_SSSE3)
     "pusha                                     \n"
     "mov    0x24(%esp),%esi                    \n"
     "mov    0x28(%esp),%edx                    \n"
@@ -1954,14 +1952,7 @@
 void ScaleRowDown38_2_Int_SSSE3(const uint8* src_ptr, int src_stride,
                                            uint8* dst_ptr, int dst_width);
   asm(
-    ".text                                     \n"
-#if defined(OSX)
-    ".globl _ScaleRowDown38_2_Int_SSSE3        \n"
-"_ScaleRowDown38_2_Int_SSSE3:                  \n"
-#else
-    ".global ScaleRowDown38_2_Int_SSSE3        \n"
-"ScaleRowDown38_2_Int_SSSE3:                   \n"
-#endif
+    DECLARE_FUNCTION(ScaleRowDown38_2_Int_SSSE3)
     "pusha                                     \n"
     "mov    0x24(%esp),%esi                    \n"
     "mov    0x28(%esp),%edx                    \n"
@@ -2001,14 +1992,7 @@
                                   uint16* dst_ptr, int src_width,
                                   int src_height);
   asm(
-    ".text                                     \n"
-#if defined(OSX)
-    ".globl _ScaleAddRows_SSE2                 \n"
-"_ScaleAddRows_SSE2:                           \n"
-#else
-    ".global ScaleAddRows_SSE2                 \n"
-"ScaleAddRows_SSE2:                            \n"
-#endif
+    DECLARE_FUNCTION(ScaleAddRows_SSE2)
     "pusha                                     \n"
     "mov    0x24(%esp),%esi                    \n"
     "mov    0x28(%esp),%edx                    \n"
@@ -2052,14 +2036,7 @@
                                      const uint8* src_ptr, int src_stride,
                                      int dst_width, int source_y_fraction);
   asm(
-    ".text                                     \n"
-#if defined(OSX)
-    ".globl _ScaleFilterRows_SSE2              \n"
-"_ScaleFilterRows_SSE2:                        \n"
-#else
-    ".global ScaleFilterRows_SSE2              \n"
-"ScaleFilterRows_SSE2:                         \n"
-#endif
+    DECLARE_FUNCTION(ScaleFilterRows_SSE2)
     "push   %esi                               \n"
     "push   %edi                               \n"
     "mov    0xc(%esp),%edi                     \n"
@@ -2147,14 +2124,7 @@
                                       const uint8* src_ptr, int src_stride,
                                       int dst_width, int source_y_fraction);
   asm(
-    ".text                                     \n"
-#if defined(OSX)
-    ".globl _ScaleFilterRows_SSSE3             \n"
-"_ScaleFilterRows_SSSE3:                       \n"
-#else
-    ".global ScaleFilterRows_SSSE3             \n"
-"ScaleFilterRows_SSSE3:                        \n"
-#endif
+    DECLARE_FUNCTION(ScaleFilterRows_SSSE3)
     "push   %esi                               \n"
     "push   %edi                               \n"
     "mov    0xc(%esp),%edi                     \n"
@@ -2318,7 +2288,7 @@
 
 static void ScaleRowDown34_1_Int_SSSE3(const uint8* src_ptr, int src_stride,
                                        uint8* dst_ptr, int dst_width) {
-  asm volatile(
+  asm volatile (
   "movdqa     (%4),%%xmm2                      \n"  // _shuf01
   "movdqa     (%5),%%xmm3                      \n"  // _shuf11
   "movdqa     (%6),%%xmm4                      \n"  // _shuf21
@@ -2436,7 +2406,7 @@
 #define HAS_SCALEROWDOWN38_SSSE3
 static void ScaleRowDown38_SSSE3(const uint8* src_ptr, int src_stride,
                                  uint8* dst_ptr, int dst_width) {
-  asm volatile(
+  asm volatile (
   "movdqa     (%3),%%xmm4                      \n"
   "movdqa     (%4),%%xmm5                      \n"
 "1:"
@@ -2560,7 +2530,7 @@
 static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride,
                               uint16* dst_ptr, int src_width,
                               int src_height) {
-  asm volatile(
+  asm volatile (
   "pxor       %%xmm5,%%xmm5                    \n"
 "1:"
   "movdqa     (%0),%%xmm2                      \n"
@@ -2602,7 +2572,7 @@
                                  const uint8* src_ptr, int src_stride,
                                  int dst_width, int source_y_fraction) {
   if (source_y_fraction == 0) {
-    asm volatile(
+    asm volatile (
     "1:"
       "movdqa     (%1),%%xmm0                  \n"
       "lea        0x10(%1),%1                  \n"
@@ -2620,7 +2590,7 @@
     );
     return;
   } else if (source_y_fraction == 128) {
-    asm volatile(
+    asm volatile (
     "1:"
       "movdqa     (%1),%%xmm0                  \n"
       "movdqa     (%1,%3,1),%%xmm2             \n"
@@ -2640,7 +2610,7 @@
     );
     return;
   } else {
-    asm volatile(
+    asm volatile (
       "mov        %3,%%eax                     \n"
       "movd       %%eax,%%xmm6                 \n"
       "punpcklwd  %%xmm6,%%xmm6                \n"
@@ -2693,7 +2663,7 @@
                                   const uint8* src_ptr, int src_stride,
                                   int dst_width, int source_y_fraction) {
   if (source_y_fraction == 0) {
-    asm volatile(
+    asm volatile (
    "1:"
       "movdqa     (%1),%%xmm0                  \n"
       "lea        0x10(%1),%1                  \n"
@@ -2711,7 +2681,7 @@
     );
     return;
   } else if (source_y_fraction == 128) {
-    asm volatile(
+    asm volatile (
     "1:"
       "movdqa     (%1),%%xmm0                  \n"
       "movdqa     (%1,%3,1),%%xmm2             \n"
@@ -2731,7 +2701,7 @@
     );
     return;
   } else {
-    asm volatile(
+    asm volatile (
       "mov        %3,%%eax                     \n"
       "shr        %%eax                        \n"
       "mov        %%al,%%ah                    \n"
@@ -3095,10 +3065,7 @@
     ScaleRowDown2 = filtering ? ScaleRowDown2Int_NEON : ScaleRowDown2_NEON;
   } else
 #endif
-/* TODO: Force to call C version all the time in ordert to get matching results
- * in multi-resolution encoder example.
- */
-#if 0 //defined(HAS_SCALEROWDOWN2_SSE2)
+#if defined(HAS_SCALEROWDOWN2_SSE2)
   if (TestCpuFlag(kCpuHasSSE2) &&
       IS_ALIGNED(dst_width, 16) &&
       IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
@@ -3267,33 +3234,33 @@
     }
   }
   {
-    int src_row = 0;
+  int src_row = 0;
     int y;
     for (y = 0; y < dst_height; ++y) {
-      switch (src_row) {
-        case 0:
-          ScaleRowDown34_0(src_ptr, src_stride, dst_ptr, dst_width);
-          break;
+    switch (src_row) {
+      case 0:
+        ScaleRowDown34_0(src_ptr, src_stride, dst_ptr, dst_width);
+        break;
 
-        case 1:
-          ScaleRowDown34_1(src_ptr, src_stride, dst_ptr, dst_width);
-          break;
+      case 1:
+        ScaleRowDown34_1(src_ptr, src_stride, dst_ptr, dst_width);
+        break;
 
-        case 2:
-          ScaleRowDown34_0(src_ptr + src_stride, -src_stride,
-                           dst_ptr, dst_width);
-          break;
-      }
-      ++src_row;
+      case 2:
+        ScaleRowDown34_0(src_ptr + src_stride, -src_stride,
+                         dst_ptr, dst_width);
+        break;
+    }
+    ++src_row;
+    src_ptr += src_stride;
+    dst_ptr += dst_stride;
+    if (src_row >= 3) {
       src_ptr += src_stride;
-      dst_ptr += dst_stride;
-      if (src_row >= 3) {
-        src_ptr += src_stride;
-        src_row = 0;
-      }
+      src_row = 0;
     }
   }
 }
+}
 
 /**
  * Scale plane, 3/8
@@ -3350,27 +3317,27 @@
     }
   }
   {
-    int src_row = 0;
+  int src_row = 0;
     int y;
     for (y = 0; y < dst_height; ++y) {
-      switch (src_row) {
-        case 0:
-        case 1:
-          ScaleRowDown38_3(src_ptr, src_stride, dst_ptr, dst_width);
-          src_ptr += src_stride * 3;
-          ++src_row;
-          break;
+    switch (src_row) {
+      case 0:
+      case 1:
+        ScaleRowDown38_3(src_ptr, src_stride, dst_ptr, dst_width);
+        src_ptr += src_stride * 3;
+        ++src_row;
+        break;
 
-        case 2:
-          ScaleRowDown38_2(src_ptr, src_stride, dst_ptr, dst_width);
-          src_ptr += src_stride * 2;
-          src_row = 0;
-          break;
-      }
-      dst_ptr += dst_stride;
+      case 2:
+        ScaleRowDown38_2(src_ptr, src_stride, dst_ptr, dst_width);
+        src_ptr += src_stride * 2;
+        src_row = 0;
+        break;
     }
+    dst_ptr += dst_stride;
   }
 }
+}
 
 __inline static uint32 SumBox(int iboxwidth, int iboxheight,
                             int src_stride, const uint8* src_ptr) {
@@ -3421,15 +3388,15 @@
   scaletbl[0] = 65536 / (minboxwidth * boxheight);
   scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);
   {
-    int *scaleptr = scaletbl - minboxwidth;
-    int x = 0;
+  int *scaleptr = scaletbl - minboxwidth;
+  int x = 0;
     int i;
     for (i = 0; i < dst_width; ++i) {
-      int ix = x >> 16;
+    int ix = x >> 16;
       int boxwidth;
-      x += dx;
+    x += dx;
       boxwidth = (x >> 16) - ix;
-      *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
+    *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
     }
   }
 }
@@ -3509,20 +3476,20 @@
     }
 
     {
-      int y = 0;
+    int y = 0;
       int j;
       for (j = 0; j < dst_height; ++j) {
-        int iy = y >> 16;
-        const uint8* const src = src_ptr + iy * src_stride;
+      int iy = y >> 16;
+      const uint8* const src = src_ptr + iy * src_stride;
         int boxheight;
-        y += dy;
-        if (y > (src_height << 16)) {
-          y = (src_height << 16);
-        }
+      y += dy;
+      if (y > (src_height << 16)) {
+        y = (src_height << 16);
+      }
         boxheight = (y >> 16) - iy;
-        ScaleAddRows(src, src_stride, row, src_width, boxheight);
-        ScaleAddCols(dst_width, boxheight, dx, row, dst_ptr);
-        dst_ptr += dst_stride;
+      ScaleAddRows(src, src_stride, row, src_width, boxheight);
+      ScaleAddCols(dst_width, boxheight, dx, row, dst_ptr);
+      dst_ptr += dst_stride;
       }
     }
   }
@@ -3614,24 +3581,24 @@
     ScaleFilterCols = ScaleFilterCols_C;
 
     {
-      int y = 0;
-      int maxy = ((src_height - 1) << 16) - 1; // max is filter of last 2 rows.
+    int y = 0;
+    int maxy = ((src_height - 1) << 16) - 1; // max is filter of last 2 rows.
       int j;
       for (j = 0; j < dst_height; ++j) {
-        int iy = y >> 16;
-        int fy = (y >> 8) & 255;
-        const uint8* const src = src_ptr + iy * src_stride;
-        ScaleFilterRows(row, src, src_stride, src_width, fy);
-        ScaleFilterCols(dst_ptr, row, dst_width, dx);
-        dst_ptr += dst_stride;
-        y += dy;
-        if (y > maxy) {
-          y = maxy;
-        }
+      int iy = y >> 16;
+      int fy = (y >> 8) & 255;
+      const uint8* const src = src_ptr + iy * src_stride;
+      ScaleFilterRows(row, src, src_stride, src_width, fy);
+      ScaleFilterCols(dst_ptr, row, dst_width, dx);
+      dst_ptr += dst_stride;
+      y += dy;
+      if (y > maxy) {
+        y = maxy;
       }
     }
   }
 }
+}
 
 /**
  * Scale plane to/from any dimensions, without interpolation.
@@ -3818,36 +3785,32 @@
     src_stride_v = -src_stride_v;
   }
   {
-    int halfsrc_width = (src_width + 1) >> 1;
-    int halfsrc_height = (src_height + 1) >> 1;
-    int halfdst_width = (dst_width + 1) >> 1;
-    int halfoheight = (dst_height + 1) >> 1;
+  int src_halfwidth = (src_width + 1) >> 1;
+  int src_halfheight = (src_height + 1) >> 1;
+  int dst_halfwidth = (dst_width + 1) >> 1;
+  int dst_halfheight = (dst_height + 1) >> 1;
 
     ScalePlane(src_y, src_stride_y, src_width, src_height,
                dst_y, dst_stride_y, dst_width, dst_height,
                filtering, use_reference_impl_);
-    ScalePlane(src_u, src_stride_u, halfsrc_width, halfsrc_height,
-               dst_u, dst_stride_u, halfdst_width, halfoheight,
-               filtering, use_reference_impl_);
-    ScalePlane(src_v, src_stride_v, halfsrc_width, halfsrc_height,
-               dst_v, dst_stride_v, halfdst_width, halfoheight,
-               filtering, use_reference_impl_);
+  ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight,
+             dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
+             filtering, use_reference_impl_);
+  ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight,
+             dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
+             filtering, use_reference_impl_);
   }
   return 0;
 }
 
-int Scale_2(const uint8* src_y, const uint8* src_u, const uint8* src_v,
+// Deprecated api
+int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
           int src_stride_y, int src_stride_u, int src_stride_v,
           int src_width, int src_height,
           uint8* dst_y, uint8* dst_u, uint8* dst_v,
           int dst_stride_y, int dst_stride_u, int dst_stride_v,
           int dst_width, int dst_height,
           int interpolate) {
-  int halfsrc_width;
-  int halfsrc_height;
-  int halfdst_width;
-  int halfoheight;
-  FilterMode filtering;
   if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
       !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
     return -1;
@@ -3864,51 +3827,58 @@
     src_stride_u = -src_stride_u;
     src_stride_v = -src_stride_v;
   }
-  halfsrc_width = (src_width + 1) >> 1;
-  halfsrc_height = (src_height + 1) >> 1;
-  halfdst_width = (dst_width + 1) >> 1;
-  halfoheight = (dst_height + 1) >> 1;
-  filtering = interpolate ? kFilterBox : kFilterNone;
+  {
+  int src_halfwidth = (src_width + 1) >> 1;
+  int src_halfheight = (src_height + 1) >> 1;
+  int dst_halfwidth = (dst_width + 1) >> 1;
+  int dst_halfheight = (dst_height + 1) >> 1;
+  FilterMode filtering = interpolate ? kFilterBox : kFilterNone;
 
   ScalePlane(src_y, src_stride_y, src_width, src_height,
              dst_y, dst_stride_y, dst_width, dst_height,
              filtering, use_reference_impl_);
-  ScalePlane(src_u, src_stride_u, halfsrc_width, halfsrc_height,
-             dst_u, dst_stride_u, halfdst_width, halfoheight,
+  ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight,
+             dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
              filtering, use_reference_impl_);
-  ScalePlane(src_v, src_stride_v, halfsrc_width, halfsrc_height,
-             dst_v, dst_stride_v, halfdst_width, halfoheight,
+  ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight,
+             dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
              filtering, use_reference_impl_);
+  }
   return 0;
 }
 
-int Scale_1(const uint8* src, int src_width, int src_height,
-          uint8* dst, int dst_width, int dst_height, int ooffset,
+// Deprecated api
+int ScaleOffset(const uint8* src, int src_width, int src_height,
+                uint8* dst, int dst_width, int dst_height, int dst_yoffset,
           int interpolate) {
   if (!src || src_width <= 0 || src_height <= 0 ||
-      !dst || dst_width <= 0 || dst_height <= 0 || ooffset < 0 ||
-      ooffset >= dst_height) {
+      !dst || dst_width <= 0 || dst_height <= 0 || dst_yoffset < 0 ||
+      dst_yoffset >= dst_height) {
     return -1;
   }
-  ooffset = ooffset & ~1;  // chroma requires offset to multiple of 2.
+  dst_yoffset = dst_yoffset & ~1;  // chroma requires offset to multiple of 2.
   {
-    int halfsrc_width = (src_width + 1) >> 1;
-    int halfsrc_height = (src_height + 1) >> 1;
-    int halfdst_width = (dst_width + 1) >> 1;
-    int halfoheight = (dst_height + 1) >> 1;
-    int aheight = dst_height - ooffset * 2;  // actual output height
-    const uint8* const iyptr = src;
-    uint8* oyptr = dst + ooffset * dst_width;
-    const uint8* const iuptr = src + src_width * src_height;
-    uint8* ouptr = dst + dst_width * dst_height + (ooffset >> 1) * halfdst_width;
-    const uint8* const ivptr = src + src_width * src_height +
-                               halfsrc_width * halfsrc_height;
-    uint8* ovptr = dst + dst_width * dst_height + halfdst_width * halfoheight +
-                   (ooffset >> 1) * halfdst_width;
-    return Scale_2(iyptr, iuptr, ivptr, src_width, halfsrc_width, halfsrc_width,
-                 src_width, src_height, oyptr, ouptr, ovptr, dst_width,
-                 halfdst_width, halfdst_width, dst_width, aheight, interpolate);
+  int src_halfwidth = (src_width + 1) >> 1;
+  int src_halfheight = (src_height + 1) >> 1;
+  int dst_halfwidth = (dst_width + 1) >> 1;
+  int dst_halfheight = (dst_height + 1) >> 1;
+  int aheight = dst_height - dst_yoffset * 2;  // actual output height
+  const uint8* const src_y = src;
+  const uint8* const src_u = src + src_width * src_height;
+  const uint8* const src_v = src + src_width * src_height +
+                             src_halfwidth * src_halfheight;
+  uint8* dst_y = dst + dst_yoffset * dst_width;
+  uint8* dst_u = dst + dst_width * dst_height +
+                 (dst_yoffset >> 1) * dst_halfwidth;
+  uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight +
+                 (dst_yoffset >> 1) * dst_halfwidth;
+  return Scale(src_y, src_u, src_v, src_width, src_halfwidth, src_halfwidth,
+               src_width, src_height, dst_y, dst_u, dst_v, dst_width,
+               dst_halfwidth, dst_halfwidth, dst_width, aheight, interpolate);
   }
 }
 
-//}  // namespace libyuv
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif

diff --git a/vp8/common/arm/arm_systemdependent.c b/vp8/common/arm/arm_systemdependent.c
index cd55a63..89a2be8 100644
--- a/vp8/common/arm/arm_systemdependent.c
+++ b/vp8/common/arm/arm_systemdependent.c

@@ -11,7 +11,6 @@
 
 #include "vpx_config.h"
 #include "vpx_ports/arm.h"
-#include "vp8/common/g_common.h"
 #include "vp8/common/pragmas.h"
 #include "vp8/common/subpixel.h"
 #include "vp8/common/loopfilter.h"
@@ -63,6 +62,12 @@
         rtcd->recon.copy8x8     = vp8_copy_mem8x8_v6;
         rtcd->recon.copy8x4     = vp8_copy_mem8x4_v6;
         rtcd->recon.intra4x4_predict = vp8_intra4x4_predict_armv6;
+
+        rtcd->dequant.block               = vp8_dequantize_b_v6;
+        rtcd->dequant.idct_add            = vp8_dequant_idct_add_v6;
+        rtcd->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_v6;
+        rtcd->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_v6;
+
     }
 #endif
 
@@ -97,6 +102,12 @@
             vp8_build_intra_predictors_mby_neon;
         rtcd->recon.build_intra_predictors_mby_s =
             vp8_build_intra_predictors_mby_s_neon;
+
+        rtcd->dequant.block               = vp8_dequantize_b_neon;
+        rtcd->dequant.idct_add            = vp8_dequant_idct_add_neon;
+        rtcd->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_neon;
+        rtcd->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_neon;
+
     }
 #endif
 

diff --git a/vp8/decoder/arm/armv6/dequant_idct_v6.asm b/vp8/common/arm/armv6/dequant_idct_v6.asm
similarity index 100%
rename from vp8/decoder/arm/armv6/dequant_idct_v6.asm
rename to vp8/common/arm/armv6/dequant_idct_v6.asm


diff --git a/vp8/decoder/arm/armv6/dequantize_v6.asm b/vp8/common/arm/armv6/dequantize_v6.asm
similarity index 100%
rename from vp8/decoder/arm/armv6/dequantize_v6.asm
rename to vp8/common/arm/armv6/dequantize_v6.asm


diff --git a/vp8/decoder/arm/armv6/idct_blk_v6.c b/vp8/common/arm/armv6/idct_blk_v6.c
similarity index 98%
rename from vp8/decoder/arm/armv6/idct_blk_v6.c
rename to vp8/common/arm/armv6/idct_blk_v6.c
index c1ef285..9108929 100644
--- a/vp8/decoder/arm/armv6/idct_blk_v6.c
+++ b/vp8/common/arm/armv6/idct_blk_v6.c

@@ -10,7 +10,7 @@
 
 #include "vpx_config.h"
 #include "vp8/common/idct.h"
-#include "vp8/decoder/dequantize.h"
+#include "vp8/common/dequantize.h"
 
 
 void vp8_dequant_idct_add_y_block_v6(short *q, short *dq,

diff --git a/vp8/decoder/arm/dequantize_arm.c b/vp8/common/arm/dequantize_arm.c
similarity index 93%
rename from vp8/decoder/arm/dequantize_arm.c
rename to vp8/common/arm/dequantize_arm.c
index 2918e05..20a8ac4 100644
--- a/vp8/decoder/arm/dequantize_arm.c
+++ b/vp8/common/arm/dequantize_arm.c

@@ -10,9 +10,8 @@
 
 
 #include "vpx_config.h"
-#include "vp8/decoder/dequantize.h"
+#include "vp8/common/dequantize.h"
 #include "vp8/common/idct.h"
-#include "vpx_mem/vpx_mem.h"
 
 #if HAVE_ARMV7
 extern void vp8_dequantize_b_loop_neon(short *Q, short *DQC, short *DQ);

diff --git a/vp8/decoder/arm/dequantize_arm.h b/vp8/common/arm/dequantize_arm.h
similarity index 88%
rename from vp8/decoder/arm/dequantize_arm.h
rename to vp8/common/arm/dequantize_arm.h
index 1123e84..0b4d8fe 100644
--- a/vp8/decoder/arm/dequantize_arm.h
+++ b/vp8/common/arm/dequantize_arm.h

@@ -22,13 +22,13 @@
 #undef  vp8_dequant_block
 #define vp8_dequant_block vp8_dequantize_b_v6
 
-#undef vp8_dequant_idct_add
+#undef  vp8_dequant_idct_add
 #define vp8_dequant_idct_add vp8_dequant_idct_add_v6
 
-#undef vp8_dequant_idct_add_y_block
+#undef  vp8_dequant_idct_add_y_block
 #define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_v6
 
-#undef vp8_dequant_idct_add_uv_block
+#undef  vp8_dequant_idct_add_uv_block
 #define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_v6
 #endif
 #endif
@@ -44,13 +44,13 @@
 #undef  vp8_dequant_block
 #define vp8_dequant_block vp8_dequantize_b_neon
 
-#undef vp8_dequant_idct_add
+#undef  vp8_dequant_idct_add
 #define vp8_dequant_idct_add vp8_dequant_idct_add_neon
 
-#undef vp8_dequant_idct_add_y_block
+#undef  vp8_dequant_idct_add_y_block
 #define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_neon
 
-#undef vp8_dequant_idct_add_uv_block
+#undef  vp8_dequant_idct_add_uv_block
 #define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_neon
 #endif
 

diff --git a/vp8/decoder/arm/neon/dequant_idct_neon.asm b/vp8/common/arm/neon/dequant_idct_neon.asm
similarity index 100%
rename from vp8/decoder/arm/neon/dequant_idct_neon.asm
rename to vp8/common/arm/neon/dequant_idct_neon.asm


diff --git a/vp8/decoder/arm/neon/dequantizeb_neon.asm b/vp8/common/arm/neon/dequantizeb_neon.asm
similarity index 100%
rename from vp8/decoder/arm/neon/dequantizeb_neon.asm
rename to vp8/common/arm/neon/dequantizeb_neon.asm


diff --git a/vp8/decoder/arm/neon/idct_blk_neon.c b/vp8/common/arm/neon/idct_blk_neon.c
similarity index 98%
rename from vp8/decoder/arm/neon/idct_blk_neon.c
rename to vp8/common/arm/neon/idct_blk_neon.c
index 185895f..cc55843 100644
--- a/vp8/decoder/arm/neon/idct_blk_neon.c
+++ b/vp8/common/arm/neon/idct_blk_neon.c

@@ -10,7 +10,7 @@
 
 #include "vpx_config.h"
 #include "vp8/common/idct.h"
-#include "vp8/decoder/dequantize.h"
+#include "vp8/common/dequantize.h"
 
 /* place these declarations here because we don't want to maintain them
  * outside of this scope

diff --git a/vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm b/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm
similarity index 100%
rename from vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm
rename to vp8/common/arm/neon/idct_dequant_0_2x_neon.asm


diff --git a/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm b/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm
similarity index 100%
rename from vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm
rename to vp8/common/arm/neon/idct_dequant_full_2x_neon.asm


diff --git a/vp8/common/bigend.h b/vp8/common/bigend.h
deleted file mode 100644
index 6ac3f8b..0000000
--- a/vp8/common/bigend.h
+++ /dev/null

@@ -1,32 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef _bigend_h
-#define _bigend_h
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-#define invert2(x) ( (((x)>>8)&0x00ff) | (((x)<<8)&0xff00) )
-#define invert4(x) ( ((invert2(x)&0x0000ffff)<<16) | (invert2((x>>16))&0x0000ffff) )
-
-#define high_byte(x) (unsigned char)x
-#define mid2Byte(x) (unsigned char)(x >> 8)
-#define mid1Byte(x) (unsigned char)(x >> 16)
-#define low_byte(x) (unsigned char)(x >> 24)
-
-#define SWAPENDS 1
-
-#if defined(__cplusplus)
-}
-#endif
-#endif

diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 586fa0a..1f3c5d1 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h

@@ -21,9 +21,6 @@
 #include "subpixel.h"
 #include "vpx_ports/mem.h"
 
-#define TRUE    1
-#define FALSE   0
-
 /*#define DCPRED 1*/
 #define DCPREDSIMTHRESH 0
 #define DCPREDCNTTHRESH 3

diff --git a/vp8/common/common.h b/vp8/common/common.h
index 9a93da9..2cc1c54 100644
--- a/vp8/common/common.h
+++ b/vp8/common/common.h

@@ -18,8 +18,6 @@
 
 #include "vpx_mem/vpx_mem.h"
 
-#include "common_types.h"
-
 /* Only need this for fixed-size arrays, for structs just assign. */
 
 #define vp8_copy( Dest, Src) { \

diff --git a/vp8/common/common_types.h b/vp8/common/common_types.h
deleted file mode 100644
index 4e62486..0000000
--- a/vp8/common/common_types.h
+++ /dev/null

@@ -1,18 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef __INC_COMMON_TYPES
-#define __INC_COMMON_TYPES
-
-#define TRUE    1
-#define FALSE   0
-
-#endif

diff --git a/vp8/decoder/dequantize.c b/vp8/common/dequantize.c
similarity index 100%
rename from vp8/decoder/dequantize.c
rename to vp8/common/dequantize.c


diff --git a/vp8/decoder/dequantize.h b/vp8/common/dequantize.h
similarity index 100%
rename from vp8/decoder/dequantize.h
rename to vp8/common/dequantize.h


diff --git a/vp8/common/dma_desc.h b/vp8/common/dma_desc.h
deleted file mode 100644
index b923da6..0000000
--- a/vp8/common/dma_desc.h
+++ /dev/null

@@ -1,125 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef _dma_desc_h
-#define _dma_desc_h
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-
-#define NDSIZE_LG   0x00000900  // Next Descriptor Size
-#define NDSIZE_SM   0x00000800  // Next Descriptor Size
-#define NDSIZE_7    0x00000700  // Next Descriptor Size
-#define NDSIZE_6    0x00000600  // Next Descriptor Size
-#define NDSIZE_5    0x00000500  // Next Descriptor Size
-#define NDSIZE_4    0x00000400  // Next Descriptor Size
-#define NDSIZE_3    0x00000300  // Next Descriptor Size
-#define NDSIZE_2    0x00000200  // Next Descriptor Size
-#define NDSIZE_1    0x00000100  // Next Descriptor Size
-
-#define FLOW_STOP       0x0000
-#define FLOW_AUTO       0x1000
-#define FLOW_DESC_AR    0x4000
-#define FLOW_DESC_SM    0x6000
-#define FLOW_DESC_LG    0x7000
-
-    typedef struct
-    {
-        unsigned int ndp;
-        //unsigned short ndpl;
-        //unsigned short ndph;
-        unsigned int sa;
-        //unsigned short sal;
-        //unsigned short sah;
-
-        unsigned short dmacfg;
-        unsigned short xcnt;
-        unsigned short xmod;
-        unsigned short ycnt;
-        unsigned short ymod;
-
-    } LARGE_DESC;
-
-    typedef struct
-    {
-        unsigned short ndpl;
-        unsigned short sal;
-        unsigned short sah;
-        unsigned short dmacfg;
-        unsigned short xcnt;
-        unsigned short xmod;
-        unsigned short ycnt;
-        unsigned short ymod;
-    } SMALL_DESC;
-
-    typedef struct
-    {
-        unsigned short sal;
-        unsigned short sah;
-        unsigned short dmacfg;
-        unsigned short xcnt;
-        unsigned short xmod;
-        unsigned short ycnt;
-        unsigned short ymod;
-    } ARRAY_DESC_7;
-
-    typedef struct
-    {
-        unsigned short sal;
-        unsigned short sah;
-        unsigned short dmacfg;
-        unsigned short xcnt;
-        unsigned short xmod;
-        unsigned short ycnt;
-    } ARRAY_DESC_6;
-
-    typedef struct
-    {
-        unsigned short sal;
-        unsigned short sah;
-        unsigned short dmacfg;
-        unsigned short xcnt;
-        unsigned short xmod;
-    } ARRAY_DESC_5;
-
-    typedef struct
-    {
-        unsigned short sal;
-        unsigned short sah;
-        unsigned short dmacfg;
-        unsigned short xcnt;
-    } ARRAY_DESC_4;
-
-    typedef struct
-    {
-        unsigned short sal;
-        unsigned short sah;
-        unsigned short dmacfg;
-    } ARRAY_DESC_3;
-
-    typedef struct
-    {
-        unsigned short sal;
-        unsigned short sah;
-    } ARRAY_DESC_2;
-
-    typedef struct
-    {
-        unsigned short sal;
-    } ARRAY_DESC_1;
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif //_dma_desc_h

diff --git a/vp8/common/duck_io.h b/vp8/common/duck_io.h
deleted file mode 100644
index 43daa65..0000000
--- a/vp8/common/duck_io.h
+++ /dev/null

@@ -1,116 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef _duck_io_h
-#define _duck_io_h
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-#if defined (_WIN32)
-    typedef __int64 int64_t;
-#elif defined(__MWERKS__)
-    typedef long long int64_t;
-#elif defined(__APPLE__) || defined(__POWERPC)
-#include <ppc/types.h>
-#else
-    typedef long long int64_t;
-#endif
-
-    typedef struct
-    {
-        int64_t  offset;     // offset to start from
-        int    blocking;    // non-zero for blocking
-    } re_open_t;
-
-
-    typedef enum
-    {
-        SAL_ERR_MAX                 = -10,
-        SAL_ERROR                   = -11, // Default error
-        SAL_ERR_WSASTARTUP          = -12,
-        SAL_ERR_SOCKET_CREATE       = -13,
-        SAL_ERR_RESOLVING_HOSTNAME  = -14,
-        SAL_ERR_SERVER_CONNECTION   = -15,
-        SAL_ERR_SENDING_DATA        = -16,
-        SAL_ERR_RECEIVING_DATA      = -17,
-        SAL_ERR_404_FILE_NOT_FOUND  = -18,
-        SAL_ERR_PARSING_HTTP_HEADER = -19,
-        SAL_ERR_PARSING_CONTENT_LEN = -20,
-        SAL_ERR_CONNECTION_TIMEOUT  = -21,
-        SAL_ERR_FILE_OPEN_FAILED    = -22,
-        SAL_ERR_MIN                 = -23
-    } SAL_ERR; /* EMH 1-15-03 */
-
-
-    typedef struct sal_err_map_temp
-    {
-        SAL_ERR code;
-        const char *decode;
-
-    } sal_err_map_t;
-
-
-    static char *sal_err_text(SAL_ERR e)
-    {
-        int t;
-        const sal_err_map_t g_sal_err_map[] =
-        {
-            {   SAL_ERR_WSASTARTUP,             "Error with WSAStartup"         },
-            {   SAL_ERR_SOCKET_CREATE,          "Error creating socket"         },
-            {   SAL_ERR_RESOLVING_HOSTNAME,     "Error resolving hostname"      },
-            {   SAL_ERR_SERVER_CONNECTION,      "Error connecting to server"    },
-            {   SAL_ERR_SENDING_DATA,           "Error sending data"            },
-            {   SAL_ERR_RECEIVING_DATA,         "Error receiving data"          },
-            {   SAL_ERR_404_FILE_NOT_FOUND,     "Error file not found "         },
-            {   SAL_ERR_PARSING_HTTP_HEADER,    "Error parsing http header"     },
-            {   SAL_ERR_PARSING_CONTENT_LEN,    "Error parsing content length"  },
-            {   SAL_ERR_CONNECTION_TIMEOUT,     "Error Connection timed out"    },
-            {   SAL_ERR_FILE_OPEN_FAILED,       "Error opening file"            }
-        };
-
-        for (t = 0; t < sizeof(g_sal_err_map) / sizeof(sal_err_map_t); t++)
-        {
-            if (e == g_sal_err_map[t].code)
-                return (char *) g_sal_err_map[t].decode;
-        }
-
-        return 0;
-    }
-
-
-
-
-
-
-
-    int duck_open(const char *fname, unsigned long user_data);
-
-    void duck_close(int ghndl);
-
-    int duck_read(int ghndl, unsigned char *buf, int nbytes);
-
-    int64_t duck_seek(int g_hndl, int64_t offs, int origin);
-
-    int duck_read_finished(int han, int flag); /* FWG 7-9-99 */
-
-    int duck_name(int handle, char name[], size_t max_len); /* EMH 9-23-03 */
-
-    int duck_read_blocking(int handle, unsigned char *buffer, int bytes); /* EMH 9-23-03 */
-
-    int64_t duck_available_data(int handle); /* EMH 10-23-03 */
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif

diff --git a/vp8/common/findnearmv.h b/vp8/common/findnearmv.h
index 01909b9..a3443d7 100644
--- a/vp8/common/findnearmv.h
+++ b/vp8/common/findnearmv.h

@@ -60,10 +60,10 @@
                                 int mb_to_bottom_edge)
 {
     unsigned int need_to_clamp;
-    need_to_clamp = (mv->as_mv.col < mb_to_left_edge) ? 1 : 0;
-    need_to_clamp |= (mv->as_mv.col > mb_to_right_edge) ? 1 : 0;
-    need_to_clamp |= (mv->as_mv.row < mb_to_top_edge) ? 1 : 0;
-    need_to_clamp |= (mv->as_mv.row > mb_to_bottom_edge) ? 1 : 0;
+    need_to_clamp = (mv->as_mv.col < mb_to_left_edge);
+    need_to_clamp |= (mv->as_mv.col > mb_to_right_edge);
+    need_to_clamp |= (mv->as_mv.row < mb_to_top_edge);
+    need_to_clamp |= (mv->as_mv.row > mb_to_bottom_edge);
     return need_to_clamp;
 }
 

diff --git a/vp8/common/g_common.h b/vp8/common/g_common.h
deleted file mode 100644
index 5f52398..0000000
--- a/vp8/common/g_common.h
+++ /dev/null

@@ -1,21 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-extern void (*vp8_clear_system_state)(void);
-extern void (*vp8_plane_add_noise)(unsigned char *Start, unsigned int Width, unsigned int Height, int Pitch, int DPitch, int q);
-extern void (*de_interlace)
-(
-    unsigned char *src_ptr,
-    unsigned char *dst_ptr,
-    int Width,
-    int Height,
-    int Stride
-);

diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index 9641d8c..01d7620 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c

@@ -10,7 +10,6 @@
 
 
 #include "vpx_config.h"
-#include "vp8/common/g_common.h"
 #include "vp8/common/subpixel.h"
 #include "vp8/common/loopfilter.h"
 #include "vp8/common/recon.h"
@@ -70,6 +69,14 @@
 #if CONFIG_RUNTIME_CPU_DETECT
     VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
 
+
+    rtcd->dequant.block             = vp8_dequantize_b_c;
+    rtcd->dequant.idct_add          = vp8_dequant_idct_add_c;
+    rtcd->dequant.idct_add_y_block  = vp8_dequant_idct_add_y_block_c;
+    rtcd->dequant.idct_add_uv_block =
+        vp8_dequant_idct_add_uv_block_c;
+
+
     rtcd->idct.idct16       = vp8_short_idct4x4llm_c;
     rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_c;
     rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_c;

diff --git a/vp8/decoder/idct_blk.c b/vp8/common/idct_blk.c
similarity index 100%
rename from vp8/decoder/idct_blk.c
rename to vp8/common/idct_blk.c


diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c
deleted file mode 100644
index 95e6980..0000000
--- a/vp8/common/invtrans.c
+++ /dev/null

@@ -1,56 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "invtrans.h"
-
-
-void vp8_inverse_transform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b,
-                             int pitch)
-{
-    if (*b->eob > 1)
-    {
-        IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->predictor, pitch,
-              *(b->base_dst) + b->dst, b->dst_stride);
-    }
-    else
-    {
-        IDCT_INVOKE(rtcd, idct1_scalar_add)(b->dqcoeff[0], b->predictor, pitch,
-                         *(b->base_dst) + b->dst, b->dst_stride);
-    }
-
-}
-
-void vp8_inverse_transform_mby(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
-{
-    int i;
-
-    if(x->mode_info_context->mbmi.mode != SPLITMV)
-    {
-        /* do 2nd order transform on the dc block */
-        IDCT_INVOKE(rtcd, iwalsh16)(x->block[24].dqcoeff, x->dqcoeff);
-    }
-
-    for (i = 0; i < 16; i++)
-    {
-        vp8_inverse_transform_b(rtcd, &x->block[i], 16);
-    }
-
-}
-void vp8_inverse_transform_mbuv(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
-{
-    int i;
-
-    for (i = 16; i < 24; i++)
-    {
-        vp8_inverse_transform_b(rtcd, &x->block[i], 8);
-    }
-
-}

diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h
index d14573b..2bcbeec 100644
--- a/vp8/common/invtrans.h
+++ b/vp8/common/invtrans.h

@@ -15,9 +15,66 @@
 #include "vpx_config.h"
 #include "idct.h"
 #include "blockd.h"
-extern void vp8_inverse_transform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch);
-extern void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
-extern void vp8_inverse_transform_mby(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
-extern void vp8_inverse_transform_mbuv(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
+#include "onyxc_int.h"
 
+#if CONFIG_MULTITHREAD
+#include "vpx_mem/vpx_mem.h"
+#endif
+
+static void eob_adjust(char *eobs, short *diff)
+{
+    /* eob adjust.... the idct can only skip if both the dc and eob are zero */
+    int js;
+    for(js = 0; js < 16; js++)
+    {
+        if((eobs[js] == 0) && (diff[0] != 0))
+            eobs[js]++;
+        diff+=16;
+    }
+}
+
+static void vp8_inverse_transform_mby(MACROBLOCKD *xd,
+                                      const VP8_COMMON_RTCD *rtcd)
+{
+    short *DQC = xd->block[0].dequant;
+    /* save the dc dequant constant in case it is overridden */
+    short dc_dequant_temp = DQC[0];
+
+#if CONFIG_MULTITHREAD
+    DECLARE_ALIGNED(16, short, local_dequant[16]);
+#endif
+
+    if (xd->mode_info_context->mbmi.mode != SPLITMV)
+    {
+        /* do 2nd order transform on the dc block */
+        if (xd->eobs[24] > 1)
+        {
+            IDCT_INVOKE(&rtcd->idct, iwalsh16)
+                (&xd->block[24].dqcoeff[0], xd->qcoeff);
+        }
+        else
+        {
+            IDCT_INVOKE(&rtcd->idct, iwalsh1)
+                (&xd->block[24].dqcoeff[0], xd->qcoeff);
+        }
+        eob_adjust(xd->eobs, xd->qcoeff);
+
+#if CONFIG_MULTITHREAD
+        DQC = local_dequant;
+
+        vpx_memcpy(DQC, xd->block[0].dequant,
+                   sizeof(local_dequant));
+#endif
+
+        /* override the dc dequant constant */
+        DQC[0] = 1;
+    }
+    DEQUANT_INVOKE (&rtcd->dequant, idct_add_y_block)
+                    (xd->qcoeff, DQC,
+                     xd->dst.y_buffer,
+                     xd->dst.y_stride, xd->eobs);
+
+    /* restore the dc dequant constant */
+    DQC[0] = dc_dequant_temp;
+}
 #endif

diff --git a/vp8/common/littlend.h b/vp8/common/littlend.h
deleted file mode 100644
index 99df116..0000000
--- a/vp8/common/littlend.h
+++ /dev/null

@@ -1,33 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef _littlend_h
-#define _littlend_h
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-#define invert2(x) (x)
-#define invert4(x) (x)
-
-#define low_byte(x) (unsigned char)x
-#define mid1Byte(x) (unsigned char)(x >> 8)
-#define mid2Byte(x) (unsigned char)(x >> 16)
-#define high_byte(x) (unsigned char)(x >> 24)
-
-#define SWAPENDS 0
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif

diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h
index 37fa5a0..d17a32b 100644
--- a/vp8/common/onyx.h
+++ b/vp8/common/onyx.h

@@ -22,9 +22,9 @@
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"
 #include "vpx_scale/yv12config.h"
-#include "type_aliases.h"
 #include "ppflags.h"
-    typedef int *VP8_PTR;
+
+    struct VP8_COMP;
 
     /* Create/destroy static data structures. */
 
@@ -226,27 +226,27 @@
 
     void vp8_initialize();
 
-    VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf);
-    void vp8_remove_compressor(VP8_PTR *comp);
+    struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf);
+    void vp8_remove_compressor(struct VP8_COMP* *comp);
 
-    void vp8_init_config(VP8_PTR onyx, VP8_CONFIG *oxcf);
-    void vp8_change_config(VP8_PTR onyx, VP8_CONFIG *oxcf);
+    void vp8_init_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf);
+    void vp8_change_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf);
 
 // receive a frames worth of data caller can assume that a copy of this frame is made
 // and not just a copy of the pointer..
-    int vp8_receive_raw_frame(VP8_PTR comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time_stamp);
-    int vp8_get_compressed_data(VP8_PTR comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush);
-    int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags);
+    int vp8_receive_raw_frame(struct VP8_COMP* comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time_stamp);
+    int vp8_get_compressed_data(struct VP8_COMP* comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush);
+    int vp8_get_preview_raw_frame(struct VP8_COMP* comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags);
 
-    int vp8_use_as_reference(VP8_PTR comp, int ref_frame_flags);
-    int vp8_update_reference(VP8_PTR comp, int ref_frame_flags);
-    int vp8_get_reference(VP8_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
-    int vp8_set_reference(VP8_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
-    int vp8_update_entropy(VP8_PTR comp, int update);
-    int vp8_set_roimap(VP8_PTR comp, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4]);
-    int vp8_set_active_map(VP8_PTR comp, unsigned char *map, unsigned int rows, unsigned int cols);
-    int vp8_set_internal_size(VP8_PTR comp, VPX_SCALING horiz_mode, VPX_SCALING vert_mode);
-    int vp8_get_quantizer(VP8_PTR c);
+    int vp8_use_as_reference(struct VP8_COMP* comp, int ref_frame_flags);
+    int vp8_update_reference(struct VP8_COMP* comp, int ref_frame_flags);
+    int vp8_get_reference(struct VP8_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
+    int vp8_set_reference(struct VP8_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
+    int vp8_update_entropy(struct VP8_COMP* comp, int update);
+    int vp8_set_roimap(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4]);
+    int vp8_set_active_map(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols);
+    int vp8_set_internal_size(struct VP8_COMP* comp, VPX_SCALING horiz_mode, VPX_SCALING vert_mode);
+    int vp8_get_quantizer(struct VP8_COMP* c);
 
 #ifdef __cplusplus
 }

diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index 936fa9f..f733ff7 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h

@@ -22,6 +22,7 @@
 #if CONFIG_POSTPROC
 #include "postproc.h"
 #endif
+#include "dequantize.h"
 
 /*#ifdef PACKET_TESTING*/
 #include "header.h"
@@ -73,6 +74,7 @@
 typedef struct VP8_COMMON_RTCD
 {
 #if CONFIG_RUNTIME_CPU_DETECT
+    vp8_dequant_rtcd_vtable_t        dequant;
     vp8_idct_rtcd_vtable_t        idct;
     vp8_recon_rtcd_vtable_t       recon;
     vp8_subpix_rtcd_vtable_t      subpix;

diff --git a/vp8/common/onyxd.h b/vp8/common/onyxd.h
index 43fa00b..35a8b6e 100644
--- a/vp8/common/onyxd.h
+++ b/vp8/common/onyxd.h

@@ -18,13 +18,13 @@
 extern "C"
 {
 #endif
-#include "type_aliases.h"
 #include "vpx_scale/yv12config.h"
 #include "ppflags.h"
 #include "vpx_ports/mem.h"
 #include "vpx/vpx_codec.h"
 
-    typedef void   *VP8D_PTR;
+    struct VP8D_COMP;
+
     typedef struct
     {
         int     Width;
@@ -49,19 +49,19 @@
 
     void vp8dx_initialize(void);
 
-    void vp8dx_set_setting(VP8D_PTR comp, VP8D_SETTING oxst, int x);
+    void vp8dx_set_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst, int x);
 
-    int vp8dx_get_setting(VP8D_PTR comp, VP8D_SETTING oxst);
+    int vp8dx_get_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst);
 
-    int vp8dx_receive_compressed_data(VP8D_PTR comp, unsigned long size, const unsigned char *dest, int64_t time_stamp);
-    int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags);
+    int vp8dx_receive_compressed_data(struct VP8D_COMP* comp, unsigned long size, const unsigned char *dest, int64_t time_stamp);
+    int vp8dx_get_raw_frame(struct VP8D_COMP* comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags);
 
-    vpx_codec_err_t vp8dx_get_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
-    vpx_codec_err_t vp8dx_set_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
+    vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
+    vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
 
-    VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf);
+    struct VP8D_COMP* vp8dx_create_decompressor(VP8D_CONFIG *oxcf);
 
-    void vp8dx_remove_decompressor(VP8D_PTR comp);
+    void vp8dx_remove_decompressor(struct VP8D_COMP* comp);
 
 #ifdef __cplusplus
 }

diff --git a/vp8/common/ppc/systemdependent.c b/vp8/common/ppc/systemdependent.c
index 1f5d790..7046a63 100644
--- a/vp8/common/ppc/systemdependent.c
+++ b/vp8/common/ppc/systemdependent.c

@@ -9,7 +9,6 @@
  */
 
 
-#include "g_common.h"
 #include "subpixel.h"
 #include "loopfilter.h"
 #include "recon.h"

diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c
index 24c09a3..6c7af41 100644
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c

@@ -334,11 +334,12 @@
 
 
 /*encoder only*/
-void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x)
+void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x,
+                                         unsigned char *dst_y,
+                                         int dst_ystride)
 {
     unsigned char *ptr_base;
     unsigned char *ptr;
-    unsigned char *pred_ptr = x->predictor;
     int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
     int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
     int pre_stride = x->block[0].pre_stride;
@@ -348,11 +349,13 @@
 
     if ((mv_row | mv_col) & 7)
     {
-        x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16);
+        x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7,
+                                 dst_y, dst_ystride);
     }
     else
     {
-        RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, pred_ptr, 16);
+        RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_y,
+            dst_ystride);
     }
 }
 
@@ -596,69 +599,3 @@
         build_inter4x4_predictors_mb(xd);
     }
 }
-/* encoder only*/
-static void build_inter4x4_predictors_mb_e(MACROBLOCKD *x)
-{
-    int i;
-
-    if (x->mode_info_context->mbmi.partitioning < 3)
-    {
-        x->block[ 0].bmi = x->mode_info_context->bmi[ 0];
-        x->block[ 2].bmi = x->mode_info_context->bmi[ 2];
-        x->block[ 8].bmi = x->mode_info_context->bmi[ 8];
-        x->block[10].bmi = x->mode_info_context->bmi[10];
-
-        build_inter_predictors4b(x, &x->block[ 0], x->block[ 0].predictor, 16);
-        build_inter_predictors4b(x, &x->block[ 2], x->block[ 2].predictor, 16);
-        build_inter_predictors4b(x, &x->block[ 8], x->block[ 8].predictor, 16);
-        build_inter_predictors4b(x, &x->block[10], x->block[10].predictor, 16);
-    }
-    else
-    {
-        for (i = 0; i < 16; i += 2)
-        {
-            BLOCKD *d0 = &x->block[i];
-            BLOCKD *d1 = &x->block[i+1];
-
-            x->block[i+0].bmi = x->mode_info_context->bmi[i+0];
-            x->block[i+1].bmi = x->mode_info_context->bmi[i+1];
-
-            if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
-                build_inter_predictors2b(x, d0, d0->predictor, 16);
-            else
-            {
-                build_inter_predictors_b(d0, d0->predictor, 16, x->subpixel_predict);
-                build_inter_predictors_b(d1, d1->predictor, 16, x->subpixel_predict);
-            }
-
-        }
-
-    }
-
-    for (i = 16; i < 24; i += 2)
-    {
-        BLOCKD *d0 = &x->block[i];
-        BLOCKD *d1 = &x->block[i+1];
-
-        if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
-            build_inter_predictors2b(x, d0, d0->predictor, 8);
-        else
-        {
-            build_inter_predictors_b(d0, d0->predictor, 8, x->subpixel_predict);
-            build_inter_predictors_b(d1, d1->predictor, 8, x->subpixel_predict);
-        }
-    }
-}
-void vp8_build_inter_predictors_mb_e(MACROBLOCKD *xd)
-{
-    if (xd->mode_info_context->mbmi.mode != SPLITMV)
-    {
-        vp8_build_inter16x16_predictors_mb(xd, xd->predictor, &xd->predictor[256],
-                                           &xd->predictor[320], 16, 8);
-    }
-    else
-    {
-        build_4x4uvmvs(xd);
-        build_inter4x4_predictors_mb_e(xd);
-    }
-}

diff --git a/vp8/common/reconinter.h b/vp8/common/reconinter.h
index 86f9d5a..f57ff73 100644
--- a/vp8/common/reconinter.h
+++ b/vp8/common/reconinter.h

@@ -21,11 +21,13 @@
                                                int dst_uvstride);
 
 
-extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x);
-extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf);
+extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x,
+                                                unsigned char *dst_y,
+                                                int dst_ystride);
+extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch,
+                                         vp8_subpix_fn_t sppf);
 
 extern void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x);
 extern void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x);
-extern void vp8_build_inter_predictors_mb_e(MACROBLOCKD *xd);
 
 #endif

diff --git a/vp8/common/type_aliases.h b/vp8/common/type_aliases.h
deleted file mode 100644
index 22b531a..0000000
--- a/vp8/common/type_aliases.h
+++ /dev/null

@@ -1,117 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/****************************************************************************
-*
-*   Module Title :     type_aliases.h
-*
-*   Description  :     Standard type aliases
-*
-****************************************************************************/
-#ifndef __INC_TYPE_ALIASES_H
-#define __INC_TYPE_ALIASES_H
-
-/****************************************************************************
-* Macros
-****************************************************************************/
-#define EXPORT
-#define IMPORT          extern      /* Used to declare imported data & routines */
-#define PRIVATE         static      /* Used to declare & define module-local data */
-#define LOCAL           static      /* Used to define all persistent routine-local data */
-#define STD_IN_PATH     0           /* Standard input path */
-#define STD_OUT_PATH    1           /* Standard output path */
-#define STD_ERR_PATH    2           /* Standard error path */
-#define STD_IN_FILE     stdin       /* Standard input file pointer */
-#define STD_OUT_FILE    stdout      /* Standard output file pointer */
-#define STD_ERR_FILE    stderr      /* Standard error file pointer */
-#define max_int         0x7FFFFFFF
-
-#define __export
-#define _export
-
-#define CCONV
-
-#ifndef NULL
-#ifdef __cplusplus
-#define NULL    0
-#else
-#define NULL    ((void *)0)
-#endif
-#endif
-
-#ifndef FALSE
-#define FALSE   0
-#endif
-
-#ifndef TRUE
-#define TRUE    1
-#endif
-
-/****************************************************************************
-* Typedefs
-****************************************************************************/
-#ifndef TYPE_INT8
-#define TYPE_INT8
-typedef signed char     INT8;
-#endif
-
-#ifndef TYPE_INT16
-/*#define TYPE_INT16*/
-typedef signed short    INT16;
-#endif
-
-#ifndef TYPE_INT32
-/*#define TYPE_INT32*/
-typedef signed int      INT32;
-#endif
-
-#ifndef TYPE_UINT8
-/*#define TYPE_UINT8*/
-typedef unsigned char   UINT8;
-#endif
-
-#ifndef TYPE_UINT32
-/*#define TYPE_UINT32*/
-typedef unsigned int    UINT32;
-#endif
-
-#ifndef TYPE_UINT16
-/*#define TYPE_UINT16*/
-typedef unsigned short  UINT16;
-#endif
-
-#ifndef TYPE_BOOL
-/*#define TYPE_BOOL*/
-typedef int             BOOL;
-#endif
-
-typedef unsigned char   BOOLEAN;
-
-#ifdef _MSC_VER
-typedef __int64 INT64;
-#else
-
-#ifndef TYPE_INT64
-#ifdef _TMS320C6X
-/* for now we only have 40bits */
-typedef long INT64;
-#else
-typedef long long INT64;
-#endif
-#endif
-
-#endif
-
-/* Floating point */
-typedef  double         FLOAT64;
-typedef  float          FLOAT32;
-
-#endif

diff --git a/vp8/decoder/x86/dequantize_mmx.asm b/vp8/common/x86/dequantize_mmx.asm
similarity index 100%
rename from vp8/decoder/x86/dequantize_mmx.asm
rename to vp8/common/x86/dequantize_mmx.asm


diff --git a/vp8/decoder/x86/dequantize_x86.h b/vp8/common/x86/dequantize_x86.h
similarity index 100%
rename from vp8/decoder/x86/dequantize_x86.h
rename to vp8/common/x86/dequantize_x86.h


diff --git a/vp8/decoder/x86/idct_blk_mmx.c b/vp8/common/x86/idct_blk_mmx.c
similarity index 91%
rename from vp8/decoder/x86/idct_blk_mmx.c
rename to vp8/common/x86/idct_blk_mmx.c
index 29276e5..49cebd6 100644
--- a/vp8/decoder/x86/idct_blk_mmx.c
+++ b/vp8/common/x86/idct_blk_mmx.c

@@ -10,7 +10,17 @@
 
 #include "vpx_config.h"
 #include "vp8/common/idct.h"
-#include "vp8/decoder/dequantize.h"
+#include "vp8/common/dequantize.h"
+
+extern void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q);
+
+void vp8_dequantize_b_mmx(BLOCKD *d)
+{
+    short *sq = (short *) d->qcoeff;
+    short *dq = (short *) d->dqcoeff;
+    short *q = (short *) d->dequant;
+    vp8_dequantize_b_impl_mmx(sq, dq, q);
+}
 
 void vp8_dequant_idct_add_y_block_mmx
             (short *q, short *dq,

diff --git a/vp8/decoder/x86/idct_blk_sse2.c b/vp8/common/x86/idct_blk_sse2.c
similarity index 98%
rename from vp8/decoder/x86/idct_blk_sse2.c
rename to vp8/common/x86/idct_blk_sse2.c
index 03c2878..44e440c 100644
--- a/vp8/decoder/x86/idct_blk_sse2.c
+++ b/vp8/common/x86/idct_blk_sse2.c

@@ -10,7 +10,7 @@
 
 #include "vpx_config.h"
 #include "vp8/common/idct.h"
-#include "vp8/decoder/dequantize.h"
+#include "vp8/common/dequantize.h"
 
 void vp8_idct_dequant_0_2x_sse2
             (short *q, short *dq ,

diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index 86927d9..2ad010a 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm

@@ -1385,52 +1385,54 @@
     SHADOW_ARGS_TO_STACK 3
     SAVE_XMM 7
     GET_GOT     rbx
-    push        rsi
-    push        rdi
     ; end prolog
 
-        mov         rsi, arg(0)             ;src_ptr
+        mov         rcx, arg(0)             ;src_ptr
         movsxd      rax, dword ptr arg(1)   ;src_pixel_step     ; destination pitch?
-        mov         rdx, arg(2)             ;blimit
-        movdqa      xmm3, XMMWORD PTR [rdx]
 
-        mov         rdi, rsi                ; rdi points to row +1 for indirect addressing
-        add         rdi, rax
+        lea         rdx, [rcx + rax]
         neg         rax
 
         ; calculate mask
-        movdqa      xmm1, [rsi+2*rax]       ; p1
-        movdqa      xmm0, [rdi]             ; q1
+        movdqa      xmm0, [rdx]             ; q1
+        mov         rdx, arg(2)             ;blimit
+        movdqa      xmm1, [rcx+2*rax]       ; p1
+
         movdqa      xmm2, xmm1
         movdqa      xmm7, xmm0
-        movdqa      xmm4, xmm0
+
         psubusb     xmm0, xmm1              ; q1-=p1
-        psubusb     xmm1, xmm4              ; p1-=q1
+        psubusb     xmm1, xmm7              ; p1-=q1
         por         xmm1, xmm0              ; abs(p1-q1)
         pand        xmm1, [GLOBAL(tfe)]     ; set lsb of each byte to zero
         psrlw       xmm1, 1                 ; abs(p1-q1)/2
 
-        movdqa      xmm5, [rsi+rax]         ; p0
-        movdqa      xmm4, [rsi]             ; q0
+        movdqa      xmm3, XMMWORD PTR [rdx]
+
+        movdqa      xmm5, [rcx+rax]         ; p0
+        movdqa      xmm4, [rcx]             ; q0
         movdqa      xmm0, xmm4              ; q0
         movdqa      xmm6, xmm5              ; p0
         psubusb     xmm5, xmm4              ; p0-=q0
         psubusb     xmm4, xmm6              ; q0-=p0
         por         xmm5, xmm4              ; abs(p0 - q0)
+
+        movdqa      xmm4, [GLOBAL(t80)]
+
         paddusb     xmm5, xmm5              ; abs(p0-q0)*2
         paddusb     xmm5, xmm1              ; abs (p0 - q0) *2 + abs(p1-q1)/2
-
         psubusb     xmm5, xmm3              ; abs(p0 - q0) *2 + abs(p1-q1)/2  > blimit
         pxor        xmm3, xmm3
         pcmpeqb     xmm5, xmm3
 
+
         ; start work on filters
-        pxor        xmm2, [GLOBAL(t80)]     ; p1 offset to convert to signed values
-        pxor        xmm7, [GLOBAL(t80)]     ; q1 offset to convert to signed values
+        pxor        xmm2, xmm4     ; p1 offset to convert to signed values
+        pxor        xmm7, xmm4     ; q1 offset to convert to signed values
         psubsb      xmm2, xmm7              ; p1 - q1
 
-        pxor        xmm6, [GLOBAL(t80)]     ; offset to convert to signed values
-        pxor        xmm0, [GLOBAL(t80)]     ; offset to convert to signed values
+        pxor        xmm6, xmm4     ; offset to convert to signed values
+        pxor        xmm0, xmm4     ; offset to convert to signed values
         movdqa      xmm3, xmm0              ; q0
         psubsb      xmm0, xmm6              ; q0 - p0
         paddsb      xmm2, xmm0              ; p1 - q1 + 1 * (q0 - p0)
@@ -1438,42 +1440,36 @@
         paddsb      xmm2, xmm0              ; p1 - q1 + 3 * (q0 - p0)
         pand        xmm5, xmm2              ; mask filter values we don't care about
 
-        ; do + 4 side
-        paddsb      xmm5, [GLOBAL(t4)]      ; 3* (q0 - p0) + (p1 - q1) + 4
-
-        movdqa      xmm0, xmm5              ; get a copy of filters
-        psllw       xmm0, 8                 ; shift left 8
-        psraw       xmm0, 3                 ; arithmetic shift right 11
-        psrlw       xmm0, 8
-        movdqa      xmm1, xmm5              ; get a copy of filters
-        psraw       xmm1, 11                ; arithmetic shift right 11
-        psllw       xmm1, 8                 ; shift left 8 to put it back
-
-        por         xmm0, xmm1              ; put the two together to get result
-
-        psubsb      xmm3, xmm0              ; q0-= q0 add
-        pxor        xmm3, [GLOBAL(t80)]     ; unoffset
-        movdqa      [rsi], xmm3             ; write back
-
-        ; now do +3 side
+        paddsb      xmm5, [GLOBAL(t4)]      ;  3* (q0 - p0) + (p1 - q1) + 4
+        movdqa      xmm0, xmm5
         psubsb      xmm5, [GLOBAL(t1s)]     ; +3 instead of +4
 
-        movdqa      xmm0, xmm5              ; get a copy of filters
-        psllw       xmm0, 8                 ; shift left 8
-        psraw       xmm0, 3                 ; arithmetic shift right 11
-        psrlw       xmm0, 8
-        psraw       xmm5, 11                ; arithmetic shift right 11
-        psllw       xmm5, 8                 ; shift left 8 to put it back
-        por         xmm0, xmm5              ; put the two together to get result
+        movdqa      xmm1, [GLOBAL(te0)]
+        movdqa      xmm2, [GLOBAL(t1f)]
 
+        pxor        xmm7, xmm7
+        pcmpgtb     xmm7, xmm0              ;save sign
+        pand        xmm7, xmm1              ;preserve the upper 3 bits
+        psrlw       xmm0, 3
+        pand        xmm0, xmm2              ;clear out upper 3 bits
+        por         xmm0, xmm7              ;add sign
+        psubsb      xmm3, xmm0              ; q0-= q0sz add
 
-        paddsb      xmm6, xmm0              ; p0+= p0 add
-        pxor        xmm6, [GLOBAL(t80)]     ; unoffset
-        movdqa      [rsi+rax], xmm6         ; write back
+        pxor        xmm7, xmm7
+        pcmpgtb     xmm7, xmm5              ;save sign
+        pand        xmm7, xmm1              ;preserve the upper 3 bits
+        psrlw       xmm5, 3
+        pand        xmm5, xmm2              ;clear out upper 3 bits
+        por         xmm5, xmm7              ;add sign
+        paddsb      xmm6, xmm5              ; p0+= p0 add
+
+        pxor        xmm3, xmm4     ; unoffset
+        movdqa      [rcx], xmm3             ; write back
+
+        pxor        xmm6, xmm4     ; unoffset
+        movdqa      [rcx+rax], xmm6         ; write back
 
     ; begin epilog
-    pop rdi
-    pop rsi
     RESTORE_GOT
     RESTORE_XMM
     UNSHADOW_ARGS
@@ -1536,9 +1532,6 @@
         punpckldq   xmm0,       xmm1                    ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
         punpckhdq   xmm2,       xmm1                    ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
 
-        movdqa      t0,         xmm0                    ; save to t0
-        movdqa      t1,         xmm2                    ; save to t1
-
         lea         rsi,        [rsi + rax*8]
         lea         rdi,        [rsi + rax]
         lea         rdx,        [rsi + rax*4]
@@ -1551,26 +1544,24 @@
         punpckldq   xmm4,       xmm1                    ; c3 c2 c1 c0 83 82 81 80
         punpckldq   xmm6,       xmm3                    ; d3 d2 d1 d0 93 92 91 90
 
-        movd        xmm0,       [rsi + rax*2]           ; a3 a2 a1 a0
+        movd        xmm1,       [rsi + rax*2]           ; a3 a2 a1 a0
         movd        xmm5,       [rdx + rax*2]           ; e3 e2 e1 e0
-        movd        xmm2,       [rdi + rax*2]           ; b3 b2 b1 b0
+        movd        xmm3,       [rdi + rax*2]           ; b3 b2 b1 b0
         movd        xmm7,       [rcx + rax*2]           ; f3 f2 f1 f0
-        punpckldq   xmm0,       xmm5                    ; e3 e2 e1 e0 a3 a2 a1 a0
-        punpckldq   xmm2,       xmm7                    ; f3 f2 f1 f0 b3 b2 b1 b0
+        punpckldq   xmm1,       xmm5                    ; e3 e2 e1 e0 a3 a2 a1 a0
+        punpckldq   xmm3,       xmm7                    ; f3 f2 f1 f0 b3 b2 b1 b0
 
         punpcklbw   xmm4,       xmm6                    ; d3 c3 d2 c2 d1 c1 d0 c0 93 83 92 82 91 81 90 80
-        punpcklbw   xmm0,       xmm2                    ; f3 e3 f2 e2 f1 e1 f0 e0 b3 a3 b2 a2 b1 a1 b0 a0
+        punpcklbw   xmm1,       xmm3                    ; f3 e3 f2 e2 f1 e1 f0 e0 b3 a3 b2 a2 b1 a1 b0 a0
 
-        movdqa      xmm1,       xmm4
-        punpcklwd   xmm4,       xmm0                    ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
-        punpckhwd   xmm1,       xmm0                    ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0
+        movdqa      xmm7,       xmm4
+        punpcklwd   xmm4,       xmm1                    ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
+        punpckhwd   xmm7,       xmm1                    ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0
 
         movdqa      xmm6,       xmm4
-        punpckldq   xmm4,       xmm1                    ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
-        punpckhdq   xmm6,       xmm1                    ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
+        punpckldq   xmm4,       xmm7                    ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
+        punpckhdq   xmm6,       xmm7                    ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
 
-        movdqa      xmm0,       t0                      ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
-        movdqa      xmm2,       t1                      ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
         movdqa      xmm1,       xmm0
         movdqa      xmm3,       xmm2
 
@@ -1579,6 +1570,8 @@
         punpcklqdq  xmm2,       xmm6                    ; q0  f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
         punpckhqdq  xmm3,       xmm6                    ; q1  f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
 
+        mov         rdx,        arg(2)                          ;blimit
+
         ; calculate mask
         movdqa      xmm6,       xmm0                            ; p1
         movdqa      xmm7,       xmm3                            ; q1
@@ -1588,6 +1581,8 @@
         pand        xmm6,       [GLOBAL(tfe)]                   ; set lsb of each byte to zero
         psrlw       xmm6,       1                               ; abs(p1-q1)/2
 
+        movdqa      xmm7, [rdx]
+
         movdqa      xmm5,       xmm1                            ; p0
         movdqa      xmm4,       xmm2                            ; q0
         psubusb     xmm5,       xmm2                            ; p0-=q0
@@ -1596,8 +1591,7 @@
         paddusb     xmm5,       xmm5                            ; abs(p0-q0)*2
         paddusb     xmm5,       xmm6                            ; abs (p0 - q0) *2 + abs(p1-q1)/2
 
-        mov         rdx,        arg(2)                          ;blimit
-        movdqa      xmm7, XMMWORD PTR [rdx]
+        movdqa      xmm4, [GLOBAL(t80)]
 
         psubusb     xmm5,        xmm7                           ; abs(p0 - q0) *2 + abs(p1-q1)/2  > blimit
         pxor        xmm7,        xmm7
@@ -1607,59 +1601,48 @@
         movdqa        t0,        xmm0
         movdqa        t1,        xmm3
 
-        pxor        xmm0,        [GLOBAL(t80)]                  ; p1 offset to convert to signed values
-        pxor        xmm3,        [GLOBAL(t80)]                  ; q1 offset to convert to signed values
-
+        pxor        xmm0,        xmm4                  ; p1 offset to convert to signed values
+        pxor        xmm3,        xmm4                  ; q1 offset to convert to signed values
         psubsb      xmm0,        xmm3                           ; p1 - q1
+
         movdqa      xmm6,        xmm1                           ; p0
+;        movdqa      xmm7,        xmm2                           ; q0
 
-        movdqa      xmm7,        xmm2                           ; q0
-        pxor        xmm6,        [GLOBAL(t80)]                  ; offset to convert to signed values
+        pxor        xmm6,        xmm4                  ; offset to convert to signed values
+        pxor        xmm2,        xmm4                  ; offset to convert to signed values
 
-        pxor        xmm7,        [GLOBAL(t80)]                  ; offset to convert to signed values
-        movdqa      xmm3,        xmm7                           ; offseted ; q0
-
-        psubsb      xmm7,        xmm6                           ; q0 - p0
-        paddsb      xmm0,        xmm7                           ; p1 - q1 + 1 * (q0 - p0)
-
-        paddsb      xmm0,        xmm7                           ; p1 - q1 + 2 * (q0 - p0)
-        paddsb      xmm0,        xmm7                           ; p1 - q1 + 3 * (q0 - p0)
-
+        movdqa      xmm3,        xmm2                           ; offseted ; q0
+        psubsb      xmm2,        xmm6                           ; q0 - p0
+        paddsb      xmm0,        xmm2                           ; p1 - q1 + 1 * (q0 - p0)
+        paddsb      xmm0,        xmm2                           ; p1 - q1 + 2 * (q0 - p0)
+        paddsb      xmm0,        xmm2                           ; p1 - q1 + 3 * (q0 - p0)
         pand        xmm5,        xmm0                           ; mask filter values we don't care about
 
-
         paddsb      xmm5,        [GLOBAL(t4)]                   ;  3* (q0 - p0) + (p1 - q1) + 4
-
-        movdqa      xmm0,        xmm5                           ; get a copy of filters
-        psllw       xmm0,        8                              ; shift left 8
-
-        psraw       xmm0,        3                              ; arithmetic shift right 11
-        psrlw       xmm0,        8
-
-        movdqa      xmm7,        xmm5                           ; get a copy of filters
-        psraw       xmm7,        11                             ; arithmetic shift right 11
-
-        psllw       xmm7,        8                              ; shift left 8 to put it back
-        por         xmm0,        xmm7                           ; put the two together to get result
-
-        psubsb      xmm3,        xmm0                           ; q0-= q0sz add
-        pxor        xmm3,        [GLOBAL(t80)]                  ; unoffset   q0
-
-        ; now do +3 side
+        movdqa      xmm0, xmm5
         psubsb      xmm5,        [GLOBAL(t1s)]                  ; +3 instead of +4
-        movdqa      xmm0,        xmm5                           ; get a copy of filters
 
-        psllw       xmm0,        8                              ; shift left 8
-        psraw       xmm0,        3                              ; arithmetic shift right 11
+        movdqa  xmm1, [GLOBAL(te0)]
+        movdqa  xmm2, [GLOBAL(t1f)]
 
-        psrlw       xmm0,        8
-        psraw       xmm5,        11                             ; arithmetic shift right 11
+        pxor        xmm7, xmm7
+        pcmpgtb     xmm7, xmm0              ;save sign
+        pand        xmm7, xmm1              ;preserve the upper 3 bits
+        psrlw       xmm0, 3
+        pand        xmm0, xmm2              ;clear out upper 3 bits
+        por         xmm0, xmm7              ;add sign
+        psubsb      xmm3, xmm0              ; q0-= q0sz add
 
-        psllw       xmm5,        8                              ; shift left 8 to put it back
-        por         xmm0,        xmm5                           ; put the two together to get result
+        pxor        xmm7, xmm7
+        pcmpgtb     xmm7, xmm5              ;save sign
+        pand        xmm7, xmm1              ;preserve the upper 3 bits
+        psrlw       xmm5, 3
+        pand        xmm5, xmm2              ;clear out upper 3 bits
+        por         xmm5, xmm7              ;add sign
+        paddsb      xmm6, xmm5              ; p0+= p0 add
 
-        paddsb      xmm6,        xmm0                           ; p0+= p0 add
-        pxor        xmm6,        [GLOBAL(t80)]                  ; unoffset   p0
+        pxor        xmm3,        xmm4                  ; unoffset   q0
+        pxor        xmm6,        xmm4                  ; unoffset   p0
 
         movdqa      xmm0,        t0                             ; p1
         movdqa      xmm4,        t1                             ; q1
@@ -1763,3 +1746,9 @@
 align 16
 s63:
     times 8 dw 0x003f
+align 16
+te0:
+    times 16 db 0xe0
+align 16
+t1f:
+    times 16 db 0x1f

diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm
index a82c1b4..4b68ef5 100644
--- a/vp8/common/x86/recon_sse2.asm
+++ b/vp8/common/x86/recon_sse2.asm

@@ -559,12 +559,492 @@
 vp8_intra_pred_uv_ho mmx2
 vp8_intra_pred_uv_ho ssse3
 
+;void vp8_intra_pred_y_dc_sse2(
+;    unsigned char *dst,
+;    int dst_stride
+;    unsigned char *src,
+;    int src_stride,
+;    )
+global sym(vp8_intra_pred_y_dc_sse2)
+sym(vp8_intra_pred_y_dc_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 4
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    ; from top
+    mov         rsi,        arg(2) ;src;
+    movsxd      rax,        dword ptr arg(3) ;src_stride;
+    sub         rsi,        rax
+    pxor        xmm0,       xmm0
+    movdqa      xmm1,       [rsi]
+    psadbw      xmm1,       xmm0
+    movq        xmm2,       xmm1
+    punpckhqdq  xmm1,       xmm1
+    paddw       xmm1,       xmm2
+
+    ; from left
+    dec         rsi
+    lea         rdi,        [rax*3]
+    movzx       ecx,        byte [rsi+rax]
+    movzx       edx,        byte [rsi+rax*2]
+    add         ecx,        edx
+    movzx       edx,        byte [rsi+rdi]
+    add         ecx,        edx
+    lea         rsi,        [rsi+rax*4]
+    movzx       edx,        byte [rsi]
+    add         ecx,        edx
+    movzx       edx,        byte [rsi+rax]
+    add         ecx,        edx
+    movzx       edx,        byte [rsi+rax*2]
+    add         ecx,        edx
+    movzx       edx,        byte [rsi+rdi]
+    add         ecx,        edx
+    lea         rsi,        [rsi+rax*4]
+    movzx       edx,        byte [rsi]
+    add         ecx,        edx
+    movzx       edx,        byte [rsi+rax]
+    add         ecx,        edx
+    movzx       edx,        byte [rsi+rax*2]
+    add         ecx,        edx
+    movzx       edx,        byte [rsi+rdi]
+    add         ecx,        edx
+    lea         rsi,        [rsi+rax*4]
+    movzx       edx,        byte [rsi]
+    add         ecx,        edx
+    movzx       edx,        byte [rsi+rax]
+    add         ecx,        edx
+    movzx       edx,        byte [rsi+rax*2]
+    add         ecx,        edx
+    movzx       edx,        byte [rsi+rdi]
+    add         ecx,        edx
+    movzx       edx,        byte [rsi+rax*4]
+    add         ecx,        edx
+
+    ; add up
+    pextrw      edx,        xmm1, 0x0
+    lea         edx,        [edx+ecx+16]
+    sar         edx,        5
+    movd        xmm1,       edx
+    ; FIXME use pshufb for ssse3 version
+    pshuflw     xmm1,       xmm1, 0x0
+    punpcklqdq  xmm1,       xmm1
+    packuswb    xmm1,       xmm1
+
+    ; write out
+    mov         rsi,        2
+    mov         rdi,        arg(0) ;dst;
+    movsxd      rcx,        dword ptr arg(1) ;dst_stride
+    lea         rax,        [rcx*3]
+
+.label
+    movdqa [rdi      ],     xmm1
+    movdqa [rdi+rcx  ],     xmm1
+    movdqa [rdi+rcx*2],     xmm1
+    movdqa [rdi+rax  ],     xmm1
+    lea         rdi,        [rdi+rcx*4]
+    movdqa [rdi      ],     xmm1
+    movdqa [rdi+rcx  ],     xmm1
+    movdqa [rdi+rcx*2],     xmm1
+    movdqa [rdi+rax  ],     xmm1
+    lea         rdi,        [rdi+rcx*4]
+    dec         rsi
+    jnz .label
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;void vp8_intra_pred_y_dctop_sse2(
+;    unsigned char *dst,
+;    int dst_stride
+;    unsigned char *src,
+;    int src_stride,
+;    )
+global sym(vp8_intra_pred_y_dctop_sse2)
+sym(vp8_intra_pred_y_dctop_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 4
+    push        rsi
+    GET_GOT     rbx
+    ; end prolog
+
+    ; from top
+    mov         rcx,        arg(2) ;src;
+    movsxd      rax,        dword ptr arg(3) ;src_stride;
+    sub         rcx,        rax
+    pxor        xmm0,       xmm0
+    movdqa      xmm1,       [rcx]
+    psadbw      xmm1,       xmm0
+    movdqa      xmm2,       xmm1
+    punpckhqdq  xmm1,       xmm1
+    paddw       xmm1,       xmm2
+
+    ; add up
+    paddw       xmm1,       [GLOBAL(dc_8)]
+    psraw       xmm1,       4
+    ; FIXME use pshufb for ssse3 version
+    pshuflw     xmm1,       xmm1, 0x0
+    punpcklqdq  xmm1,       xmm1
+    packuswb    xmm1,       xmm1
+
+    ; write out
+    mov         rsi,        2
+    mov         rdx,        arg(0) ;dst;
+    movsxd      rcx,        dword ptr arg(1) ;dst_stride
+    lea         rax,        [rcx*3]
+
+.label
+    movdqa [rdx      ],     xmm1
+    movdqa [rdx+rcx  ],     xmm1
+    movdqa [rdx+rcx*2],     xmm1
+    movdqa [rdx+rax  ],     xmm1
+    lea         rdx,        [rdx+rcx*4]
+    movdqa [rdx      ],     xmm1
+    movdqa [rdx+rcx  ],     xmm1
+    movdqa [rdx+rcx*2],     xmm1
+    movdqa [rdx+rax  ],     xmm1
+    lea         rdx,        [rdx+rcx*4]
+    dec         rsi
+    jnz .label
+
+    ; begin epilog
+    RESTORE_GOT
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;void vp8_intra_pred_y_dcleft_sse2(
+;    unsigned char *dst,
+;    int dst_stride
+;    unsigned char *src,
+;    int src_stride,
+;    )
+global sym(vp8_intra_pred_y_dcleft_sse2)
+sym(vp8_intra_pred_y_dcleft_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 4
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    ; from left
+    mov         rsi,        arg(2) ;src;
+    movsxd      rax,        dword ptr arg(3) ;src_stride;
+    dec         rsi
+    lea         rdi,        [rax*3]
+    movzx       ecx,        byte [rsi]
+    movzx       edx,        byte [rsi+rax]
+    add         ecx,        edx
+    movzx       edx,        byte [rsi+rax*2]
+    add         ecx,        edx
+    movzx       edx,        byte [rsi+rdi]
+    add         ecx,        edx
+    lea         rsi,        [rsi+rax*4]
+    movzx       edx,        byte [rsi]
+    add         ecx,        edx
+    movzx       edx,        byte [rsi+rax]
+    add         ecx,        edx
+    movzx       edx,        byte [rsi+rax*2]
+    add         ecx,        edx
+    movzx       edx,        byte [rsi+rdi]
+    add         ecx,        edx
+    lea         rsi,        [rsi+rax*4]
+    movzx       edx,        byte [rsi]
+    add         ecx,        edx
+    movzx       edx,        byte [rsi+rax]
+    add         ecx,        edx
+    movzx       edx,        byte [rsi+rax*2]
+    add         ecx,        edx
+    movzx       edx,        byte [rsi+rdi]
+    add         ecx,        edx
+    lea         rsi,        [rsi+rax*4]
+    movzx       edx,        byte [rsi]
+    add         ecx,        edx
+    movzx       edx,        byte [rsi+rax]
+    add         ecx,        edx
+    movzx       edx,        byte [rsi+rax*2]
+    add         ecx,        edx
+    movzx       edx,        byte [rsi+rdi]
+    lea         edx,        [ecx+edx+8]
+
+    ; add up
+    shr         edx,        4
+    movd        xmm1,       edx
+    ; FIXME use pshufb for ssse3 version
+    pshuflw     xmm1,       xmm1, 0x0
+    punpcklqdq  xmm1,       xmm1
+    packuswb    xmm1,       xmm1
+
+    ; write out
+    mov         rsi,        2
+    mov         rdi,        arg(0) ;dst;
+    movsxd      rcx,        dword ptr arg(1) ;dst_stride
+    lea         rax,        [rcx*3]
+
+.label
+    movdqa [rdi      ],     xmm1
+    movdqa [rdi+rcx  ],     xmm1
+    movdqa [rdi+rcx*2],     xmm1
+    movdqa [rdi+rax  ],     xmm1
+    lea         rdi,        [rdi+rcx*4]
+    movdqa [rdi      ],     xmm1
+    movdqa [rdi+rcx  ],     xmm1
+    movdqa [rdi+rcx*2],     xmm1
+    movdqa [rdi+rax  ],     xmm1
+    lea         rdi,        [rdi+rcx*4]
+    dec         rsi
+    jnz .label
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;void vp8_intra_pred_y_dc128_sse2(
+;    unsigned char *dst,
+;    int dst_stride
+;    unsigned char *src,
+;    int src_stride,
+;    )
+global sym(vp8_intra_pred_y_dc128_sse2)
+sym(vp8_intra_pred_y_dc128_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 4
+    push        rsi
+    GET_GOT     rbx
+    ; end prolog
+
+    ; write out
+    mov         rsi,        2
+    movdqa      xmm1,       [GLOBAL(dc_128)]
+    mov         rax,        arg(0) ;dst;
+    movsxd      rdx,        dword ptr arg(1) ;dst_stride
+    lea         rcx,        [rdx*3]
+
+.label
+    movdqa [rax      ],     xmm1
+    movdqa [rax+rdx  ],     xmm1
+    movdqa [rax+rdx*2],     xmm1
+    movdqa [rax+rcx  ],     xmm1
+    lea         rax,        [rax+rdx*4]
+    movdqa [rax      ],     xmm1
+    movdqa [rax+rdx  ],     xmm1
+    movdqa [rax+rdx*2],     xmm1
+    movdqa [rax+rcx  ],     xmm1
+    lea         rax,        [rax+rdx*4]
+    dec         rsi
+    jnz .label
+
+    ; begin epilog
+    RESTORE_GOT
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;void vp8_intra_pred_y_tm_sse2(
+;    unsigned char *dst,
+;    int dst_stride
+;    unsigned char *src,
+;    int src_stride,
+;    )
+%macro vp8_intra_pred_y_tm 1
+global sym(vp8_intra_pred_y_tm_%1)
+sym(vp8_intra_pred_y_tm_%1):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 4
+    push        rsi
+    push        rdi
+    GET_GOT     rbx
+    ; end prolog
+
+    ; read top row
+    mov         edx,        8
+    mov         rsi,        arg(2) ;src;
+    movsxd      rax,        dword ptr arg(3) ;src_stride;
+    sub         rsi,        rax
+    pxor        xmm0,       xmm0
+%ifidn %1, ssse3
+    movdqa      xmm3,       [GLOBAL(dc_1024)]
+%endif
+    movdqa      xmm1,       [rsi]
+    movdqa      xmm2,       xmm1
+    punpcklbw   xmm1,       xmm0
+    punpckhbw   xmm2,       xmm0
+
+    ; set up left ptrs ans subtract topleft
+    movd        xmm4,       [rsi-1]
+    lea         rsi,        [rsi+rax-1]
+%ifidn %1, sse2
+    punpcklbw   xmm4,       xmm0
+    pshuflw     xmm4,       xmm4, 0x0
+    punpcklqdq  xmm4,       xmm4
+%else
+    pshufb      xmm4,       xmm3
+%endif
+    psubw       xmm1,       xmm4
+    psubw       xmm2,       xmm4
+
+    ; set up dest ptrs
+    mov         rdi,        arg(0) ;dst;
+    movsxd      rcx,        dword ptr arg(1) ;dst_stride
+vp8_intra_pred_y_tm_%1_loop:
+    movd        xmm4,       [rsi]
+    movd        xmm5,       [rsi+rax]
+%ifidn %1, sse2
+    punpcklbw   xmm4,       xmm0
+    punpcklbw   xmm5,       xmm0
+    pshuflw     xmm4,       xmm4, 0x0
+    pshuflw     xmm5,       xmm5, 0x0
+    punpcklqdq  xmm4,       xmm4
+    punpcklqdq  xmm5,       xmm5
+%else
+    pshufb      xmm4,       xmm3
+    pshufb      xmm5,       xmm3
+%endif
+    movdqa      xmm6,       xmm4
+    movdqa      xmm7,       xmm5
+    paddw       xmm4,       xmm1
+    paddw       xmm6,       xmm2
+    paddw       xmm5,       xmm1
+    paddw       xmm7,       xmm2
+    packuswb    xmm4,       xmm6
+    packuswb    xmm5,       xmm7
+    movdqa [rdi    ],       xmm4
+    movdqa [rdi+rcx],       xmm5
+    lea         rsi,        [rsi+rax*2]
+    lea         rdi,        [rdi+rcx*2]
+    dec         edx
+    jnz vp8_intra_pred_y_tm_%1_loop
+
+    ; begin epilog
+    RESTORE_GOT
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+%endmacro
+
+vp8_intra_pred_y_tm sse2
+vp8_intra_pred_y_tm ssse3
+
+;void vp8_intra_pred_y_ve_sse2(
+;    unsigned char *dst,
+;    int dst_stride
+;    unsigned char *src,
+;    int src_stride,
+;    )
+global sym(vp8_intra_pred_y_ve_sse2)
+sym(vp8_intra_pred_y_ve_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 4
+    push        rsi
+    ; end prolog
+
+    ; read from top
+    mov         rax,        arg(2) ;src;
+    movsxd      rdx,        dword ptr arg(3) ;src_stride;
+    sub         rax,        rdx
+    movdqa      xmm1,       [rax]
+
+    ; write out
+    mov         rsi,        2
+    mov         rax,        arg(0) ;dst;
+    movsxd      rdx,        dword ptr arg(1) ;dst_stride
+    lea         rcx,        [rdx*3]
+
+.label
+    movdqa [rax      ],     xmm1
+    movdqa [rax+rdx  ],     xmm1
+    movdqa [rax+rdx*2],     xmm1
+    movdqa [rax+rcx  ],     xmm1
+    lea         rax,        [rax+rdx*4]
+    movdqa [rax      ],     xmm1
+    movdqa [rax+rdx  ],     xmm1
+    movdqa [rax+rdx*2],     xmm1
+    movdqa [rax+rcx  ],     xmm1
+    lea         rax,        [rax+rdx*4]
+    dec         rsi
+    jnz .label
+
+    ; begin epilog
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;void vp8_intra_pred_y_ho_sse2(
+;    unsigned char *dst,
+;    int dst_stride
+;    unsigned char *src,
+;    int src_stride,
+;    )
+global sym(vp8_intra_pred_y_ho_sse2)
+sym(vp8_intra_pred_y_ho_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 4
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    ; read from left and write out
+    mov         edx,        8
+    mov         rsi,        arg(2) ;src;
+    movsxd      rax,        dword ptr arg(3) ;src_stride;
+    mov         rdi,        arg(0) ;dst;
+    movsxd      rcx,        dword ptr arg(1) ;dst_stride
+    dec         rsi
+
+vp8_intra_pred_y_ho_sse2_loop:
+    movd        xmm0,       [rsi]
+    movd        xmm1,       [rsi+rax]
+    ; FIXME use pshufb for ssse3 version
+    punpcklbw   xmm0,       xmm0
+    punpcklbw   xmm1,       xmm1
+    pshuflw     xmm0,       xmm0, 0x0
+    pshuflw     xmm1,       xmm1, 0x0
+    punpcklqdq  xmm0,       xmm0
+    punpcklqdq  xmm1,       xmm1
+    movdqa [rdi    ],       xmm0
+    movdqa [rdi+rcx],       xmm1
+    lea         rsi,        [rsi+rax*2]
+    lea         rdi,        [rdi+rcx*2]
+    dec         edx
+    jnz vp8_intra_pred_y_ho_sse2_loop
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
 SECTION_RODATA
+align 16
 dc_128:
-    times 8 db 128
+    times 16 db 128
 dc_4:
     times 4 dw 4
 align 16
+dc_8:
+    times 8 dw 8
+align 16
 dc_1024:
     times 8 dw 0x400
 align 16

diff --git a/vp8/common/x86/recon_wrapper_sse2.c b/vp8/common/x86/recon_wrapper_sse2.c
index fcc75a9..44221cd 100644
--- a/vp8/common/x86/recon_wrapper_sse2.c
+++ b/vp8/common/x86/recon_wrapper_sse2.c

@@ -94,3 +94,69 @@
                                         vp8_intra_pred_uv_tm_ssse3,
                                         vp8_intra_pred_uv_ho_ssse3);
 }
+
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dc_sse2);
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dctop_sse2);
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dcleft_sse2);
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dc128_sse2);
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_ho_sse2);
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_ve_sse2);
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_tm_sse2);
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_tm_ssse3);
+
+static void vp8_build_intra_predictors_mby_x86(MACROBLOCKD *x,
+                                               unsigned char *dst_y,
+                                               int dst_stride,
+                                               build_intra_predictors_mbuv_fn_t tm_func)
+{
+    int mode = x->mode_info_context->mbmi.mode;
+    build_intra_predictors_mbuv_fn_t fn;
+    int src_stride = x->dst.y_stride;
+    switch (mode) {
+        case  V_PRED: fn = vp8_intra_pred_y_ve_sse2; break;
+        case  H_PRED: fn = vp8_intra_pred_y_ho_sse2; break;
+        case TM_PRED: fn = tm_func; break;
+        case DC_PRED:
+            if (x->up_available) {
+                if (x->left_available) {
+                    fn = vp8_intra_pred_y_dc_sse2; break;
+                } else {
+                    fn = vp8_intra_pred_y_dctop_sse2; break;
+                }
+            } else if (x->left_available) {
+                fn = vp8_intra_pred_y_dcleft_sse2; break;
+            } else {
+                fn = vp8_intra_pred_y_dc128_sse2; break;
+            }
+            break;
+        default: return;
+    }
+
+    fn(dst_y, dst_stride, x->dst.y_buffer, src_stride);
+    return;
+}
+
+void vp8_build_intra_predictors_mby_sse2(MACROBLOCKD *x)
+{
+    vp8_build_intra_predictors_mby_x86(x, x->predictor, 16,
+                                       vp8_intra_pred_y_tm_sse2);
+}
+
+void vp8_build_intra_predictors_mby_ssse3(MACROBLOCKD *x)
+{
+    vp8_build_intra_predictors_mby_x86(x, x->predictor, 16,
+                                       vp8_intra_pred_y_tm_ssse3);
+}
+
+void vp8_build_intra_predictors_mby_s_sse2(MACROBLOCKD *x)
+{
+    vp8_build_intra_predictors_mby_x86(x, x->dst.y_buffer, x->dst.y_stride,
+                                       vp8_intra_pred_y_tm_sse2);
+}
+
+void vp8_build_intra_predictors_mby_s_ssse3(MACROBLOCKD *x)
+{
+    vp8_build_intra_predictors_mby_x86(x, x->dst.y_buffer, x->dst.y_stride,
+                                       vp8_intra_pred_y_tm_ssse3);
+
+}

diff --git a/vp8/common/x86/recon_x86.h b/vp8/common/x86/recon_x86.h
index fbb3dcb..afacc60 100644
--- a/vp8/common/x86/recon_x86.h
+++ b/vp8/common/x86/recon_x86.h

@@ -42,6 +42,8 @@
 extern prototype_copy_block(vp8_copy_mem16x16_sse2);
 extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_sse2);
 extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_sse2);
+extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_sse2);
+extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_s_sse2);
 
 #if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_recon_copy16x16
@@ -53,12 +55,20 @@
 #undef  vp8_recon_build_intra_predictors_mbuv_s
 #define vp8_recon_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_sse2
 
+#undef  vp8_recon_build_intra_predictors_mby
+#define vp8_recon_build_intra_predictors_mby vp8_build_intra_predictors_mby_sse2
+
+#undef  vp8_recon_build_intra_predictors_mby_s
+#define vp8_recon_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_sse2
+
 #endif
 #endif
 
 #if HAVE_SSSE3
 extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_ssse3);
 extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_ssse3);
+extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_ssse3);
+extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_s_ssse3);
 
 #if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_recon_build_intra_predictors_mbuv
@@ -67,6 +77,12 @@
 #undef  vp8_recon_build_intra_predictors_mbuv_s
 #define vp8_recon_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_ssse3
 
+#undef  vp8_recon_build_intra_predictors_mby
+#define vp8_recon_build_intra_predictors_mby vp8_build_intra_predictors_mby_ssse3
+
+#undef  vp8_recon_build_intra_predictors_mby_s
+#define vp8_recon_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_ssse3
+
 #endif
 #endif
 #endif

diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index b24cbe4..e1e1b79 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c

@@ -11,7 +11,6 @@
 
 #include "vpx_config.h"
 #include "vpx_ports/x86.h"
-#include "vp8/common/g_common.h"
 #include "vp8/common/subpixel.h"
 #include "vp8/common/loopfilter.h"
 #include "vp8/common/recon.h"
@@ -37,6 +36,11 @@
 
     if (flags & HAS_MMX)
     {
+        rtcd->dequant.block               = vp8_dequantize_b_mmx;
+        rtcd->dequant.idct_add            = vp8_dequant_idct_add_mmx;
+        rtcd->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_mmx;
+        rtcd->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_mmx;
+
         rtcd->idct.idct16       = vp8_short_idct4x4llm_mmx;
         rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_mmx;
         rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_mmx;
@@ -81,6 +85,13 @@
             vp8_build_intra_predictors_mbuv_sse2;
         rtcd->recon.build_intra_predictors_mbuv_s =
             vp8_build_intra_predictors_mbuv_s_sse2;
+        rtcd->recon.build_intra_predictors_mby =
+            vp8_build_intra_predictors_mby_sse2;
+        rtcd->recon.build_intra_predictors_mby_s =
+            vp8_build_intra_predictors_mby_s_sse2;
+
+        rtcd->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_sse2;
+        rtcd->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_sse2;
 
         rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_sse2;
 
@@ -124,6 +135,10 @@
             vp8_build_intra_predictors_mbuv_ssse3;
         rtcd->recon.build_intra_predictors_mbuv_s =
             vp8_build_intra_predictors_mbuv_s_ssse3;
+        rtcd->recon.build_intra_predictors_mby =
+            vp8_build_intra_predictors_mby_ssse3;
+        rtcd->recon.build_intra_predictors_mby_s =
+            vp8_build_intra_predictors_mby_s_ssse3;
     }
 #endif
 

diff --git a/vp8/decoder/arm/arm_dsystemdependent.c b/vp8/decoder/arm/arm_dsystemdependent.c
index f802c51..bf0a348 100644
--- a/vp8/decoder/arm/arm_dsystemdependent.c
+++ b/vp8/decoder/arm/arm_dsystemdependent.c

@@ -11,9 +11,6 @@
 
 #include "vpx_config.h"
 #include "vpx_ports/arm.h"
-#include "vp8/common/blockd.h"
-#include "vp8/common/pragmas.h"
-#include "vp8/decoder/dequantize.h"
 #include "vp8/decoder/onyxd_int.h"
 
 void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
@@ -30,20 +27,12 @@
 #if HAVE_ARMV6
     if (flags & HAS_MEDIA)
     {
-        pbi->dequant.block               = vp8_dequantize_b_v6;
-        pbi->dequant.idct_add            = vp8_dequant_idct_add_v6;
-        pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_v6;
-        pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_v6;
     }
 #endif
 
 #if HAVE_ARMV7
     if (flags & HAS_NEON)
     {
-        pbi->dequant.block               = vp8_dequantize_b_neon;
-        pbi->dequant.idct_add            = vp8_dequant_idct_add_neon;
-        pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_neon;
-        pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_neon;
     }
 #endif
 #endif

diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 31eafcf..11d0e38 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c

@@ -15,7 +15,7 @@
 #include "vp8/common/reconintra4x4.h"
 #include "vp8/common/recon.h"
 #include "vp8/common/reconinter.h"
-#include "dequantize.h"
+#include "vp8/common/dequantize.h"
 #include "detokenize.h"
 #include "vp8/common/invtrans.h"
 #include "vp8/common/alloccommon.h"
@@ -32,7 +32,7 @@
 #endif
 #include "vpx_mem/vpx_mem.h"
 #include "vp8/common/idct.h"
-#include "dequantize.h"
+
 #include "vp8/common/threading.h"
 #include "decoderthreading.h"
 #include "dboolhuff.h"
@@ -109,32 +109,12 @@
 #define RTCD_VTABLE(x) NULL
 #endif
 
-/* skip_recon_mb() is Modified: Instead of writing the result to predictor buffer and then copying it
- *  to dst buffer, we can write the result directly to dst buffer. This eliminates unnecessary copy.
- */
-static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
-{
-    if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
-    {
-        RECON_INVOKE(&pbi->common.rtcd.recon, build_intra_predictors_mbuv_s)(xd);
-        RECON_INVOKE(&pbi->common.rtcd.recon,
-                     build_intra_predictors_mby_s)(xd);
-    }
-    else
-    {
-        vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer,
-                                           xd->dst.u_buffer, xd->dst.v_buffer,
-                                           xd->dst.y_stride, xd->dst.uv_stride);
-    }
-}
-
 static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
                               unsigned int mb_idx)
 {
-    int eobtotal = 0;
-    int throw_residual = 0;
     MB_PREDICTION_MODE mode;
     int i;
+    int corruption_detected = 0;
 
     if (xd->mode_info_context->mbmi.mb_skip_coeff)
     {
@@ -142,28 +122,52 @@
     }
     else if (!vp8dx_bool_error(xd->current_bc))
     {
+        int eobtotal;
         eobtotal = vp8_decode_mb_tokens(pbi, xd);
+
+        /* Special case:  Force the loopfilter to skip when eobtotal is zero */
+        xd->mode_info_context->mbmi.mb_skip_coeff = (eobtotal==0);
     }
 
-
-
     mode = xd->mode_info_context->mbmi.mode;
 
-    if (eobtotal == 0 && mode != B_PRED && mode != SPLITMV &&
-            !vp8dx_bool_error(xd->current_bc))
-    {
-        /* Special case:  Force the loopfilter to skip when eobtotal and
-         * mb_skip_coeff are zero.
-         * */
-        xd->mode_info_context->mbmi.mb_skip_coeff = 1;
-
-        skip_recon_mb(pbi, xd);
-        return;
-    }
-
     if (xd->segmentation_enabled)
         mb_init_dequantizer(pbi, xd);
 
+
+#if CONFIG_ERROR_CONCEALMENT
+
+    if(pbi->ec_active)
+    {
+        int throw_residual;
+        /* When we have independent partitions we can apply residual even
+         * though other partitions within the frame are corrupt.
+         */
+        throw_residual = (!pbi->independent_partitions &&
+                          pbi->frame_corrupt_residual);
+        throw_residual = (throw_residual || vp8dx_bool_error(xd->current_bc));
+
+        if ((mb_idx >= pbi->mvs_corrupt_from_mb || throw_residual))
+        {
+            /* MB with corrupt residuals or corrupt mode/motion vectors.
+             * Better to use the predictor as reconstruction.
+             */
+            pbi->frame_corrupt_residual = 1;
+            vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
+            vp8_conceal_corrupt_mb(xd);
+
+
+            corruption_detected = 1;
+
+            /* force idct to be skipped for B_PRED and use the
+             * prediction only for reconstruction
+             * */
+            vpx_memset(xd->eobs, 0, 25);
+        }
+    }
+#endif
+
+
     /* do prediction */
     if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
     {
@@ -173,121 +177,117 @@
         {
             RECON_INVOKE(&pbi->common.rtcd.recon,
                          build_intra_predictors_mby_s)(xd);
-        } else {
+        }
+        else
+        {
+            /* clear out residual eob info */
+            if(xd->mode_info_context->mbmi.mb_skip_coeff)
+                vpx_memset(xd->eobs, 0, 25);
+
             vp8_intra_prediction_down_copy(xd);
+
+            for (i = 0; i < 16; i++)
+            {
+                BLOCKD *b = &xd->block[i];
+                int b_mode = xd->mode_info_context->bmi[i].as_mode;
+
+                RECON_INVOKE(RTCD_VTABLE(recon), intra4x4_predict)
+                              ( *(b->base_dst) + b->dst, b->dst_stride, b_mode,
+                                *(b->base_dst) + b->dst, b->dst_stride );
+
+                if (xd->eobs[i])
+                {
+                    if (xd->eobs[i] > 1)
+                    {
+                        DEQUANT_INVOKE(&pbi->common.rtcd.dequant, idct_add)
+                            (b->qcoeff, b->dequant,
+                            *(b->base_dst) + b->dst, b->dst_stride);
+                    }
+                    else
+                    {
+                        IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
+                            (b->qcoeff[0] * b->dequant[0],
+                            *(b->base_dst) + b->dst, b->dst_stride,
+                            *(b->base_dst) + b->dst, b->dst_stride);
+                        ((int *)b->qcoeff)[0] = 0;
+                    }
+                }
+            }
         }
     }
     else
     {
         vp8_build_inter_predictors_mb(xd);
     }
-    /* When we have independent partitions we can apply residual even
-     * though other partitions within the frame are corrupt.
-     */
-    throw_residual = (!pbi->independent_partitions &&
-                      pbi->frame_corrupt_residual);
-    throw_residual = (throw_residual || vp8dx_bool_error(xd->current_bc));
+
 
 #if CONFIG_ERROR_CONCEALMENT
-    if (pbi->ec_active &&
-        (mb_idx >= pbi->mvs_corrupt_from_mb || throw_residual))
+    if (corruption_detected)
     {
-        /* MB with corrupt residuals or corrupt mode/motion vectors.
-         * Better to use the predictor as reconstruction.
-         */
-        pbi->frame_corrupt_residual = 1;
-        vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
-        vp8_conceal_corrupt_mb(xd);
         return;
     }
 #endif
 
-    /* dequantization and idct */
-    if (mode == B_PRED)
+    if(!xd->mode_info_context->mbmi.mb_skip_coeff)
     {
-        for (i = 0; i < 16; i++)
+        /* dequantization and idct */
+        if (mode != B_PRED)
         {
-            BLOCKD *b = &xd->block[i];
-            int b_mode = xd->mode_info_context->bmi[i].as_mode;
+            short *DQC = xd->block[0].dequant;
 
-            RECON_INVOKE(RTCD_VTABLE(recon), intra4x4_predict)
-                          ( *(b->base_dst) + b->dst, b->dst_stride, b_mode,
-                            *(b->base_dst) + b->dst, b->dst_stride );
+            /* save the dc dequant constant in case it is overridden */
+            short dc_dequant_temp = DQC[0];
 
-            if (xd->eobs[i] )
+            if (mode != SPLITMV)
             {
-                if (xd->eobs[i] > 1)
+                BLOCKD *b = &xd->block[24];
+
+                /* do 2nd order transform on the dc block */
+                if (xd->eobs[24] > 1)
                 {
-                    DEQUANT_INVOKE(&pbi->dequant, idct_add)
-                        (b->qcoeff, b->dequant,
-                        *(b->base_dst) + b->dst, b->dst_stride);
+                    DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b);
+
+                    IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0],
+                        xd->qcoeff);
+                    ((int *)b->qcoeff)[0] = 0;
+                    ((int *)b->qcoeff)[1] = 0;
+                    ((int *)b->qcoeff)[2] = 0;
+                    ((int *)b->qcoeff)[3] = 0;
+                    ((int *)b->qcoeff)[4] = 0;
+                    ((int *)b->qcoeff)[5] = 0;
+                    ((int *)b->qcoeff)[6] = 0;
+                    ((int *)b->qcoeff)[7] = 0;
                 }
                 else
                 {
-                    IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
-                        (b->qcoeff[0] * b->dequant[0],
-                        *(b->base_dst) + b->dst, b->dst_stride,
-                        *(b->base_dst) + b->dst, b->dst_stride);
+                    b->dqcoeff[0] = b->qcoeff[0] * b->dequant[0];
+                    IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0],
+                        xd->qcoeff);
                     ((int *)b->qcoeff)[0] = 0;
                 }
-            }
-        }
-    }
-    else
-    {
-        short *DQC = xd->block[0].dequant;
 
-        /* save the dc dequant constant in case it is overridden */
-        short dc_dequant_temp = DQC[0];
-
-        if (mode != SPLITMV)
-        {
-            BLOCKD *b = &xd->block[24];
-
-            /* do 2nd order transform on the dc block */
-            if (xd->eobs[24] > 1)
-            {
-                DEQUANT_INVOKE(&pbi->dequant, block)(b);
-
-                IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0],
-                    xd->qcoeff);
-                ((int *)b->qcoeff)[0] = 0;
-                ((int *)b->qcoeff)[1] = 0;
-                ((int *)b->qcoeff)[2] = 0;
-                ((int *)b->qcoeff)[3] = 0;
-                ((int *)b->qcoeff)[4] = 0;
-                ((int *)b->qcoeff)[5] = 0;
-                ((int *)b->qcoeff)[6] = 0;
-                ((int *)b->qcoeff)[7] = 0;
-            }
-            else
-            {
-                b->dqcoeff[0] = b->qcoeff[0] * b->dequant[0];
-                IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0],
-                    xd->qcoeff);
-                ((int *)b->qcoeff)[0] = 0;
+                /* override the dc dequant constant in order to preserve the
+                 * dc components
+                 */
+                DQC[0] = 1;
             }
 
-            /* override the dc dequant constant */
-            DQC[0] = 1;
+            DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_y_block)
+                            (xd->qcoeff, xd->block[0].dequant,
+                             xd->dst.y_buffer,
+                             xd->dst.y_stride, xd->eobs);
+
+            /* restore the dc dequant constant */
+            DQC[0] = dc_dequant_temp;
         }
 
-        DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
-                        (xd->qcoeff, xd->block[0].dequant,
-                         xd->dst.y_buffer,
-                         xd->dst.y_stride, xd->eobs);
-
-        /* restore the dc dequant constant */
-        DQC[0] = dc_dequant_temp;
+        DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_uv_block)
+                        (xd->qcoeff+16*16, xd->block[16].dequant,
+                         xd->dst.u_buffer, xd->dst.v_buffer,
+                         xd->dst.uv_stride, xd->eobs+16);
     }
-
-    DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
-                    (xd->qcoeff+16*16, xd->block[16].dequant,
-                     xd->dst.u_buffer, xd->dst.v_buffer,
-                     xd->dst.uv_stride, xd->eobs+16);
 }
 
-
 static int get_delta_q(vp8_reader *bc, int prev, int *q_update)
 {
     int ret_val = 0;
@@ -484,7 +484,8 @@
                                 const unsigned char* token_part_sizes)
 {
     vp8_reader *bool_decoder = &pbi->bc2;
-    int fragment_idx, partition_idx;
+    unsigned int partition_idx;
+    int fragment_idx;
     int num_token_partitions;
     const unsigned char *first_fragment_end = pbi->fragments[0] +
                                           pbi->fragment_sizes[0];

diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index 1d45685..ba94c58 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c

@@ -15,7 +15,7 @@
 #include "vpx_ports/mem.h"
 #include "detokenize.h"
 
-#define BOOL_DATA UINT8
+#define BOOL_DATA unsigned char
 
 #define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES
 DECLARE_ALIGNED(16, static const unsigned char, coef_bands_x[16]) =
@@ -157,10 +157,10 @@
     DECODE_AND_APPLYSIGN(val) \
     Prob = coef_probs + (ENTROPY_NODES*2); \
     if(c < 15){\
-        qcoeff_ptr [ scan[c] ] = (INT16) v; \
+        qcoeff_ptr [ scan[c] ] = (int16_t) v; \
         ++c; \
         goto DO_WHILE; }\
-    qcoeff_ptr [ 15 ] = (INT16) v; \
+    qcoeff_ptr [ 15 ] = (int16_t) v; \
     goto BLOCK_FINISHED;
 
 
@@ -172,7 +172,7 @@
     {\
         range = range-split;\
         value = value-bigsplit;\
-        val += ((UINT16)1<<bits_count);\
+        val += ((uint16_t)1<<bits_count);\
     }\
     else\
     {\
@@ -340,12 +340,12 @@
 
     if (c < 15)
     {
-        qcoeff_ptr [ scan[c] ] = (INT16) v;
+        qcoeff_ptr [ scan[c] ] = (int16_t) v;
         ++c;
         goto DO_WHILE;
     }
 
-    qcoeff_ptr [ 15 ] = (INT16) v;
+    qcoeff_ptr [ 15 ] = (int16_t) v;
 BLOCK_FINISHED:
     eobs[i] = c;
     eobtotal += c;

diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c
index d9f9ba3..8a84e56 100644
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c

@@ -10,7 +10,7 @@
 
 
 #include "vpx_config.h"
-#include "vp8/decoder/dequantize.h"
+#include "vp8/common/dequantize.h"
 #include "vp8/decoder/onyxd_int.h"
 
 extern void vp8_arch_x86_decode_init(VP8D_COMP *pbi);
@@ -20,11 +20,7 @@
 {
     /* Pure C: */
 #if CONFIG_RUNTIME_CPU_DETECT
-    pbi->mb.rtcd                     = &pbi->common.rtcd;
-    pbi->dequant.block               = vp8_dequantize_b_c;
-    pbi->dequant.idct_add            = vp8_dequant_idct_add_c;
-    pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_c;
-    pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_c;
+    pbi->mb.rtcd                               = &pbi->common.rtcd;
 #endif
 
 #if ARCH_X86 || ARCH_X86_64

diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index cf525f4..80648d3 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c

@@ -20,7 +20,6 @@
 #include "vpx_scale/yv12extend.h"
 #include "vp8/common/loopfilter.h"
 #include "vp8/common/swapyv12buffer.h"
-#include "vp8/common/g_common.h"
 #include "vp8/common/threading.h"
 #include "decoderthreading.h"
 #include <stdio.h>
@@ -57,7 +56,7 @@
 }
 
 
-VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
+struct VP8D_COMP * vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
 {
     VP8D_COMP *pbi = vpx_memalign(32, sizeof(VP8D_COMP));
 
@@ -117,14 +116,12 @@
      */
     pbi->independent_partitions = 0;
 
-    return (VP8D_PTR) pbi;
+    return pbi;
 }
 
 
-void vp8dx_remove_decompressor(VP8D_PTR ptr)
+void vp8dx_remove_decompressor(VP8D_COMP *pbi)
 {
-    VP8D_COMP *pbi = (VP8D_COMP *) ptr;
-
     if (!pbi)
         return;
 
@@ -142,9 +139,8 @@
 }
 
 
-vpx_codec_err_t vp8dx_get_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
+vpx_codec_err_t vp8dx_get_reference(VP8D_COMP *pbi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
 {
-    VP8D_COMP *pbi = (VP8D_COMP *) ptr;
     VP8_COMMON *cm = &pbi->common;
     int ref_fb_idx;
 
@@ -174,9 +170,8 @@
 }
 
 
-vpx_codec_err_t vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
+vpx_codec_err_t vp8dx_set_reference(VP8D_COMP *pbi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
 {
-    VP8D_COMP *pbi = (VP8D_COMP *) ptr;
     VP8_COMMON *cm = &pbi->common;
     int *ref_fb_ptr = NULL;
     int free_fb;
@@ -301,19 +296,18 @@
     return err;
 }
 
-int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsigned char *source, int64_t time_stamp)
+int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsigned char *source, int64_t time_stamp)
 {
 #if HAVE_ARMV7
     int64_t dx_store_reg[8];
 #endif
-    VP8D_COMP *pbi = (VP8D_COMP *) ptr;
     VP8_COMMON *cm = &pbi->common;
     int retcode = 0;
 
     /*if(pbi->ready_for_new_data == 0)
         return -1;*/
 
-    if (ptr == 0)
+    if (pbi == 0)
     {
         return -1;
     }
@@ -575,10 +569,9 @@
     pbi->common.error.setjmp = 0;
     return retcode;
 }
-int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags)
+int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags)
 {
     int ret = -1;
-    VP8D_COMP *pbi = (VP8D_COMP *) ptr;
 
     if (pbi->ready_for_new_data == 1)
         return ret;

diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index 519a7f2..cb2593b 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h

@@ -16,7 +16,8 @@
 #include "treereader.h"
 #include "vp8/common/onyxc_int.h"
 #include "vp8/common/threading.h"
-#include "dequantize.h"
+
+
 #if CONFIG_ERROR_CONCEALMENT
 #include "ec_types.h"
 #endif
@@ -43,7 +44,7 @@
 } DATARATE;
 
 
-typedef struct VP8Decompressor
+typedef struct VP8D_COMP
 {
     DECLARE_ALIGNED(16, MACROBLOCKD, mb);
 
@@ -93,11 +94,6 @@
 
     DATARATE dr[16];
 
-#if CONFIG_RUNTIME_CPU_DETECT
-    vp8_dequant_rtcd_vtable_t        dequant;
-#endif
-
-
     vp8_prob prob_intra;
     vp8_prob prob_last;
     vp8_prob prob_gf;

diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 1967781..947b3a1 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c

@@ -189,7 +189,7 @@
             {
                 if (xd->eobs[i] > 1)
                 {
-                    DEQUANT_INVOKE(&pbi->dequant, idct_add)
+                    DEQUANT_INVOKE(&pbi->common.rtcd.dequant, idct_add)
                         (b->qcoeff, b->dequant,
                         *(b->base_dst) + b->dst, b->dst_stride);
                 }
@@ -217,7 +217,7 @@
             /* do 2nd order transform on the dc block */
             if (xd->eobs[24] > 1)
             {
-                DEQUANT_INVOKE(&pbi->dequant, block)(b);
+                DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b);
 
                 IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0],
                     xd->qcoeff);
@@ -248,13 +248,13 @@
             DQC = local_dequant;
         }
 
-        DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
+        DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_y_block)
                         (xd->qcoeff, DQC,
                          xd->dst.y_buffer,
                          xd->dst.y_stride, xd->eobs);
     }
 
-    DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
+    DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_uv_block)
                     (xd->qcoeff+16*16, xd->block[16].dequant,
                      xd->dst.u_buffer, xd->dst.v_buffer,
                      xd->dst.uv_stride, xd->eobs+16);

diff --git a/vp8/decoder/x86/x86_dsystemdependent.c b/vp8/decoder/x86/x86_dsystemdependent.c
index 91dba7e..27bf5dd 100644
--- a/vp8/decoder/x86/x86_dsystemdependent.c
+++ b/vp8/decoder/x86/x86_dsystemdependent.c

@@ -13,47 +13,7 @@
 #include "vpx_ports/x86.h"
 #include "vp8/decoder/onyxd_int.h"
 
-
-#if HAVE_MMX
-void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q);
-
-void vp8_dequantize_b_mmx(BLOCKD *d)
-{
-    short *sq = (short *) d->qcoeff;
-    short *dq = (short *) d->dqcoeff;
-    short *q = (short *) d->dequant;
-    vp8_dequantize_b_impl_mmx(sq, dq, q);
-}
-#endif
-
 void vp8_arch_x86_decode_init(VP8D_COMP *pbi)
 {
-#if CONFIG_RUNTIME_CPU_DETECT
-    int flags = x86_simd_caps();
 
-    /* Note:
-     *
-     * This platform can be built without runtime CPU detection as well. If
-     * you modify any of the function mappings present in this file, be sure
-     * to also update them in static mapings (<arch>/filename_<arch>.h)
-     */
-    /* Override default functions with fastest ones for this CPU. */
-#if HAVE_MMX
-    if (flags & HAS_MMX)
-    {
-        pbi->dequant.block               = vp8_dequantize_b_mmx;
-        pbi->dequant.idct_add            = vp8_dequant_idct_add_mmx;
-        pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_mmx;
-        pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_mmx;
-    }
-#endif
-#if HAVE_SSE2
-    if (flags & HAS_SSE2)
-    {
-        pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_sse2;
-        pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_sse2;
-    }
-#endif
-
-#endif
 }

diff --git a/vp8/encoder/arm/variance_arm.c b/vp8/encoder/arm/variance_arm.c
index e77be9f..7fc7473 100644
--- a/vp8/encoder/arm/variance_arm.c
+++ b/vp8/encoder/arm/variance_arm.c

@@ -11,9 +11,9 @@
 #include "vpx_config.h"
 #include "vp8/encoder/variance.h"
 #include "vp8/common/filter.h"
-#include "vp8/common/arm/bilinearfilter_arm.h"
 
 #if HAVE_ARMV6
+#include "vp8/common/arm/bilinearfilter_arm.h"
 
 unsigned int vp8_sub_pixel_variance8x8_armv6
 (

diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 5eea39c..669bfad 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c

@@ -878,7 +878,28 @@
         }
     }
 }
+void vp8_convert_rfct_to_prob(VP8_COMP *const cpi)
+{
+    const int *const rfct = cpi->count_mb_ref_frame_usage;
+    const int rf_intra = rfct[INTRA_FRAME];
+    const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];
 
+    // Calculate the probabilities used to code the ref frame based on useage
+    if (!(cpi->prob_intra_coded = rf_intra * 255 / (rf_intra + rf_inter)))
+        cpi->prob_intra_coded = 1;
+
+    cpi->prob_last_coded = rf_inter ? (rfct[LAST_FRAME] * 255) / rf_inter : 128;
+
+    if (!cpi->prob_last_coded)
+        cpi->prob_last_coded = 1;
+
+    cpi->prob_gf_coded = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
+                  ? (rfct[GOLDEN_FRAME] * 255) / (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]) : 128;
+
+    if (!cpi->prob_gf_coded)
+        cpi->prob_gf_coded = 1;
+
+}
 
 static void pack_inter_mode_mvs(VP8_COMP *const cpi)
 {
@@ -886,36 +907,17 @@
     vp8_writer *const w = cpi->bc;
     const MV_CONTEXT *mvc = pc->fc.mvc;
 
-    const int *const rfct = cpi->count_mb_ref_frame_usage;
-    const int rf_intra = rfct[INTRA_FRAME];
-    const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];
 
     MODE_INFO *m = pc->mi, *ms;
     const int mis = pc->mode_info_stride;
     int mb_row = -1;
 
-    int prob_last_coded;
-    int prob_gf_coded;
     int prob_skip_false = 0;
     ms = pc->mi - 1;
 
     cpi->mb.partition_info = cpi->mb.pi;
 
-    // Calculate the probabilities to be used to code the reference frame based on actual useage this frame
-    if (!(cpi->prob_intra_coded = rf_intra * 255 / (rf_intra + rf_inter)))
-        cpi->prob_intra_coded = 1;
-
-    prob_last_coded = rf_inter ? (rfct[LAST_FRAME] * 255) / rf_inter : 128;
-
-    if (!prob_last_coded)
-        prob_last_coded = 1;
-
-    prob_gf_coded = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
-                    ? (rfct[GOLDEN_FRAME] * 255) / (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]) : 128;
-
-    if (!prob_gf_coded)
-        prob_gf_coded = 1;
-
+    vp8_convert_rfct_to_prob(cpi);
 
 #ifdef ENTROPY_STATS
     active_section = 1;
@@ -936,8 +938,8 @@
     }
 
     vp8_write_literal(w, cpi->prob_intra_coded, 8);
-    vp8_write_literal(w, prob_last_coded, 8);
-    vp8_write_literal(w, prob_gf_coded, 8);
+    vp8_write_literal(w, cpi->prob_last_coded, 8);
+    vp8_write_literal(w, cpi->prob_gf_coded, 8);
 
     update_mbintra_mode_probs(cpi);
 
@@ -999,11 +1001,11 @@
                 vp8_write(w, 1, cpi->prob_intra_coded);
 
                 if (rf == LAST_FRAME)
-                    vp8_write(w, 0, prob_last_coded);
+                    vp8_write(w, 0, cpi->prob_last_coded);
                 else
                 {
-                    vp8_write(w, 1, prob_last_coded);
-                    vp8_write(w, (rf == GOLDEN_FRAME) ? 0 : 1, prob_gf_coded);
+                    vp8_write(w, 1, cpi->prob_last_coded);
+                    vp8_write(w, (rf == GOLDEN_FRAME) ? 0 : 1, cpi->prob_gf_coded);
                 }
 
                 {
@@ -1204,7 +1206,7 @@
     {
         for (j=0; j < PREV_COEF_CONTEXTS; ++j)
         {
-            const int tmp = out[i];
+            const unsigned int tmp = out[i];
             out[i] += probs[j][i];
             /* check for wrap */
             if (out[i] < tmp)
@@ -1355,6 +1357,24 @@
     return savings;
 }
 
+void vp8_calc_ref_frame_costs(int *ref_frame_cost,
+                              int prob_intra,
+                              int prob_last,
+                              int prob_garf
+                             )
+{
+    ref_frame_cost[INTRA_FRAME]   = vp8_cost_zero(prob_intra);
+    ref_frame_cost[LAST_FRAME]    = vp8_cost_one(prob_intra)
+                                    + vp8_cost_zero(prob_last);
+    ref_frame_cost[GOLDEN_FRAME]  = vp8_cost_one(prob_intra)
+                                    + vp8_cost_one(prob_last)
+                                    + vp8_cost_zero(prob_garf);
+    ref_frame_cost[ALTREF_FRAME]  = vp8_cost_one(prob_intra)
+                                    + vp8_cost_one(prob_last)
+                                    + vp8_cost_one(prob_garf);
+
+}
+
 int vp8_estimate_entropy_savings(VP8_COMP *cpi)
 {
     int savings = 0;
@@ -1362,7 +1382,7 @@
     const int *const rfct = cpi->count_mb_ref_frame_usage;
     const int rf_intra = rfct[INTRA_FRAME];
     const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];
-    int new_intra, new_last, gf_last, oldtotal, newtotal;
+    int new_intra, new_last, new_garf, oldtotal, newtotal;
     int ref_frame_cost[MAX_REF_FRAMES];
 
     vp8_clear_system_state(); //__asm emms;
@@ -1374,19 +1394,11 @@
 
         new_last = rf_inter ? (rfct[LAST_FRAME] * 255) / rf_inter : 128;
 
-        gf_last = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
+        new_garf = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
                   ? (rfct[GOLDEN_FRAME] * 255) / (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]) : 128;
 
-        // new costs
-        ref_frame_cost[INTRA_FRAME]   = vp8_cost_zero(new_intra);
-        ref_frame_cost[LAST_FRAME]    = vp8_cost_one(new_intra)
-                                        + vp8_cost_zero(new_last);
-        ref_frame_cost[GOLDEN_FRAME]  = vp8_cost_one(new_intra)
-                                        + vp8_cost_one(new_last)
-                                        + vp8_cost_zero(gf_last);
-        ref_frame_cost[ALTREF_FRAME]  = vp8_cost_one(new_intra)
-                                        + vp8_cost_one(new_last)
-                                        + vp8_cost_one(gf_last);
+
+        vp8_calc_ref_frame_costs(ref_frame_cost,new_intra,new_last,new_garf);
 
         newtotal =
             rfct[INTRA_FRAME] * ref_frame_cost[INTRA_FRAME] +
@@ -1396,15 +1408,8 @@
 
 
         // old costs
-        ref_frame_cost[INTRA_FRAME]   = vp8_cost_zero(cpi->prob_intra_coded);
-        ref_frame_cost[LAST_FRAME]    = vp8_cost_one(cpi->prob_intra_coded)
-                                        + vp8_cost_zero(cpi->prob_last_coded);
-        ref_frame_cost[GOLDEN_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
-                                        + vp8_cost_one(cpi->prob_last_coded)
-                                        + vp8_cost_zero(cpi->prob_gf_coded);
-        ref_frame_cost[ALTREF_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
-                                        + vp8_cost_one(cpi->prob_last_coded)
-                                        + vp8_cost_one(cpi->prob_gf_coded);
+        vp8_calc_ref_frame_costs(ref_frame_cost,cpi->prob_intra_coded,
+                                 cpi->prob_last_coded,cpi->prob_gf_coded);
 
         oldtotal =
             rfct[INTRA_FRAME] * ref_frame_cost[INTRA_FRAME] +
@@ -1639,20 +1644,20 @@
 
 
     // Signal whether or not Segmentation is enabled
-    vp8_write_bit(bc, (xd->segmentation_enabled) ? 1 : 0);
+    vp8_write_bit(bc, xd->segmentation_enabled);
 
     // Indicate which features are enabled
     if (xd->segmentation_enabled)
     {
         // Signal whether or not the segmentation map is being updated.
-        vp8_write_bit(bc, (xd->update_mb_segmentation_map) ? 1 : 0);
-        vp8_write_bit(bc, (xd->update_mb_segmentation_data) ? 1 : 0);
+        vp8_write_bit(bc, xd->update_mb_segmentation_map);
+        vp8_write_bit(bc, xd->update_mb_segmentation_data);
 
         if (xd->update_mb_segmentation_data)
         {
             signed char Data;
 
-            vp8_write_bit(bc, (xd->mb_segement_abs_delta) ? 1 : 0);
+            vp8_write_bit(bc, xd->mb_segement_abs_delta);
 
             // For each segmentation feature (Quant and loop filter level)
             for (i = 0; i < MB_LVL_MAX; i++)
@@ -1709,7 +1714,7 @@
     vp8_write_literal(bc, pc->sharpness_level, 3);
 
     // Write out loop filter deltas applied at the MB level based on mode or ref frame (if they are enabled).
-    vp8_write_bit(bc, (xd->mode_ref_lf_delta_enabled) ? 1 : 0);
+    vp8_write_bit(bc, xd->mode_ref_lf_delta_enabled);
 
     if (xd->mode_ref_lf_delta_enabled)
     {

diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index 5e5a60d..0a74ca4 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h

@@ -45,10 +45,6 @@
     unsigned char **base_src;
     int src;
     int src_stride;
-
-//  MV  enc_mv;
-    int force_empty;
-
 } BLOCK;
 
 typedef struct
@@ -107,7 +103,6 @@
     int mv_row_min;
     int mv_row_max;
 
-    int vector_range;    // Used to monitor limiting range of recent vectors to guide search.
     int skip;
 
     int encode_breakout;

diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 0927f51..6a9ba29 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c

@@ -39,7 +39,12 @@
 #define IF_RTCD(x)  NULL
 #endif
 extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;
-
+extern void vp8_calc_ref_frame_costs(int *ref_frame_cost,
+                                     int prob_intra,
+                                     int prob_last,
+                                     int prob_garf
+                                    );
+extern void vp8_convert_rfct_to_prob(VP8_COMP *const cpi);
 extern void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex);
 extern void vp8_auto_select_speed(VP8_COMP *cpi);
 extern void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
@@ -590,8 +595,6 @@
     // Activity map pointer
     x->mb_activity_ptr = cpi->mb_activity_map;
 
-    x->vector_range = 32;
-
     x->act_zbin_adj = 0;
 
     x->partition_info = x->pi;
@@ -636,55 +639,23 @@
     vpx_memset(cm->above_context, 0,
                sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
 
-    xd->ref_frame_cost[INTRA_FRAME]   = vp8_cost_zero(cpi->prob_intra_coded);
-
     // Special case treatment when GF and ARF are not sensible options for reference
     if (cpi->ref_frame_flags == VP8_LAST_FLAG)
-    {
-        xd->ref_frame_cost[LAST_FRAME]    = vp8_cost_one(cpi->prob_intra_coded)
-                                        + vp8_cost_zero(255);
-        xd->ref_frame_cost[GOLDEN_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
-                                        + vp8_cost_one(255)
-                                        + vp8_cost_zero(128);
-        xd->ref_frame_cost[ALTREF_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
-                                        + vp8_cost_one(255)
-                                        + vp8_cost_one(128);
-    }
+        vp8_calc_ref_frame_costs(xd->ref_frame_cost,
+                                 cpi->prob_intra_coded,255,128);
     else if ((cpi->oxcf.number_of_layers > 1) &&
                (cpi->ref_frame_flags == VP8_GOLD_FLAG))
-    {
-        xd->ref_frame_cost[LAST_FRAME]    = vp8_cost_one(cpi->prob_intra_coded)
-                                        + vp8_cost_zero(1);
-        xd->ref_frame_cost[GOLDEN_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
-                                        + vp8_cost_one(1)
-                                        + vp8_cost_zero(255);
-        xd->ref_frame_cost[ALTREF_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
-                                        + vp8_cost_one(1)
-                                        + vp8_cost_one(255);
-    }
+        vp8_calc_ref_frame_costs(xd->ref_frame_cost,
+                                 cpi->prob_intra_coded,1,255);
     else if ((cpi->oxcf.number_of_layers > 1) &&
                 (cpi->ref_frame_flags == VP8_ALT_FLAG))
-    {
-        xd->ref_frame_cost[LAST_FRAME]    = vp8_cost_one(cpi->prob_intra_coded)
-                                        + vp8_cost_zero(1);
-        xd->ref_frame_cost[GOLDEN_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
-                                        + vp8_cost_one(1)
-                                        + vp8_cost_zero(1);
-        xd->ref_frame_cost[ALTREF_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
-                                        + vp8_cost_one(1)
-                                        + vp8_cost_one(1);
-    }
+        vp8_calc_ref_frame_costs(xd->ref_frame_cost,
+                                 cpi->prob_intra_coded,1,1);
     else
-    {
-        xd->ref_frame_cost[LAST_FRAME]    = vp8_cost_one(cpi->prob_intra_coded)
-                                        + vp8_cost_zero(cpi->prob_last_coded);
-        xd->ref_frame_cost[GOLDEN_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
-                                        + vp8_cost_one(cpi->prob_last_coded)
-                                        + vp8_cost_zero(cpi->prob_gf_coded);
-        xd->ref_frame_cost[ALTREF_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
-                                        + vp8_cost_one(cpi->prob_last_coded)
-                                        + vp8_cost_one(cpi->prob_gf_coded);
-    }
+        vp8_calc_ref_frame_costs(xd->ref_frame_cost,
+                                 cpi->prob_intra_coded,
+                                 cpi->prob_last_coded,
+                                 cpi->prob_gf_coded);
 
     xd->fullpixel_mask = 0xffffffff;
     if(cm->full_pixel)
@@ -966,31 +937,7 @@
     if ((cm->frame_type != KEY_FRAME) && ((cpi->oxcf.number_of_layers > 1) ||
         (!cm->refresh_alt_ref_frame && !cm->refresh_golden_frame)))
     {
-        const int *const rfct = cpi->count_mb_ref_frame_usage;
-        const int rf_intra = rfct[INTRA_FRAME];
-        const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];
-
-        if ((rf_intra + rf_inter) > 0)
-        {
-            cpi->prob_intra_coded = (rf_intra * 255) / (rf_intra + rf_inter);
-
-            if (cpi->prob_intra_coded < 1)
-                cpi->prob_intra_coded = 1;
-
-            if ((cm->frames_since_golden > 0) || cpi->source_alt_ref_active)
-            {
-                cpi->prob_last_coded = rf_inter ? (rfct[LAST_FRAME] * 255) / rf_inter : 128;
-
-                if (cpi->prob_last_coded < 1)
-                    cpi->prob_last_coded = 1;
-
-                cpi->prob_gf_coded = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
-                                     ? (rfct[GOLDEN_FRAME] * 255) / (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]) : 128;
-
-                if (cpi->prob_gf_coded < 1)
-                    cpi->prob_gf_coded = 1;
-            }
-        }
+      vp8_convert_rfct_to_prob(cpi);
     }
 
 #if 0
@@ -1142,8 +1089,10 @@
 #endif
 }
 
-int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int mb_row, int mb_col)
+int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
+                                   int mb_row, int mb_col)
 {
+    MACROBLOCKD *xd = &x->e_mbd;
     int rate;
 
     if (cpi->sf.RD && cpi->compressor_speed != 2)
@@ -1163,14 +1112,17 @@
         vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
 
     vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
+
     sum_intra_stats(cpi, x);
     vp8_tokenize_mb(cpi, &x->e_mbd, t);
 
-    if (x->e_mbd.mode_info_context->mbmi.mode != B_PRED)
-        vp8_inverse_transform_mby(IF_RTCD(&cpi->rtcd.common->idct), &x->e_mbd);
+    if (xd->mode_info_context->mbmi.mode != B_PRED)
+        vp8_inverse_transform_mby(xd, IF_RTCD(&cpi->common.rtcd));
 
-    vp8_inverse_transform_mbuv(IF_RTCD(&cpi->rtcd.common->idct), &x->e_mbd);
-
+    DEQUANT_INVOKE (&cpi->common.rtcd.dequant, idct_add_uv_block)
+                    (xd->qcoeff+16*16, xd->block[16].dequant,
+                     xd->dst.u_buffer, xd->dst.v_buffer,
+                     xd->dst.uv_stride, xd->eobs+16);
     return rate;
 }
 #ifdef SPEEDSTATS
@@ -1232,23 +1184,8 @@
     }
     else
     {
-#if CONFIG_MULTI_RES_ENCODING
-        if (cpi->oxcf.mr_encoder_id == 0)
-        {
-            /* Lowest-resolution encoding */
-            vp8_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,
-                                    &distortion, &intra_error);
-
-        }else
-        {
-            /* Higher-resolution encoding */
-            vp8_mr_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,
-                                &distortion, &intra_error, mb_row, mb_col);
-        }
-#else
         vp8_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,
-                            &distortion, &intra_error);
-#endif
+                            &distortion, &intra_error, mb_row, mb_col);
     }
 
     cpi->prediction_error += distortion;
@@ -1363,12 +1300,14 @@
     if (!x->skip)
     {
         vp8_tokenize_mb(cpi, xd, t);
-        if (x->e_mbd.mode_info_context->mbmi.mode != B_PRED)
-        {
-          vp8_inverse_transform_mby(IF_RTCD(&cpi->rtcd.common->idct),
-                                      &x->e_mbd);
-        }
-        vp8_inverse_transform_mbuv(IF_RTCD(&cpi->rtcd.common->idct), &x->e_mbd);
+
+        if (xd->mode_info_context->mbmi.mode != B_PRED)
+            vp8_inverse_transform_mby(xd, IF_RTCD(&cpi->common.rtcd));
+
+        DEQUANT_INVOKE (&cpi->common.rtcd.dequant, idct_add_uv_block)
+                        (xd->qcoeff+16*16, xd->block[16].dequant,
+                         xd->dst.u_buffer, xd->dst.v_buffer,
+                         xd->dst.uv_stride, xd->eobs+16);
     }
     else
     {

diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index a3b800a..16393a1 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c

@@ -18,7 +18,6 @@
 #include "vp8/common/invtrans.h"
 #include "vp8/common/recon.h"
 #include "dct.h"
-#include "vp8/common/g_common.h"
 #include "encodeintra.h"
 
 
@@ -45,7 +44,7 @@
 
         vp8_encode_intra16x16mby(rtcd, x);
 
-        vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
+        vp8_inverse_transform_mby(&x->e_mbd, IF_RTCD(&cpi->common.rtcd));
     }
     else
     {
@@ -77,8 +76,17 @@
 
     x->quantize_b(be, b);
 
-    vp8_inverse_transform_b(IF_RTCD(&rtcd->common->idct), b, 16);
-
+    if (*b->eob > 1)
+    {
+        IDCT_INVOKE(IF_RTCD(&rtcd->common->idct), idct16)(b->dqcoeff,
+            b->predictor, 16, *(b->base_dst) + b->dst, b->dst_stride);
+    }
+    else
+    {
+        IDCT_INVOKE(IF_RTCD(&rtcd->common->idct), idct1_scalar_add)
+            (b->dqcoeff[0], b->predictor, 16, *(b->base_dst) + b->dst,
+                b->dst_stride);
+    }
 }
 
 void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb)
@@ -96,11 +104,12 @@
 void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 {
     BLOCK *b = &x->block[0];
+    MACROBLOCKD *xd = &x->e_mbd;
 
-    RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mby)(&x->e_mbd);
+    RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mby_s)(&x->e_mbd);
 
-    ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src),
-        b->src_stride, x->e_mbd.predictor, 16);
+    ENCODEMB_INVOKE(&rtcd->encodemb, submby) (x->src_diff, *(b->base_src),
+        b->src_stride, xd->dst.y_buffer, xd->dst.y_stride);
 
     vp8_transform_intra_mby(x);
 
@@ -108,16 +117,17 @@
 
     if (x->optimize)
         vp8_optimize_mby(x, rtcd);
-
 }
 
 void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 {
-    RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mbuv)(&x->e_mbd);
+    MACROBLOCKD *xd = &x->e_mbd;
+
+    RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mbuv_s)(&x->e_mbd);
 
     ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer,
-        x->src.v_buffer, x->src.uv_stride, &x->e_mbd.predictor[256],
-        &x->e_mbd.predictor[320], 8);
+        x->src.v_buffer, x->src.uv_stride, xd->dst.u_buffer,
+        xd->dst.v_buffer, xd->dst.uv_stride);
 
     vp8_transform_mbuv(x);
 
@@ -125,5 +135,4 @@
 
     if (x->optimize)
         vp8_optimize_mbuv(x, rtcd);
-
 }

diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index e9042e1..c9f7553 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c

@@ -105,10 +105,10 @@
     BLOCK *b = &x->block[0];
 
     ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src),
-        b->src_stride, x->e_mbd.predictor, 16);
+        b->src_stride, x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride);
     ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer,
-        x->src.v_buffer, x->src.uv_stride, &x->e_mbd.predictor[256],
-        &x->e_mbd.predictor[320], 8);
+        x->src.v_buffer, x->src.uv_stride, x->e_mbd.dst.u_buffer,
+        x->e_mbd.dst.v_buffer, x->e_mbd.dst.uv_stride);
 }
 
 static void build_dcblock(MACROBLOCK *x)
@@ -625,7 +625,7 @@
 
 void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 {
-    vp8_build_inter_predictors_mb_e(&x->e_mbd);
+    vp8_build_inter_predictors_mb(&x->e_mbd);
 
     vp8_subtract_mb(rtcd, x);
 
@@ -635,7 +635,6 @@
 
     if (x->optimize)
         optimize_mb(x, rtcd);
-
 }
 
 /* this funciton is used by first pass only */
@@ -643,15 +642,15 @@
 {
     BLOCK *b = &x->block[0];
 
-    vp8_build_inter16x16_predictors_mby(&x->e_mbd);
+    vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.dst.y_buffer,
+                                        x->e_mbd.dst.y_stride);
 
     ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src),
-        b->src_stride, x->e_mbd.predictor, 16);
+        b->src_stride, x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride);
 
     transform_mby(x);
 
     vp8_quantize_mby(x);
 
-    vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
-
+    vp8_inverse_transform_mby(&x->e_mbd, IF_RTCD(rtcd->common));
 }

diff --git a/vp8/encoder/encodemb.h b/vp8/encoder/encodemb.h
index 597a57b..0fa87cf 100644
--- a/vp8/encoder/encodemb.h
+++ b/vp8/encoder/encodemb.h

@@ -12,6 +12,7 @@
 #ifndef __INC_ENCODEMB_H
 #define __INC_ENCODEMB_H
 
+
 #include "vpx_config.h"
 #include "block.h"
 

diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index 8c49668..6965598 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c

@@ -38,7 +38,7 @@
 
         if (sem_wait(&cpi->h_event_start_lpf) == 0)
         {
-            if (cpi->b_multi_threaded == FALSE) // we're shutting down
+            if (cpi->b_multi_threaded == 0) // we're shutting down
                 break;
 
             loopfilter_frame(cpi, cm);
@@ -78,7 +78,7 @@
             int *segment_counts = mbri->segment_counts;
             int *totalrate = &mbri->totalrate;
 
-            if (cpi->b_multi_threaded == FALSE) // we're shutting down
+            if (cpi->b_multi_threaded == 0) // we're shutting down
                 break;
 
             for (mb_row = ithread + 1; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
@@ -302,7 +302,6 @@
     z->mv_col_max    = x->mv_col_max;
     z->mv_row_min    = x->mv_row_min;
     z->mv_row_max    = x->mv_row_max;
-    z->vector_range = x->vector_range ;
     */
 
     z->vp8_short_fdct4x4     = x->vp8_short_fdct4x4;
@@ -350,8 +349,6 @@
         z->block[i].src             = x->block[i].src;
         */
         z->block[i].src_stride       = x->block[i].src_stride;
-        z->block[i].force_empty      = x->block[i].force_empty;
-
     }
 
     {
@@ -421,8 +418,6 @@
 #endif
         mb->gf_active_ptr            = x->gf_active_ptr;
 
-        mb->vector_range             = 32;
-
         vpx_memset(mbr_ei[i].segment_counts, 0, sizeof(mbr_ei[i].segment_counts));
         mbr_ei[i].totalrate = 0;
 

diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 8d19d1e..346c06f 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c

@@ -267,8 +267,8 @@
 // Calculate a modified Error used in distributing bits between easier and harder frames
 static double calculate_modified_err(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 {
-    double av_err = ( cpi->twopass.total_stats->ssim_weighted_pred_err /
-                      cpi->twopass.total_stats->count );
+    double av_err = ( cpi->twopass.total_stats.ssim_weighted_pred_err /
+                      cpi->twopass.total_stats.count );
     double this_err = this_frame->ssim_weighted_pred_err;
     double modified_err;
 
@@ -373,7 +373,7 @@
     else
     {
         // For VBR base this on the bits and frames left plus the two_pass_vbrmax_section rate passed in by the user
-        max_bits = (int)(((double)cpi->twopass.bits_left / (cpi->twopass.total_stats->count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0));
+        max_bits = (int)(((double)cpi->twopass.bits_left / (cpi->twopass.total_stats.count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0));
     }
 
     // Trap case where we are out of bits
@@ -385,12 +385,12 @@
 
 void vp8_init_first_pass(VP8_COMP *cpi)
 {
-    zero_stats(cpi->twopass.total_stats);
+    zero_stats(&cpi->twopass.total_stats);
 }
 
 void vp8_end_first_pass(VP8_COMP *cpi)
 {
-    output_stats(cpi, cpi->output_pkt_list, cpi->twopass.total_stats);
+    output_stats(cpi, cpi->output_pkt_list, &cpi->twopass.total_stats);
 }
 
 static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG * recon_buffer, int * best_motion_err, int recon_yoffset )
@@ -804,17 +804,17 @@
                        - cpi->source->ts_start;
 
         // don't want to do output stats with a stack variable!
-        memcpy(cpi->twopass.this_frame_stats,
+        memcpy(&cpi->twopass.this_frame_stats,
                &fps,
                sizeof(FIRSTPASS_STATS));
-        output_stats(cpi, cpi->output_pkt_list, cpi->twopass.this_frame_stats);
-        accumulate_stats(cpi->twopass.total_stats, &fps);
+        output_stats(cpi, cpi->output_pkt_list, &cpi->twopass.this_frame_stats);
+        accumulate_stats(&cpi->twopass.total_stats, &fps);
     }
 
     // Copy the previous Last Frame into the GF buffer if specific conditions for doing so are met
     if ((cm->current_video_frame > 0) &&
-        (cpi->twopass.this_frame_stats->pcnt_inter > 0.20) &&
-        ((cpi->twopass.this_frame_stats->intra_error / cpi->twopass.this_frame_stats->coded_error) > 2.0))
+        (cpi->twopass.this_frame_stats.pcnt_inter > 0.20) &&
+        ((cpi->twopass.this_frame_stats.intra_error / cpi->twopass.this_frame_stats.coded_error) > 2.0))
     {
         vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12);
     }
@@ -861,7 +861,7 @@
 {
     return -(log( prob ) / log( 2.0 ));
 }
-static long long estimate_modemvcost(VP8_COMP *cpi,
+static int64_t estimate_modemvcost(VP8_COMP *cpi,
                                      FIRSTPASS_STATS * fpstats)
 {
     int mv_cost;
@@ -1019,7 +1019,7 @@
     // averaga q observed in clip for non kf/gf.arf frames
     // Give average a chance to settle though.
     if ( (cpi->ni_frames >
-                  ((unsigned int)cpi->twopass.total_stats->count >> 8)) &&
+                  ((unsigned int)cpi->twopass.total_stats.count >> 8)) &&
          (cpi->ni_frames > 150) )
     {
         cpi->twopass.maxq_max_limit = ((cpi->ni_av_qi + 32) < cpi->worst_quality)
@@ -1075,8 +1075,8 @@
     }
 
     // II ratio correction factor for clip as a whole
-    clip_iiratio = cpi->twopass.total_stats->intra_error /
-                   DOUBLE_DIVIDE_CHECK(cpi->twopass.total_stats->coded_error);
+    clip_iiratio = cpi->twopass.total_stats.intra_error /
+                   DOUBLE_DIVIDE_CHECK(cpi->twopass.total_stats.coded_error);
     clip_iifactor = 1.0 - ((clip_iiratio - 10.0) * 0.025);
     if (clip_iifactor < 0.80)
         clip_iifactor = 0.80;
@@ -1260,25 +1260,25 @@
 
     double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100);
 
-    zero_stats(cpi->twopass.total_stats);
-    zero_stats(cpi->twopass.total_left_stats);
+    zero_stats(&cpi->twopass.total_stats);
+    zero_stats(&cpi->twopass.total_left_stats);
 
     if (!cpi->twopass.stats_in_end)
         return;
 
-    *cpi->twopass.total_stats = *cpi->twopass.stats_in_end;
-    *cpi->twopass.total_left_stats = *cpi->twopass.total_stats;
+    cpi->twopass.total_stats = *cpi->twopass.stats_in_end;
+    cpi->twopass.total_left_stats = cpi->twopass.total_stats;
 
     // each frame can have a different duration, as the frame rate in the source
     // isn't guaranteed to be constant.   The frame rate prior to the first frame
     // encoded in the second pass is a guess.  However the sum duration is not.
     // Its calculated based on the actual durations of all frames from the first
     // pass.
-    vp8_new_frame_rate(cpi, 10000000.0 * cpi->twopass.total_stats->count / cpi->twopass.total_stats->duration);
+    vp8_new_frame_rate(cpi, 10000000.0 * cpi->twopass.total_stats.count / cpi->twopass.total_stats.duration);
 
     cpi->output_frame_rate = cpi->frame_rate;
-    cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats->duration * cpi->oxcf.target_bandwidth / 10000000.0) ;
-    cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats->duration * two_pass_min_rate / 10000000.0);
+    cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration * cpi->oxcf.target_bandwidth / 10000000.0) ;
+    cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats.duration * two_pass_min_rate / 10000000.0);
 
     // Calculate a minimum intra value to be used in determining the IIratio
     // scores used in the second pass. We have this minimum to make sure
@@ -1301,7 +1301,7 @@
             sum_iiratio += IIRatio;
         }
 
-        cpi->twopass.avg_iiratio = sum_iiratio / DOUBLE_DIVIDE_CHECK((double)cpi->twopass.total_stats->count);
+        cpi->twopass.avg_iiratio = sum_iiratio / DOUBLE_DIVIDE_CHECK((double)cpi->twopass.total_stats.count);
 
         // Reset file position
         reset_fpf_position(cpi, start_pos);
@@ -1376,7 +1376,7 @@
     double loop_decay_rate,
     double decay_accumulator )
 {
-    BOOL trans_to_still = FALSE;
+    int trans_to_still = 0;
 
     // Break clause to detect very still sections after motion
     // For example a static image after a fade or other transition
@@ -1406,7 +1406,7 @@
 
         // Only if it does do we signal a transition to still
         if ( j == still_interval )
-            trans_to_still = TRUE;
+            trans_to_still = 1;
     }
 
     return trans_to_still;
@@ -1415,14 +1415,14 @@
 // This function detects a flash through the high relative pcnt_second_ref
 // score in the frame following a flash frame. The offset passed in should
 // reflect this
-static BOOL detect_flash( VP8_COMP *cpi, int offset )
+static int detect_flash( VP8_COMP *cpi, int offset )
 {
     FIRSTPASS_STATS next_frame;
 
-    BOOL flash_detected = FALSE;
+    int flash_detected = 0;
 
     // Read the frame data.
-    // The return is FALSE (no flash detected) if not a valid frame
+    // The return is 0 (no flash detected) if not a valid frame
     if ( read_frame_stats(cpi, &next_frame, offset) != EOF )
     {
         // What we are looking for here is a situation where there is a
@@ -1433,7 +1433,7 @@
         if ( (next_frame.pcnt_second_ref > next_frame.pcnt_inter) &&
              (next_frame.pcnt_second_ref >= 0.5 ) )
         {
-            flash_detected = TRUE;
+            flash_detected = 1;
 
             /*if (1)
             {
@@ -1548,7 +1548,7 @@
     double mv_in_out_accumulator = 0.0;
     double abs_mv_in_out_accumulator = 0.0;
     double r;
-    BOOL flash_detected = FALSE;
+    int flash_detected = 0;
 
     // Search forward from the proposed arf/next gf position
     for ( i = 0; i < f_frames; i++ )
@@ -1677,7 +1677,7 @@
     int alt_boost = 0;
     int f_boost = 0;
     int b_boost = 0;
-    BOOL flash_detected;
+    int flash_detected;
 
     cpi->twopass.gf_group_bits = 0;
     cpi->twopass.gf_decay_rate = 0;
@@ -1751,7 +1751,7 @@
                                          loop_decay_rate,
                                          decay_accumulator ) )
         {
-            allow_alt_ref = FALSE;
+            allow_alt_ref = 0;
             boost_score = old_boost_score;
             break;
         }
@@ -1923,7 +1923,7 @@
             int frames_bwd = cpi->oxcf.arnr_max_frames - 1;
             int frames_fwd = cpi->oxcf.arnr_max_frames - 1;
 
-            cpi->source_alt_ref_pending = TRUE;
+            cpi->source_alt_ref_pending = 1;
 
             // For alt ref frames the error score for the end frame of the
             // group (the alt ref frame) should not contribute to the group
@@ -1949,7 +1949,7 @@
             // Note: this_frame->frame has been updated in the loop
             // so it now points at the ARF frame.
             half_gf_int = cpi->baseline_gf_interval >> 1;
-            frames_after_arf = cpi->twopass.total_stats->count -
+            frames_after_arf = cpi->twopass.total_stats.count -
                                this_frame->frame - 1;
 
             switch (cpi->oxcf.arnr_type)
@@ -1989,13 +1989,13 @@
         }
         else
         {
-            cpi->source_alt_ref_pending = FALSE;
+            cpi->source_alt_ref_pending = 0;
             cpi->baseline_gf_interval = i;
         }
     }
     else
     {
-        cpi->source_alt_ref_pending = FALSE;
+        cpi->source_alt_ref_pending = 0;
         cpi->baseline_gf_interval = i;
     }
 
@@ -2005,7 +2005,7 @@
     // where cpi->twopass.kf_group_bits is tied to cpi->twopass.bits_left.
     // This is also important for short clips where there may only be one
     // key frame.
-    if (cpi->twopass.frames_to_key >= (int)(cpi->twopass.total_stats->count -
+    if (cpi->twopass.frames_to_key >= (int)(cpi->twopass.total_stats.count -
                                             cpi->common.current_video_frame))
     {
         cpi->twopass.kf_group_bits =
@@ -2296,7 +2296,7 @@
 void vp8_second_pass(VP8_COMP *cpi)
 {
     int tmp_q;
-    int frames_left = (int)(cpi->twopass.total_stats->count - cpi->common.current_video_frame);
+    int frames_left = (int)(cpi->twopass.total_stats.count - cpi->common.current_video_frame);
 
     FIRSTPASS_STATS this_frame = {0};
     FIRSTPASS_STATS this_frame_copy;
@@ -2341,7 +2341,7 @@
             cpi->twopass.gf_group_error_left = cpi->twopass.kf_group_error_left;
             cpi->baseline_gf_interval = cpi->twopass.frames_to_key;
             cpi->frames_till_gf_update_due = cpi->baseline_gf_interval;
-            cpi->source_alt_ref_pending = FALSE;
+            cpi->source_alt_ref_pending = 0;
         }
 
     }
@@ -2411,7 +2411,7 @@
 
     // Account for mv, mode and other overheads.
     overhead_bits = estimate_modemvcost(
-                        cpi, cpi->twopass.total_left_stats );
+                        cpi, &cpi->twopass.total_left_stats );
 
     // Special case code for first frame.
     if (cpi->common.current_video_frame == 0)
@@ -2425,7 +2425,7 @@
 
             est_cq =
                 estimate_cq( cpi,
-                             cpi->twopass.total_left_stats,
+                             &cpi->twopass.total_left_stats,
                              (int)(cpi->twopass.bits_left / frames_left),
                              overhead_bits );
 
@@ -2440,7 +2440,7 @@
 
         tmp_q = estimate_max_q(
                     cpi,
-                    cpi->twopass.total_left_stats,
+                    &cpi->twopass.total_left_stats,
                     (int)(cpi->twopass.bits_left / frames_left),
                     overhead_bits );
 
@@ -2463,16 +2463,16 @@
     // radical adjustments to the allowed quantizer range just to use up a
     // few surplus bits or get beneath the target rate.
     else if ( (cpi->common.current_video_frame <
-                 (((unsigned int)cpi->twopass.total_stats->count * 255)>>8)) &&
+                 (((unsigned int)cpi->twopass.total_stats.count * 255)>>8)) &&
               ((cpi->common.current_video_frame + cpi->baseline_gf_interval) <
-                 (unsigned int)cpi->twopass.total_stats->count) )
+                 (unsigned int)cpi->twopass.total_stats.count) )
     {
         if (frames_left < 1)
             frames_left = 1;
 
         tmp_q = estimate_max_q(
                     cpi,
-                    cpi->twopass.total_left_stats,
+                    &cpi->twopass.total_left_stats,
                     (int)(cpi->twopass.bits_left / frames_left),
                     overhead_bits );
 
@@ -2489,13 +2489,13 @@
     cpi->twopass.frames_to_key --;
 
     // Update the total stats remaining sturcture
-    subtract_stats(cpi->twopass.total_left_stats, &this_frame );
+    subtract_stats(&cpi->twopass.total_left_stats, &this_frame );
 }
 
 
-static BOOL test_candidate_kf(VP8_COMP *cpi,  FIRSTPASS_STATS *last_frame, FIRSTPASS_STATS *this_frame, FIRSTPASS_STATS *next_frame)
+static int test_candidate_kf(VP8_COMP *cpi,  FIRSTPASS_STATS *last_frame, FIRSTPASS_STATS *this_frame, FIRSTPASS_STATS *next_frame)
 {
-    BOOL is_viable_kf = FALSE;
+    int is_viable_kf = 0;
 
     // Does the frame satisfy the primary criteria of a key frame
     //      If so, then examine how well it predicts subsequent frames
@@ -2569,13 +2569,13 @@
 
         // If there is tolerable prediction for at least the next 3 frames then break out else discard this pottential key frame and move on
         if (boost_score > 5.0 && (i > 3))
-            is_viable_kf = TRUE;
+            is_viable_kf = 1;
         else
         {
             // Reset the file position
             reset_fpf_position(cpi, start_pos);
 
-            is_viable_kf = FALSE;
+            is_viable_kf = 0;
         }
     }
 
@@ -2611,7 +2611,7 @@
     cpi->this_key_frame_forced = cpi->next_key_frame_forced;
 
     // Clear the alt ref active flag as this can never be active on a key frame
-    cpi->source_alt_ref_active = FALSE;
+    cpi->source_alt_ref_active = 0;
 
     // Kf is always a gf so clear frames till next gf counter
     cpi->frames_till_gf_update_due = 0;
@@ -2727,10 +2727,10 @@
         // Reset to the start of the group
         reset_fpf_position(cpi, current_pos);
 
-        cpi->next_key_frame_forced = TRUE;
+        cpi->next_key_frame_forced = 1;
     }
     else
-        cpi->next_key_frame_forced = FALSE;
+        cpi->next_key_frame_forced = 0;
 
     // Special case for the last frame of the file
     if (cpi->twopass.stats_in >= cpi->twopass.stats_in_end)
@@ -3034,8 +3034,8 @@
 
     if (cpi->oxcf.allow_spatial_resampling)
     {
-        int resample_trigger = FALSE;
-        int last_kf_resampled = FALSE;
+        int resample_trigger = 0;
+        int last_kf_resampled = 0;
         int kf_q;
         int scale_val = 0;
         int hr, hs, vr, vs;
@@ -3053,14 +3053,14 @@
         double effective_size_ratio;
 
         if ((cpi->common.Width != cpi->oxcf.Width) || (cpi->common.Height != cpi->oxcf.Height))
-            last_kf_resampled = TRUE;
+            last_kf_resampled = 1;
 
         // Set back to unscaled by defaults
         cpi->common.horiz_scale = NORMAL;
         cpi->common.vert_scale = NORMAL;
 
         // Calculate Average bits per frame.
-        //av_bits_per_frame = cpi->twopass.bits_left/(double)(cpi->twopass.total_stats->count - cpi->common.current_video_frame);
+        //av_bits_per_frame = cpi->twopass.bits_left/(double)(cpi->twopass.total_stats.count - cpi->common.current_video_frame);
         av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate);
         //if ( av_bits_per_frame < 0.0 )
         //  av_bits_per_frame = 0.0
@@ -3117,21 +3117,21 @@
                 (last_kf_resampled && (projected_buffer_level < (cpi->oxcf.resample_up_water_mark * cpi->oxcf.optimal_buffer_level / 100))))
                 //( ((cpi->buffer_level < (cpi->oxcf.resample_down_water_mark * cpi->oxcf.optimal_buffer_level / 100))) &&
                 //  ((projected_buffer_level < (cpi->oxcf.resample_up_water_mark * cpi->oxcf.optimal_buffer_level / 100))) ))
-                resample_trigger = TRUE;
+                resample_trigger = 1;
             else
-                resample_trigger = FALSE;
+                resample_trigger = 0;
         }
         else
         {
-            int64_t clip_bits = (int64_t)(cpi->twopass.total_stats->count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate));
+            int64_t clip_bits = (int64_t)(cpi->twopass.total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate));
             int64_t over_spend = cpi->oxcf.starting_buffer_level - cpi->buffer_level;
 
             if ((last_kf_resampled && (kf_q > cpi->worst_quality)) ||                                               // If triggered last time the threshold for triggering again is reduced
                 ((kf_q > cpi->worst_quality) &&                                                                  // Projected Q higher than allowed and ...
                  (over_spend > clip_bits / 20)))                                                               // ... Overspend > 5% of total bits
-                resample_trigger = TRUE;
+                resample_trigger = 1;
             else
-                resample_trigger = FALSE;
+                resample_trigger = 0;
 
         }
 

diff --git a/vp8/encoder/lookahead.c b/vp8/encoder/lookahead.c
index b92e82b..3e582e3 100644
--- a/vp8/encoder/lookahead.c
+++ b/vp8/encoder/lookahead.c

@@ -48,7 +48,7 @@
     {
         if(ctx->buf)
         {
-            int i;
+            unsigned int i;
 
             for(i = 0; i < ctx->max_sz; i++)
                 vp8_yv12_de_alloc_frame_buffer(&ctx->buf[i].img);
@@ -65,7 +65,7 @@
                    unsigned int depth)
 {
     struct lookahead_ctx *ctx = NULL;
-    int i;
+    unsigned int i;
 
     /* Clamp the lookahead queue depth */
     if(depth < 1)
@@ -188,7 +188,7 @@
 
 struct lookahead_entry*
 vp8_lookahead_peek(struct lookahead_ctx *ctx,
-                   int                   index)
+                   unsigned int          index)
 {
     struct lookahead_entry* buf = NULL;
 

diff --git a/vp8/encoder/lookahead.h b/vp8/encoder/lookahead.h
index afb3fd4..32bafcd 100644
--- a/vp8/encoder/lookahead.h
+++ b/vp8/encoder/lookahead.h

@@ -92,7 +92,7 @@
  */
 struct lookahead_entry*
 vp8_lookahead_peek(struct lookahead_ctx *ctx,
-                   int                   index);
+                   unsigned int          index);
 
 
 /**\brief Get the number of frames currently in the lookahead queue

diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index 9d96383..735af95 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c

@@ -1156,7 +1156,7 @@
     int tot_steps;
     int_mv this_mv;
 
-    int bestsad = INT_MAX;
+    unsigned int bestsad = UINT_MAX;
     int best_site = 0;
     int last_site = 0;
 
@@ -1397,7 +1397,7 @@
     unsigned char *bestaddress;
     int_mv *best_mv = &d->bmi.mv;
     int_mv this_mv;
-    int bestsad = INT_MAX;
+    unsigned int bestsad = UINT_MAX;
     int r, c;
 
     unsigned char *check_here;
@@ -1527,7 +1527,7 @@
     unsigned char *bestaddress;
     int_mv *best_mv = &d->bmi.mv;
     int_mv this_mv;
-    int bestsad = INT_MAX;
+    unsigned int bestsad = UINT_MAX;
     int r, c;
 
     unsigned char *check_here;

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index c3e7a9f..e3f9519 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c

@@ -23,7 +23,6 @@
 #include "ratectrl.h"
 #include "vp8/common/quant_common.h"
 #include "segmentation.h"
-#include "vp8/common/g_common.h"
 #include "vpx_scale/yv12extend.h"
 #if CONFIG_POSTPROC
 #include "vp8/common/postproc.h"
@@ -70,6 +69,7 @@
 #endif
 
 int vp8_estimate_entropy_savings(VP8_COMP *cpi);
+
 int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd);
 
 extern void vp8_temporal_filter_prepare_c(VP8_COMP *cpi, int distance);
@@ -380,42 +380,25 @@
 
     vpx_free(cpi->mb.pip);
     cpi->mb.pip = 0;
-
-#if !(CONFIG_REALTIME_ONLY)
-    vpx_free(cpi->twopass.total_stats);
-    cpi->twopass.total_stats = 0;
-
-    vpx_free(cpi->twopass.total_left_stats);
-    cpi->twopass.total_left_stats = 0;
-
-    vpx_free(cpi->twopass.this_frame_stats);
-    cpi->twopass.this_frame_stats = 0;
-#endif
 }
 
-static void enable_segmentation(VP8_PTR ptr)
+static void enable_segmentation(VP8_COMP *cpi)
 {
-    VP8_COMP *cpi = (VP8_COMP *)(ptr);
-
     // Set the appropriate feature bit
     cpi->mb.e_mbd.segmentation_enabled = 1;
     cpi->mb.e_mbd.update_mb_segmentation_map = 1;
     cpi->mb.e_mbd.update_mb_segmentation_data = 1;
 }
-static void disable_segmentation(VP8_PTR ptr)
+static void disable_segmentation(VP8_COMP *cpi)
 {
-    VP8_COMP *cpi = (VP8_COMP *)(ptr);
-
     // Clear the appropriate feature bit
     cpi->mb.e_mbd.segmentation_enabled = 0;
 }
 
 // Valid values for a segment are 0 to 3
 // Segmentation map is arrange as [Rows][Columns]
-static void set_segmentation_map(VP8_PTR ptr, unsigned char *segmentation_map)
+static void set_segmentation_map(VP8_COMP *cpi, unsigned char *segmentation_map)
 {
-    VP8_COMP *cpi = (VP8_COMP *)(ptr);
-
     // Copy in the new segmentation map
     vpx_memcpy(cpi->segmentation_map, segmentation_map, (cpi->common.mb_rows * cpi->common.mb_cols));
 
@@ -432,19 +415,15 @@
 // abs_delta = SEGMENT_DELTADATA (deltas) abs_delta = SEGMENT_ABSDATA (use the absolute values given).
 //
 //
-static void set_segment_data(VP8_PTR ptr, signed char *feature_data, unsigned char abs_delta)
+static void set_segment_data(VP8_COMP *cpi, signed char *feature_data, unsigned char abs_delta)
 {
-    VP8_COMP *cpi = (VP8_COMP *)(ptr);
-
     cpi->mb.e_mbd.mb_segement_abs_delta = abs_delta;
     vpx_memcpy(cpi->segment_feature_data, feature_data, sizeof(cpi->segment_feature_data));
 }
 
 
-static void segmentation_test_function(VP8_PTR ptr)
+static void segmentation_test_function(VP8_COMP *cpi)
 {
-    VP8_COMP *cpi = (VP8_COMP *)(ptr);
-
     unsigned char *seg_map;
     signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS];
 
@@ -472,10 +451,10 @@
     }*/
 
     // Set the segmentation Map
-    set_segmentation_map(ptr, seg_map);
+    set_segmentation_map(cpi, seg_map);
 
     // Activate segmentation.
-    enable_segmentation(ptr);
+    enable_segmentation(cpi);
 
     // Set up the quant segment data
     feature_data[MB_LVL_ALT_Q][0] = 0;
@@ -490,7 +469,7 @@
 
     // Initialise the feature data structure
     // SEGMENT_DELTADATA    0, SEGMENT_ABSDATA      1
-    set_segment_data(ptr, &feature_data[0][0], SEGMENT_DELTADATA);
+    set_segment_data(cpi, &feature_data[0][0], SEGMENT_DELTADATA);
 
     // Delete sementation map
         vpx_free(seg_map);
@@ -564,10 +543,10 @@
     }
 
     // Set the segmentation Map
-    set_segmentation_map((VP8_PTR)cpi, seg_map);
+    set_segmentation_map(cpi, seg_map);
 
     // Activate segmentation.
-    enable_segmentation((VP8_PTR)cpi);
+    enable_segmentation(cpi);
 
     // Set up the quant segment data
     feature_data[MB_LVL_ALT_Q][0] = 0;
@@ -583,7 +562,7 @@
 
     // Initialise the feature data structure
     // SEGMENT_DELTADATA    0, SEGMENT_ABSDATA      1
-    set_segment_data((VP8_PTR)cpi, &feature_data[0][0], SEGMENT_DELTADATA);
+    set_segment_data(cpi, &feature_data[0][0], SEGMENT_DELTADATA);
 
     // Delete sementation map
     vpx_free(seg_map);
@@ -612,6 +591,93 @@
     cpi->mb.e_mbd.mode_lf_deltas[3] = 4;               // Split mv
 }
 
+/* Convenience macros for mapping speed and mode into a continuous
+ * range
+ */
+#define GOOD(x) (x+1)
+#define RT(x) (x+7)
+
+static int speed_map(int speed, int *map)
+{
+    int res;
+
+    do
+    {
+        res = *map++;
+    } while(speed >= *map++);
+    return res;
+}
+
+static int thresh_mult_map_znn[] = {
+    /* map common to zero, nearest, and near */
+    0, GOOD(2), 1500, GOOD(3), 2000, RT(0), 1000, RT(2), 2000, INT_MAX
+};
+
+static int thresh_mult_map_vhpred[] = {
+    1000, GOOD(2), 1500, GOOD(3), 2000, RT(0), 1000, RT(1), 2000,
+    RT(7), INT_MAX, INT_MAX
+};
+
+static int thresh_mult_map_bpred[] = {
+    2000, GOOD(0), 2500, GOOD(2), 5000, GOOD(3), 7500, RT(0), 2500, RT(1), 5000,
+    RT(6), INT_MAX, INT_MAX
+};
+
+static int thresh_mult_map_tm[] = {
+    1000, GOOD(2), 1500, GOOD(3), 2000, RT(0), 0, RT(1), 1000, RT(2), 2000,
+    RT(7), INT_MAX, INT_MAX
+};
+
+static int thresh_mult_map_new1[] = {
+    1000, GOOD(2), 2000, RT(0), 2000, INT_MAX
+};
+
+static int thresh_mult_map_new2[] = {
+    1000, GOOD(2), 2000, GOOD(3), 2500, GOOD(5), 4000, RT(0), 2000, RT(2), 2500,
+    RT(5), 4000, INT_MAX
+};
+
+static int thresh_mult_map_split1[] = {
+    2500, GOOD(0), 1700, GOOD(2), 10000, GOOD(3), 25000, GOOD(4), INT_MAX,
+    RT(0), 5000, RT(1), 10000, RT(2), 25000, RT(3), INT_MAX, INT_MAX
+};
+
+static int thresh_mult_map_split2[] = {
+    5000, GOOD(0), 4500, GOOD(2), 20000, GOOD(3), 50000, GOOD(4), INT_MAX,
+    RT(0), 10000, RT(1), 20000, RT(2), 50000, RT(3), INT_MAX, INT_MAX
+};
+
+static int mode_check_freq_map_zn2[] = {
+    /* {zero,nearest}{2,3} */
+    0, RT(10), 1<<1, RT(11), 1<<2, RT(12), 1<<3, INT_MAX
+};
+
+static int mode_check_freq_map_vhbpred[] = {
+    0, GOOD(5), 2, RT(0), 0, RT(3), 2, RT(5), 4, INT_MAX
+};
+
+static int mode_check_freq_map_near2[] = {
+    0, GOOD(5), 2, RT(0), 0, RT(3), 2, RT(10), 1<<2, RT(11), 1<<3, RT(12), 1<<4,
+    INT_MAX
+};
+
+static int mode_check_freq_map_new1[] = {
+    0, RT(10), 1<<1, RT(11), 1<<2, RT(12), 1<<3, INT_MAX
+};
+
+static int mode_check_freq_map_new2[] = {
+    0, GOOD(5), 4, RT(0), 0, RT(3), 4, RT(10), 1<<3, RT(11), 1<<4, RT(12), 1<<5,
+    INT_MAX
+};
+
+static int mode_check_freq_map_split1[] = {
+    0, GOOD(2), 2, GOOD(3), 7, RT(1), 2, RT(2), 7, INT_MAX
+};
+
+static int mode_check_freq_map_split2[] = {
+    0, GOOD(1), 2, GOOD(2), 4, GOOD(3), 15, RT(1), 4, RT(2), 15, INT_MAX
+};
+
 void vp8_set_speed_features(VP8_COMP *cpi)
 {
     SPEED_FEATURES *sf = &cpi->sf;
@@ -663,93 +729,81 @@
     if (cpi->ref_frame_flags & VP8_ALT_FLAG)
         ref_frames++;
 
+    /* Convert speed to continuous range, with clamping */
+    if (Mode == 0)
+        Speed = 0;
+    else if (Mode == 2)
+        Speed = RT(Speed);
+    else
+    {
+        if (Speed > 5)
+            Speed = 5;
+        Speed = GOOD(Speed);
+    }
+
+    sf->thresh_mult[THR_ZERO1] =
+    sf->thresh_mult[THR_NEAREST1] =
+    sf->thresh_mult[THR_NEAR1] =
+    sf->thresh_mult[THR_DC] = 0; /* always */
+
+    sf->thresh_mult[THR_ZERO2] =
+    sf->thresh_mult[THR_ZERO3] =
+    sf->thresh_mult[THR_NEAREST2] =
+    sf->thresh_mult[THR_NEAREST3] =
+    sf->thresh_mult[THR_NEAR2]  =
+    sf->thresh_mult[THR_NEAR3]  = speed_map(Speed, thresh_mult_map_znn);
+
+    sf->thresh_mult[THR_V_PRED] =
+    sf->thresh_mult[THR_H_PRED] = speed_map(Speed, thresh_mult_map_vhpred);
+    sf->thresh_mult[THR_B_PRED] = speed_map(Speed, thresh_mult_map_bpred);
+    sf->thresh_mult[THR_TM]     = speed_map(Speed, thresh_mult_map_tm);
+    sf->thresh_mult[THR_NEW1]   = speed_map(Speed, thresh_mult_map_new1);
+    sf->thresh_mult[THR_NEW2]   =
+    sf->thresh_mult[THR_NEW3]   = speed_map(Speed, thresh_mult_map_new2);
+    sf->thresh_mult[THR_SPLIT1] = speed_map(Speed, thresh_mult_map_split1);
+    sf->thresh_mult[THR_SPLIT2] =
+    sf->thresh_mult[THR_SPLIT3] = speed_map(Speed, thresh_mult_map_split2);
+
+    cpi->mode_check_freq[THR_ZERO1] =
+    cpi->mode_check_freq[THR_NEAREST1] =
+    cpi->mode_check_freq[THR_NEAR1] =
+    cpi->mode_check_freq[THR_TM]     =
+    cpi->mode_check_freq[THR_DC] = 0; /* always */
+
+    cpi->mode_check_freq[THR_ZERO2] =
+    cpi->mode_check_freq[THR_ZERO3] =
+    cpi->mode_check_freq[THR_NEAREST2] =
+    cpi->mode_check_freq[THR_NEAREST3] = speed_map(Speed,
+                                                   mode_check_freq_map_zn2);
+
+    cpi->mode_check_freq[THR_NEAR2]  =
+    cpi->mode_check_freq[THR_NEAR3]  = speed_map(Speed,
+                                                 mode_check_freq_map_near2);
+
+    cpi->mode_check_freq[THR_V_PRED] =
+    cpi->mode_check_freq[THR_H_PRED] =
+    cpi->mode_check_freq[THR_B_PRED] = speed_map(Speed,
+                                                 mode_check_freq_map_vhbpred);
+    cpi->mode_check_freq[THR_NEW1]   = speed_map(Speed,
+                                                 mode_check_freq_map_new1);
+    cpi->mode_check_freq[THR_NEW2]   =
+    cpi->mode_check_freq[THR_NEW3]   = speed_map(Speed,
+                                                 mode_check_freq_map_new2);
+    cpi->mode_check_freq[THR_SPLIT1] = speed_map(Speed,
+                                                 mode_check_freq_map_split1);
+    cpi->mode_check_freq[THR_SPLIT2] =
+    cpi->mode_check_freq[THR_SPLIT3] = speed_map(Speed,
+                                                 mode_check_freq_map_split2);
+    Speed = cpi->Speed;
     switch (Mode)
     {
 #if !(CONFIG_REALTIME_ONLY)
     case 0: // best quality mode
-        sf->thresh_mult[THR_ZERO1    ] = 0;
-        sf->thresh_mult[THR_ZERO2    ] = 0;
-        sf->thresh_mult[THR_ZERO3    ] = 0;
-        sf->thresh_mult[THR_NEAREST1 ] = 0;
-        sf->thresh_mult[THR_NEAREST2 ] = 0;
-        sf->thresh_mult[THR_NEAREST3 ] = 0;
-        sf->thresh_mult[THR_NEAR1    ] = 0;
-        sf->thresh_mult[THR_NEAR2    ] = 0;
-        sf->thresh_mult[THR_NEAR3    ] = 0;
-
-        sf->thresh_mult[THR_DC       ] = 0;
-
-        sf->thresh_mult[THR_V_PRED   ] = 1000;
-        sf->thresh_mult[THR_H_PRED   ] = 1000;
-        sf->thresh_mult[THR_B_PRED   ] = 2000;
-        sf->thresh_mult[THR_TM       ] = 1000;
-
-        sf->thresh_mult[THR_NEW1     ] = 1000;
-        sf->thresh_mult[THR_NEW2     ] = 1000;
-        sf->thresh_mult[THR_NEW3     ] = 1000;
-
-        sf->thresh_mult[THR_SPLIT1   ] = 2500;
-        sf->thresh_mult[THR_SPLIT2   ] = 5000;
-        sf->thresh_mult[THR_SPLIT3   ] = 5000;
-
-
         sf->first_step = 0;
         sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
         break;
     case 1:
     case 3:
-        sf->thresh_mult[THR_NEAREST1 ] = 0;
-        sf->thresh_mult[THR_ZERO1    ] = 0;
-        sf->thresh_mult[THR_DC       ] = 0;
-        sf->thresh_mult[THR_NEAR1    ] = 0;
-        sf->thresh_mult[THR_V_PRED   ] = 1000;
-        sf->thresh_mult[THR_H_PRED   ] = 1000;
-        sf->thresh_mult[THR_B_PRED   ] = 2500;
-        sf->thresh_mult[THR_TM       ] = 1000;
-
-        sf->thresh_mult[THR_NEAREST2 ] = 1000;
-        sf->thresh_mult[THR_NEAREST3 ] = 1000;
-
-        sf->thresh_mult[THR_ZERO2    ] = 1000;
-        sf->thresh_mult[THR_ZERO3    ] = 1000;
-        sf->thresh_mult[THR_NEAR2    ] = 1000;
-        sf->thresh_mult[THR_NEAR3    ] = 1000;
-
-#if 1
-        sf->thresh_mult[THR_ZERO1    ] = 0;
-        sf->thresh_mult[THR_ZERO2    ] = 0;
-        sf->thresh_mult[THR_ZERO3    ] = 0;
-        sf->thresh_mult[THR_NEAREST1 ] = 0;
-        sf->thresh_mult[THR_NEAREST2 ] = 0;
-        sf->thresh_mult[THR_NEAREST3 ] = 0;
-        sf->thresh_mult[THR_NEAR1    ] = 0;
-        sf->thresh_mult[THR_NEAR2    ] = 0;
-        sf->thresh_mult[THR_NEAR3    ] = 0;
-
-//        sf->thresh_mult[THR_DC       ] = 0;
-
-//        sf->thresh_mult[THR_V_PRED   ] = 1000;
-//        sf->thresh_mult[THR_H_PRED   ] = 1000;
-//        sf->thresh_mult[THR_B_PRED   ] = 2000;
-//        sf->thresh_mult[THR_TM       ] = 1000;
-
-        sf->thresh_mult[THR_NEW1     ] = 1000;
-        sf->thresh_mult[THR_NEW2     ] = 1000;
-        sf->thresh_mult[THR_NEW3     ] = 1000;
-
-        sf->thresh_mult[THR_SPLIT1   ] = 1700;
-        sf->thresh_mult[THR_SPLIT2   ] = 4500;
-        sf->thresh_mult[THR_SPLIT3   ] = 4500;
-#else
-        sf->thresh_mult[THR_NEW1     ] = 1500;
-        sf->thresh_mult[THR_NEW2     ] = 1500;
-        sf->thresh_mult[THR_NEW3     ] = 1500;
-
-        sf->thresh_mult[THR_SPLIT1   ] = 5000;
-        sf->thresh_mult[THR_SPLIT2   ] = 10000;
-        sf->thresh_mult[THR_SPLIT3   ] = 10000;
-#endif
-
         if (Speed > 0)
         {
             /* Disable coefficient optimization above speed 0 */
@@ -758,83 +812,10 @@
             sf->no_skip_block4x4_search = 0;
 
             sf->first_step = 1;
-
-            cpi->mode_check_freq[THR_SPLIT2] = 2;
-            cpi->mode_check_freq[THR_SPLIT3] = 2;
-            cpi->mode_check_freq[THR_SPLIT1 ] = 0;
-        }
-
-        if (Speed > 1)
-        {
-            cpi->mode_check_freq[THR_SPLIT2] = 4;
-            cpi->mode_check_freq[THR_SPLIT3] = 4;
-            cpi->mode_check_freq[THR_SPLIT1 ] = 2;
-
-            sf->thresh_mult[THR_TM       ] = 1500;
-            sf->thresh_mult[THR_V_PRED   ] = 1500;
-            sf->thresh_mult[THR_H_PRED   ] = 1500;
-            sf->thresh_mult[THR_B_PRED   ] = 5000;
-
-            if (ref_frames > 1)
-            {
-                sf->thresh_mult[THR_NEW1     ] = 2000;
-                sf->thresh_mult[THR_SPLIT1   ] = 10000;
-            }
-
-            if (ref_frames > 2)
-            {
-                sf->thresh_mult[THR_NEAREST2 ] = 1500;
-                sf->thresh_mult[THR_ZERO2    ] = 1500;
-                sf->thresh_mult[THR_NEAR2    ] = 1500;
-                sf->thresh_mult[THR_NEW2     ] = 2000;
-                sf->thresh_mult[THR_SPLIT2   ] = 20000;
-            }
-
-            if (ref_frames > 3)
-            {
-                sf->thresh_mult[THR_NEAREST3 ] = 1500;
-                sf->thresh_mult[THR_ZERO3    ] = 1500;
-                sf->thresh_mult[THR_NEAR3    ] = 1500;
-                sf->thresh_mult[THR_NEW3     ] = 2000;
-                sf->thresh_mult[THR_SPLIT3   ] = 20000;
-            }
         }
 
         if (Speed > 2)
         {
-            cpi->mode_check_freq[THR_SPLIT2] = 15;
-            cpi->mode_check_freq[THR_SPLIT3] = 15;
-            cpi->mode_check_freq[THR_SPLIT1 ] = 7;
-
-            sf->thresh_mult[THR_TM       ] = 2000;
-            sf->thresh_mult[THR_V_PRED   ] = 2000;
-            sf->thresh_mult[THR_H_PRED   ] = 2000;
-            sf->thresh_mult[THR_B_PRED   ] = 7500;
-
-            if (ref_frames > 1)
-            {
-                sf->thresh_mult[THR_NEW1     ] = 2000;
-                sf->thresh_mult[THR_SPLIT1   ] = 25000;
-            }
-
-            if (ref_frames > 2)
-            {
-                sf->thresh_mult[THR_NEAREST2 ] = 2000;
-                sf->thresh_mult[THR_ZERO2    ] = 2000;
-                sf->thresh_mult[THR_NEAR2    ] = 2000;
-                sf->thresh_mult[THR_NEW2     ] = 2500;
-                sf->thresh_mult[THR_SPLIT2   ] = 50000;
-            }
-
-            if (ref_frames > 3)
-            {
-                sf->thresh_mult[THR_NEAREST3 ] = 2000;
-                sf->thresh_mult[THR_ZERO3    ] = 2000;
-                sf->thresh_mult[THR_NEAR3    ] = 2000;
-                sf->thresh_mult[THR_NEW3     ] = 2500;
-                sf->thresh_mult[THR_SPLIT3   ] = 50000;
-            }
-
             sf->improved_quant = 0;
             sf->improved_dct = 0;
 
@@ -846,18 +827,6 @@
 
         if (Speed > 3)
         {
-            sf->thresh_mult[THR_SPLIT3  ] = INT_MAX;
-            sf->thresh_mult[THR_SPLIT2  ] = INT_MAX;
-            sf->thresh_mult[THR_SPLIT1   ] = INT_MAX;
-
-            cpi->mode_check_freq[THR_V_PRED] = 0;
-            cpi->mode_check_freq[THR_H_PRED] = 0;
-            cpi->mode_check_freq[THR_B_PRED] = 0;
-            cpi->mode_check_freq[THR_NEAR2] = 0;
-            cpi->mode_check_freq[THR_NEW2] = 0;
-            cpi->mode_check_freq[THR_NEAR3] = 0;
-            cpi->mode_check_freq[THR_NEW3] = 0;
-
             sf->auto_filter = 1;
             sf->recode_loop = 0; // recode loop off
             sf->RD = 0;         // Turn rd off
@@ -867,38 +836,6 @@
         if (Speed > 4)
         {
             sf->auto_filter = 0;                     // Faster selection of loop filter
-
-            cpi->mode_check_freq[THR_V_PRED] = 2;
-            cpi->mode_check_freq[THR_H_PRED] = 2;
-            cpi->mode_check_freq[THR_B_PRED] = 2;
-
-            if (ref_frames > 2)
-            {
-                cpi->mode_check_freq[THR_NEAR2] = 2;
-                cpi->mode_check_freq[THR_NEW2] = 4;
-            }
-
-            if (ref_frames > 3)
-            {
-                cpi->mode_check_freq[THR_NEAR3] = 2;
-                cpi->mode_check_freq[THR_NEW3] = 4;
-            }
-
-            if (ref_frames > 2)
-            {
-                sf->thresh_mult[THR_NEAREST2 ] = 2000;
-                sf->thresh_mult[THR_ZERO2    ] = 2000;
-                sf->thresh_mult[THR_NEAR2    ] = 2000;
-                sf->thresh_mult[THR_NEW2     ] = 4000;
-            }
-
-            if (ref_frames > 3)
-            {
-                sf->thresh_mult[THR_NEAREST3 ] = 2000;
-                sf->thresh_mult[THR_ZERO3    ] = 2000;
-                sf->thresh_mult[THR_NEAR3    ] = 2000;
-                sf->thresh_mult[THR_NEW3     ] = 4000;
-            }
         }
 
         break;
@@ -908,67 +845,10 @@
         sf->recode_loop = 0;
         sf->auto_filter = 1;
         sf->iterative_sub_pixel = 1;
-        sf->thresh_mult[THR_NEAREST1 ] = 0;
-        sf->thresh_mult[THR_ZERO1    ] = 0;
-        sf->thresh_mult[THR_DC       ] = 0;
-        sf->thresh_mult[THR_TM       ] = 0;
-        sf->thresh_mult[THR_NEAR1    ] = 0;
-        sf->thresh_mult[THR_V_PRED   ] = 1000;
-        sf->thresh_mult[THR_H_PRED   ] = 1000;
-        sf->thresh_mult[THR_B_PRED   ] = 2500;
-        sf->thresh_mult[THR_NEAREST2 ] = 1000;
-        sf->thresh_mult[THR_ZERO2    ] = 1000;
-        sf->thresh_mult[THR_NEAR2    ] = 1000;
-        sf->thresh_mult[THR_NEAREST3 ] = 1000;
-        sf->thresh_mult[THR_ZERO3    ] = 1000;
-        sf->thresh_mult[THR_NEAR3    ] = 1000;
-        sf->thresh_mult[THR_NEW1     ] = 2000;
-        sf->thresh_mult[THR_NEW2     ] = 2000;
-        sf->thresh_mult[THR_NEW3     ] = 2000;
-        sf->thresh_mult[THR_SPLIT1   ] = 5000;
-        sf->thresh_mult[THR_SPLIT2   ] = 10000;
-        sf->thresh_mult[THR_SPLIT3   ] = 10000;
         sf->search_method = NSTEP;
 
         if (Speed > 0)
         {
-            cpi->mode_check_freq[THR_SPLIT2] = 4;
-            cpi->mode_check_freq[THR_SPLIT3] = 4;
-            cpi->mode_check_freq[THR_SPLIT1 ] = 2;
-
-            sf->thresh_mult[THR_DC       ] = 0;
-            sf->thresh_mult[THR_TM       ] = 1000;
-            sf->thresh_mult[THR_V_PRED   ] = 2000;
-            sf->thresh_mult[THR_H_PRED   ] = 2000;
-            sf->thresh_mult[THR_B_PRED   ] = 5000;
-
-            if (ref_frames > 1)
-            {
-                sf->thresh_mult[THR_NEAREST1 ] = 0;
-                sf->thresh_mult[THR_ZERO1    ] = 0;
-                sf->thresh_mult[THR_NEAR1    ] = 0;
-                sf->thresh_mult[THR_NEW1     ] = 2000;
-                sf->thresh_mult[THR_SPLIT1   ] = 10000;
-            }
-
-            if (ref_frames > 2)
-            {
-                sf->thresh_mult[THR_NEAREST2 ] = 1000;
-                sf->thresh_mult[THR_ZERO2    ] = 1000;
-                sf->thresh_mult[THR_NEAR2    ] = 1000;
-                sf->thresh_mult[THR_NEW2     ] = 2000;
-                sf->thresh_mult[THR_SPLIT2   ] = 20000;
-            }
-
-            if (ref_frames > 3)
-            {
-                sf->thresh_mult[THR_NEAREST3 ] = 1000;
-                sf->thresh_mult[THR_ZERO3    ] = 1000;
-                sf->thresh_mult[THR_NEAR3    ] = 1000;
-                sf->thresh_mult[THR_NEW3     ] = 2000;
-                sf->thresh_mult[THR_SPLIT3   ] = 20000;
-            }
-
             sf->improved_quant = 0;
             sf->improved_dct = 0;
 
@@ -977,133 +857,28 @@
             sf->first_step = 1;
         }
 
-        if (Speed > 1)
-        {
-            cpi->mode_check_freq[THR_SPLIT1 ] = 7;
-            cpi->mode_check_freq[THR_SPLIT2] = 15;
-            cpi->mode_check_freq[THR_SPLIT3] = 15;
-
-            sf->thresh_mult[THR_TM       ] = 2000;
-            sf->thresh_mult[THR_V_PRED   ] = 2000;
-            sf->thresh_mult[THR_H_PRED   ] = 2000;
-            sf->thresh_mult[THR_B_PRED   ] = 5000;
-
-            if (ref_frames > 1)
-            {
-                sf->thresh_mult[THR_NEW1     ] = 2000;
-                sf->thresh_mult[THR_SPLIT1   ] = 25000;
-            }
-
-            if (ref_frames > 2)
-            {
-                sf->thresh_mult[THR_NEAREST2 ] = 2000;
-                sf->thresh_mult[THR_ZERO2    ] = 2000;
-                sf->thresh_mult[THR_NEAR2    ] = 2000;
-                sf->thresh_mult[THR_NEW2     ] = 2500;
-                sf->thresh_mult[THR_SPLIT2   ] = 50000;
-            }
-
-            if (ref_frames > 3)
-            {
-                sf->thresh_mult[THR_NEAREST3 ] = 2000;
-                sf->thresh_mult[THR_ZERO3    ] = 2000;
-                sf->thresh_mult[THR_NEAR3    ] = 2000;
-                sf->thresh_mult[THR_NEW3     ] = 2500;
-                sf->thresh_mult[THR_SPLIT3   ] = 50000;
-            }
-
-        }
-
         if (Speed > 2)
-        {
             sf->auto_filter = 0;                     // Faster selection of loop filter
 
-            cpi->mode_check_freq[THR_V_PRED] = 2;
-            cpi->mode_check_freq[THR_H_PRED] = 2;
-            cpi->mode_check_freq[THR_B_PRED] = 2;
-
-            if (ref_frames > 2)
-            {
-                cpi->mode_check_freq[THR_NEAR2] = 2;
-                cpi->mode_check_freq[THR_NEW2] = 4;
-            }
-
-            if (ref_frames > 3)
-            {
-                cpi->mode_check_freq[THR_NEAR3] = 2;
-                cpi->mode_check_freq[THR_NEW3] = 4;
-            }
-
-            sf->thresh_mult[THR_SPLIT1   ] = INT_MAX;
-            sf->thresh_mult[THR_SPLIT2  ] = INT_MAX;
-            sf->thresh_mult[THR_SPLIT3  ] = INT_MAX;
-
-        }
-
         if (Speed > 3)
         {
             sf->RD = 0;
-
             sf->auto_filter = 1;
         }
 
         if (Speed > 4)
         {
             sf->auto_filter = 0;                     // Faster selection of loop filter
-
             sf->search_method = HEX;
-            //sf->search_method = DIAMOND;
-
             sf->iterative_sub_pixel = 0;
-
-            cpi->mode_check_freq[THR_V_PRED] = 4;
-            cpi->mode_check_freq[THR_H_PRED] = 4;
-            cpi->mode_check_freq[THR_B_PRED] = 4;
-
-            if (cpi->ref_frame_flags & VP8_GOLD_FLAG)
-            {
-                cpi->mode_check_freq[THR_NEAR2] = 2;
-                cpi->mode_check_freq[THR_NEW2] = 4;
-            }
-
-            if (cpi->ref_frame_flags & VP8_ALT_FLAG)
-            {
-                cpi->mode_check_freq[THR_NEAR3] = 2;
-                cpi->mode_check_freq[THR_NEW3] = 4;
-            }
-
-            sf->thresh_mult[THR_TM       ] = 2000;
-            sf->thresh_mult[THR_B_PRED   ] = 5000;
-
-            if (cpi->ref_frame_flags & VP8_GOLD_FLAG)
-            {
-                sf->thresh_mult[THR_NEAREST2 ] = 2000;
-                sf->thresh_mult[THR_ZERO2    ] = 2000;
-                sf->thresh_mult[THR_NEAR2    ] = 2000;
-                sf->thresh_mult[THR_NEW2     ] = 4000;
-            }
-
-            if (cpi->ref_frame_flags & VP8_ALT_FLAG)
-            {
-                sf->thresh_mult[THR_NEAREST3 ] = 2000;
-                sf->thresh_mult[THR_ZERO3    ] = 2000;
-                sf->thresh_mult[THR_NEAR3    ] = 2000;
-                sf->thresh_mult[THR_NEW3     ] = 4000;
-            }
-        }
-
-        if (Speed > 5)
-        {
-            // Disable split MB intra prediction mode
-            sf->thresh_mult[THR_B_PRED] = INT_MAX;
         }
 
         if (Speed > 6)
         {
-            unsigned int i, sum = 0;
+            unsigned int sum = 0;
             unsigned int total_mbs = cm->MBs;
-            int thresh;
-            int total_skip;
+            int i, thresh;
+            unsigned int total_skip;
 
             int min = 2000;
 
@@ -1156,55 +931,28 @@
                 sf->thresh_mult[THR_NEAR3    ] = thresh;
             }
 
-            // Disable other intra prediction modes
-            sf->thresh_mult[THR_TM] = INT_MAX;
-            sf->thresh_mult[THR_V_PRED] = INT_MAX;
-            sf->thresh_mult[THR_H_PRED] = INT_MAX;
-
             sf->improved_mv_pred = 0;
         }
 
         if (Speed > 8)
-        {
             sf->quarter_pixel_search = 0;
-        }
 
-        if (Speed > 9)
+        if(cm->version == 0)
         {
-            int Tmp = cpi->Speed - 8;
+            cm->filter_type = NORMAL_LOOPFILTER;
 
-            if (Tmp > 4)
-                Tmp = 4;
-
-            if (cpi->ref_frame_flags & VP8_GOLD_FLAG)
-            {
-                cpi->mode_check_freq[THR_ZERO2] = 1 << (Tmp - 1);
-                cpi->mode_check_freq[THR_NEAREST2] = 1 << (Tmp - 1);
-                cpi->mode_check_freq[THR_NEAR2] = 1 << Tmp;
-                cpi->mode_check_freq[THR_NEW2] = 1 << (Tmp + 1);
-            }
-
-            if (cpi->ref_frame_flags & VP8_ALT_FLAG)
-            {
-                cpi->mode_check_freq[THR_ZERO3] = 1 << (Tmp - 1);
-                cpi->mode_check_freq[THR_NEAREST3] = 1 << (Tmp - 1);
-                cpi->mode_check_freq[THR_NEAR3] = 1 << Tmp;
-                cpi->mode_check_freq[THR_NEW3] = 1 << (Tmp + 1);
-            }
-
-            cpi->mode_check_freq[THR_NEW1 ] = 1 << (Tmp - 1);
+            if (Speed >= 14)
+                cm->filter_type = SIMPLE_LOOPFILTER;
         }
-
-        cm->filter_type = NORMAL_LOOPFILTER;
-
-        if (Speed >= 14)
+        else
+        {
             cm->filter_type = SIMPLE_LOOPFILTER;
-
-        if (Speed >= 15)
-        {
-            sf->half_pixel_search = 0;        // This has a big hit on quality. Last resort
         }
 
+        // This has a big hit on quality. Last resort
+        if (Speed >= 15)
+            sf->half_pixel_search = 0;
+
         vpx_memset(cpi->error_bins, 0, sizeof(cpi->error_bins));
 
     }; /* switch */
@@ -1290,6 +1038,9 @@
     frames_at_speed[cpi->Speed]++;
 #endif
 }
+#undef GOOD
+#undef RT
+
 static void alloc_raw_frame_buffers(VP8_COMP *cpi)
 {
     int width = (cpi->oxcf.Width + 15) & ~15;
@@ -1390,25 +1141,6 @@
                     vpx_calloc(sizeof(unsigned int),
                     cm->mb_rows * cm->mb_cols));
 
-#if !(CONFIG_REALTIME_ONLY)
-        vpx_free(cpi->twopass.total_stats);
-
-    cpi->twopass.total_stats = vpx_calloc(1, sizeof(FIRSTPASS_STATS));
-
-    vpx_free(cpi->twopass.total_left_stats);
-    cpi->twopass.total_left_stats = vpx_calloc(1, sizeof(FIRSTPASS_STATS));
-
-        vpx_free(cpi->twopass.this_frame_stats);
-
-    cpi->twopass.this_frame_stats = vpx_calloc(1, sizeof(FIRSTPASS_STATS));
-
-    if( !cpi->twopass.total_stats ||
-        !cpi->twopass.total_left_stats ||
-        !cpi->twopass.this_frame_stats)
-        vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
-                           "Failed to allocate firstpass stats");
-#endif
-
 #if CONFIG_MULTITHREAD
     if (width < 640)
         cpi->mt_sync_range = 1;
@@ -1497,9 +1229,8 @@
 }
 
 
-static void init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
+static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
 {
-    VP8_COMP *cpi = (VP8_COMP *)(ptr);
     VP8_COMMON *cm = &cpi->common;
 
     cpi->oxcf = *oxcf;
@@ -1524,7 +1255,7 @@
         cpi->frame_rate = 30;
 
     // change includes all joint functionality
-    vp8_change_config(ptr, oxcf);
+    vp8_change_config(cpi, oxcf);
 
     // Initialize active best and worst q and average q values.
     cpi->active_worst_quality         = cpi->oxcf.worst_allowed_q;
@@ -1546,8 +1277,8 @@
     // Temporal scalabilty
     if (cpi->oxcf.number_of_layers > 1)
     {
-        int i;
-        int prev_layer_frame_rate=0;
+        unsigned int i;
+        double prev_layer_frame_rate=0;
 
         for (i=0; i<cpi->oxcf.number_of_layers; i++)
         {
@@ -1615,9 +1346,8 @@
 }
 
 
-void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
+void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
 {
-    VP8_COMP *cpi = (VP8_COMP *)(ptr);
     VP8_COMMON *cm = &cpi->common;
 
     if (!cpi)
@@ -1809,7 +1539,7 @@
       cpi->active_best_quality = cpi->oxcf.worst_allowed_q;
     }
 
-    cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE;
+    cpi->buffered_mode = cpi->oxcf.optimal_buffer_level > 0;
 
     cpi->cq_target_quality = cpi->oxcf.cq_level;
 
@@ -1908,19 +1638,14 @@
     while (++i <= mvfp_max);
 }
 
-VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
+struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
 {
     int i;
-    volatile union
-    {
-        VP8_COMP *cpi;
-        VP8_PTR   ptr;
-    } ctx;
 
     VP8_COMP *cpi;
     VP8_COMMON *cm;
 
-    cpi = ctx.cpi = vpx_memalign(32, sizeof(VP8_COMP));
+    cpi = vpx_memalign(32, sizeof(VP8_COMP));
     // Check that the CPI instance is valid
     if (!cpi)
         return 0;
@@ -1931,10 +1656,8 @@
 
     if (setjmp(cm->error.jmp))
     {
-        VP8_PTR ptr = ctx.ptr;
-
-        ctx.cpi->common.error.setjmp = 0;
-        vp8_remove_compressor(&ptr);
+        cpi->common.error.setjmp = 0;
+        vp8_remove_compressor(&cpi);
         return 0;
     }
 
@@ -1945,7 +1668,7 @@
     vp8_create_common(&cpi->common);
     vp8_cmachine_specific_config(cpi);
 
-    init_config((VP8_PTR)cpi, oxcf);
+    init_config(cpi, oxcf);
 
     memcpy(cpi->base_skip_false_prob, vp8cx_base_skip_false_prob, sizeof(vp8cx_base_skip_false_prob));
     cpi->common.current_video_frame   = 0;
@@ -2024,7 +1747,7 @@
         cpi->cyclic_refresh_map = (signed char *) NULL;
 
     // Test function for segmentation
-    //segmentation_test_function((VP8_PTR) cpi);
+    //segmentation_test_function( cpi);
 
 #ifdef ENTROPY_STATS
     init_context_counters();
@@ -2035,11 +1758,11 @@
 
     cpi->frames_since_key = 8;        // Give a sensible default for the first frame.
     cpi->key_frame_frequency = cpi->oxcf.key_freq;
-    cpi->this_key_frame_forced = FALSE;
-    cpi->next_key_frame_forced = FALSE;
+    cpi->this_key_frame_forced = 0;
+    cpi->next_key_frame_forced = 0;
 
-    cpi->source_alt_ref_pending = FALSE;
-    cpi->source_alt_ref_active = FALSE;
+    cpi->source_alt_ref_pending = 0;
+    cpi->source_alt_ref_active = 0;
     cpi->common.refresh_alt_ref_frame = 0;
 
     cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
@@ -2244,14 +1967,14 @@
         vp8_cal_low_res_mb_cols(cpi);
 #endif
 
-    return (VP8_PTR) cpi;
+    return  cpi;
 
 }
 
 
-void vp8_remove_compressor(VP8_PTR *ptr)
+void vp8_remove_compressor(VP8_COMP **ptr)
 {
-    VP8_COMP *cpi = (VP8_COMP *)(*ptr);
+    VP8_COMP *cpi = *ptr;
 
     if (!cpi)
         return;
@@ -2662,20 +2385,16 @@
 }
 
 
-int vp8_use_as_reference(VP8_PTR ptr, int ref_frame_flags)
+int vp8_use_as_reference(VP8_COMP *cpi, int ref_frame_flags)
 {
-    VP8_COMP *cpi = (VP8_COMP *)(ptr);
-
     if (ref_frame_flags > 7)
         return -1 ;
 
     cpi->ref_frame_flags = ref_frame_flags;
     return 0;
 }
-int vp8_update_reference(VP8_PTR ptr, int ref_frame_flags)
+int vp8_update_reference(VP8_COMP *cpi, int ref_frame_flags)
 {
-    VP8_COMP *cpi = (VP8_COMP *)(ptr);
-
     if (ref_frame_flags > 7)
         return -1 ;
 
@@ -2695,9 +2414,8 @@
     return 0;
 }
 
-int vp8_get_reference(VP8_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
+int vp8_get_reference(VP8_COMP *cpi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
 {
-    VP8_COMP *cpi = (VP8_COMP *)(ptr);
     VP8_COMMON *cm = &cpi->common;
     int ref_fb_idx;
 
@@ -2714,9 +2432,8 @@
 
     return 0;
 }
-int vp8_set_reference(VP8_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
+int vp8_set_reference(VP8_COMP *cpi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
 {
-    VP8_COMP *cpi = (VP8_COMP *)(ptr);
     VP8_COMMON *cm = &cpi->common;
 
     int ref_fb_idx;
@@ -2734,9 +2451,8 @@
 
     return 0;
 }
-int vp8_update_entropy(VP8_PTR comp, int update)
+int vp8_update_entropy(VP8_COMP *cpi, int update)
 {
-    VP8_COMP *cpi = (VP8_COMP *) comp;
     VP8_COMMON *cm = &cpi->common;
     cm->refresh_entropy_probs = update;
 
@@ -2892,10 +2608,10 @@
     cpi->common.frames_since_golden = 0;
 
     // Clear the alternate reference update pending flag.
-    cpi->source_alt_ref_pending = FALSE;
+    cpi->source_alt_ref_pending = 0;
 
     // Set the alternate refernce frame active flag
-    cpi->source_alt_ref_active = TRUE;
+    cpi->source_alt_ref_active = 1;
 
 
 }
@@ -2958,12 +2674,12 @@
         if (cpi->oxcf.fixed_q >= 0 &&
             cpi->oxcf.play_alternate && !cpi->common.refresh_alt_ref_frame)
         {
-            cpi->source_alt_ref_pending = TRUE;
+            cpi->source_alt_ref_pending = 1;
             cpi->frames_till_gf_update_due = cpi->baseline_gf_interval;
         }
 
         if (!cpi->source_alt_ref_pending)
-            cpi->source_alt_ref_active = FALSE;
+            cpi->source_alt_ref_active = 0;
 
         // Decrement count down till next gf
         if (cpi->frames_till_gf_update_due > 0)
@@ -2997,47 +2713,6 @@
 {
     VP8_COMMON *cm = &cpi->common;
 
-#if 0
-    const int *const rfct = cpi->recent_ref_frame_usage;
-    const int rf_intra = rfct[INTRA_FRAME];
-    const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];
-
-    if (cm->frame_type == KEY_FRAME)
-    {
-        cpi->prob_intra_coded = 255;
-        cpi->prob_last_coded  = 128;
-        cpi->prob_gf_coded  = 128;
-    }
-    else if (!(rf_intra + rf_inter))
-    {
-        // This is a trap in case this function is called with cpi->recent_ref_frame_usage[] blank.
-        cpi->prob_intra_coded = 63;
-        cpi->prob_last_coded  = 128;
-        cpi->prob_gf_coded    = 128;
-    }
-    else
-    {
-        cpi->prob_intra_coded = (rf_intra * 255) / (rf_intra + rf_inter);
-
-        if (cpi->prob_intra_coded < 1)
-            cpi->prob_intra_coded = 1;
-
-        if ((cm->frames_since_golden > 0) || cpi->source_alt_ref_active)
-        {
-            cpi->prob_last_coded = rf_inter ? (rfct[LAST_FRAME] * 255) / rf_inter : 128;
-
-            if (cpi->prob_last_coded < 1)
-                cpi->prob_last_coded = 1;
-
-            cpi->prob_gf_coded = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
-                                 ? (rfct[GOLDEN_FRAME] * 255) / (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]) : 128;
-
-            if (cpi->prob_gf_coded < 1)
-                cpi->prob_gf_coded = 1;
-        }
-    }
-
-#else
     const int *const rfct = cpi->count_mb_ref_frame_usage;
     const int rf_intra = rfct[INTRA_FRAME];
     const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];
@@ -3050,59 +2725,9 @@
     }
     else if (!(rf_intra + rf_inter))
     {
-        if (cpi->oxcf.number_of_layers > 1)
-        {
-            if (cpi->ref_frame_flags == VP8_LAST_FLAG)
-            {
-                cpi->prob_intra_coded = 63;
-                cpi->prob_last_coded  = 255;
-                cpi->prob_gf_coded    = 128;
-            }
-            else if (cpi->ref_frame_flags == VP8_GOLD_FLAG)
-            {
-                cpi->prob_intra_coded = 63;
-                cpi->prob_last_coded  = 1;
-                cpi->prob_gf_coded    = 255;
-            }
-            else if (cpi->ref_frame_flags == VP8_ALT_FLAG)
-            {
-                cpi->prob_intra_coded = 63;
-                cpi->prob_last_coded  = 1;
-                cpi->prob_gf_coded    = 1;
-            }
-            else
-            {
-                cpi->prob_intra_coded = 63;
-                cpi->prob_last_coded  = 128;
-                cpi->prob_gf_coded    = 128;
-            }
-        }
-        else
-        {
-            // This is a trap in case this function is called with
-            // cpi->recent_ref_frame_usage[] blank.
-            cpi->prob_intra_coded = 63;
-            cpi->prob_last_coded  = 128;
-            cpi->prob_gf_coded    = 128;
-        }
-    }
-    else
-    {
-        cpi->prob_intra_coded = (rf_intra * 255) / (rf_intra + rf_inter);
-
-        if (cpi->prob_intra_coded < 1)
-            cpi->prob_intra_coded = 1;
-
-        cpi->prob_last_coded = rf_inter ? (rfct[LAST_FRAME] * 255) / rf_inter : 128;
-
-        if (cpi->prob_last_coded < 1)
-            cpi->prob_last_coded = 1;
-
-        cpi->prob_gf_coded = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
-                             ? (rfct[GOLDEN_FRAME] * 255) / (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]) : 128;
-
-        if (cpi->prob_gf_coded < 1)
-            cpi->prob_gf_coded = 1;
+        cpi->prob_intra_coded = 63;
+        cpi->prob_last_coded  = 128;
+        cpi->prob_gf_coded    = 128;
     }
 
     // update reference frame costs since we can do better than what we got last frame.
@@ -3117,7 +2742,6 @@
         else if (cpi->common.frames_since_golden == 0)
         {
             cpi->prob_last_coded = 214;
-            cpi->prob_gf_coded = 1;
         }
         else if (cpi->common.frames_since_golden == 1)
         {
@@ -3126,14 +2750,14 @@
         }
         else if (cpi->source_alt_ref_active)
         {
-            //int dist = cpi->common.frames_till_alt_ref_frame + cpi->common.frames_since_golden;
             cpi->prob_gf_coded -= 20;
 
             if (cpi->prob_gf_coded < 10)
                 cpi->prob_gf_coded = 10;
         }
+        if (!cpi->source_alt_ref_active)
+            cpi->prob_gf_coded = 255;
     }
-#endif
 }
 
 
@@ -3142,12 +2766,12 @@
 {
     VP8_COMMON *cm = &cpi->common;
 
-    int code_key_frame = FALSE;
+    int code_key_frame = 0;
 
     cpi->kf_boost = 0;
 
     if (cpi->Speed > 11)
-        return FALSE;
+        return 0;
 
     // Clear down mmx registers
     vp8_clear_system_state();  //__asm emms;
@@ -3189,10 +2813,10 @@
             && (change > .25 || change2 > .25))
         {
             /*(change > 1.4 || change < .75)&& cpi->this_frame_percent_intra > cpi->last_frame_percent_intra + 3*/
-            return TRUE;
+            return 1;
         }
 
-        return FALSE;
+        return 0;
 
     }
 
@@ -3202,7 +2826,7 @@
         ((cpi->this_frame_percent_intra > 95) &&
          (cpi->this_frame_percent_intra >= (cpi->last_frame_percent_intra + 5))))
     {
-        code_key_frame = TRUE;
+        code_key_frame = 1;
     }
     // in addition if the following are true and this is not a golden frame then code a key frame
     // Note that on golden frames there often seems to be a pop in intra useage anyway hence this
@@ -3215,7 +2839,7 @@
               (cpi->this_frame_percent_intra > (cpi->last_frame_percent_intra + 10))))
     {
         if (!cm->refresh_golden_frame)
-            code_key_frame = TRUE;
+            code_key_frame = 1;
     }
 
     return code_key_frame;
@@ -3271,11 +2895,11 @@
 
 // Function to test for conditions that indeicate we should loop
 // back and recode a frame.
-static BOOL recode_loop_test( VP8_COMP *cpi,
+static int recode_loop_test( VP8_COMP *cpi,
                               int high_limit, int low_limit,
                               int q, int maxq, int minq )
 {
-    BOOL    force_recode = FALSE;
+    int force_recode = 0;
     VP8_COMMON *cm = &cpi->common;
 
     // Is frame recode allowed at all
@@ -3291,7 +2915,7 @@
         if ( ((cpi->projected_frame_size > high_limit) && (q < maxq)) ||
              ((cpi->projected_frame_size < low_limit) && (q > minq)) )
         {
-            force_recode = TRUE;
+            force_recode = 1;
         }
         // Special Constrained quality tests
         else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY)
@@ -3301,14 +2925,14 @@
                  (cpi->projected_frame_size <
                      ((cpi->this_frame_target * 7) >> 3)))
             {
-                force_recode = TRUE;
+                force_recode = 1;
             }
             // Severe undershoot and between auto and user cq level
             else if ( (q > cpi->oxcf.cq_level) &&
                       (cpi->projected_frame_size < cpi->min_frame_bandwidth) &&
                       (cpi->active_best_quality > cpi->oxcf.cq_level))
             {
-                force_recode = TRUE;
+                force_recode = 1;
                 cpi->active_best_quality = cpi->oxcf.cq_level;
             }
         }
@@ -3459,7 +3083,7 @@
     int frame_over_shoot_limit;
     int frame_under_shoot_limit;
 
-    int Loop = FALSE;
+    int Loop = 0;
     int loop_count;
     int this_q;
     int last_zbin_oq;
@@ -3471,10 +3095,10 @@
     int top_index;
     int bottom_index;
     VP8_COMMON *cm = &cpi->common;
-    int active_worst_qchanged = FALSE;
+    int active_worst_qchanged = 0;
 
-    int overshoot_seen = FALSE;
-    int undershoot_seen = FALSE;
+    int overshoot_seen = 0;
+    int undershoot_seen = 0;
     int drop_mark = cpi->oxcf.drop_frames_water_mark * cpi->oxcf.optimal_buffer_level / 100;
     int drop_mark75 = drop_mark * 2 / 3;
     int drop_mark50 = drop_mark / 4;
@@ -3485,7 +3109,7 @@
     vp8_clear_system_state();
 
     // Test code for segmentation of gf/arf (0,0)
-    //segmentation_test_function((VP8_PTR) cpi);
+    //segmentation_test_function( cpi);
 
     if (cpi->compressor_speed == 2)
     {
@@ -3525,12 +3149,12 @@
     // Enable or disable mode based tweaking of the zbin
     // For 2 Pass Only used where GF/ARF prediction quality
     // is above a threshold
-    cpi->zbin_mode_boost_enabled = TRUE;
+    cpi->zbin_mode_boost_enabled = 1;
     if (cpi->pass == 2)
     {
         if ( cpi->gfu_boost <= 400 )
         {
-            cpi->zbin_mode_boost_enabled = FALSE;
+            cpi->zbin_mode_boost_enabled = 0;
         }
     }
 
@@ -3571,7 +3195,7 @@
         }
 
         // The alternate reference frame cannot be active for a key frame
-        cpi->source_alt_ref_active = FALSE;
+        cpi->source_alt_ref_active = 0;
 
         // Reset the RD threshold multipliers to default of * 1 (128)
         for (i = 0; i < MAX_MODES; i++)
@@ -3583,9 +3207,9 @@
     // Test code for segmentation
     //if ( (cm->frame_type == KEY_FRAME) || ((cm->current_video_frame % 2) == 0))
     //if ( (cm->current_video_frame % 2) == 0 )
-    //  enable_segmentation((VP8_PTR)cpi);
+    //  enable_segmentation(cpi);
     //else
-    //  disable_segmentation((VP8_PTR)cpi);
+    //  disable_segmentation(cpi);
 
 #if 0
     // Experimental code for lagged compress and one pass
@@ -3679,7 +3303,7 @@
 
             if (cpi->oxcf.number_of_layers > 1)
             {
-                int i;
+                unsigned int i;
 
                 // Propagate bits saved by dropping the frame to higher layers
                 for (i=cpi->current_layer+1; i<cpi->oxcf.number_of_layers; i++)
@@ -4083,7 +3707,7 @@
                 vp8_pick_frame_size(cpi);
 
                 // Clear the Alt reference frame active flag when we have a key frame
-                cpi->source_alt_ref_active = FALSE;
+                cpi->source_alt_ref_active = 0;
 
                 // Reset the loop filter deltas and segmentation map
                 setup_features(cpi);
@@ -4108,7 +3732,7 @@
                 q_high = cpi->active_worst_quality;
 
                 loop_count++;
-                Loop = TRUE;
+                Loop = 1;
 
                 continue;
             }
@@ -4136,10 +3760,10 @@
             }
 
             // If we have updated the active max Q do not call vp8_update_rate_correction_factors() this loop.
-            active_worst_qchanged = TRUE;
+            active_worst_qchanged = 1;
         }
         else
-            active_worst_qchanged = FALSE;
+            active_worst_qchanged = 0;
 
 #if !(CONFIG_REALTIME_ONLY)
         // Special case handling for forced key frames
@@ -4175,7 +3799,7 @@
             else if (Q < q_low)
                 Q = q_low;
 
-            Loop = ((Q != last_q)) ? TRUE : FALSE;
+            Loop = Q != last_q;
         }
 
         // Is the projected frame size out of range and are we allowed to attempt to recode.
@@ -4232,7 +3856,7 @@
                     }
                 }
 
-                overshoot_seen = TRUE;
+                overshoot_seen = 1;
             }
             // Frame is too small
             else
@@ -4282,7 +3906,7 @@
                     }
                 }
 
-                undershoot_seen = TRUE;
+                undershoot_seen = 1;
             }
 
             // Clamp Q to upper and lower limits:
@@ -4294,18 +3918,18 @@
             // Clamp cpi->zbin_over_quant
             cpi->zbin_over_quant = (cpi->zbin_over_quant < zbin_oq_low) ? zbin_oq_low : (cpi->zbin_over_quant > zbin_oq_high) ? zbin_oq_high : cpi->zbin_over_quant;
 
-            //Loop = ((Q != last_q) || (last_zbin_oq != cpi->zbin_over_quant)) ? TRUE : FALSE;
-            Loop = ((Q != last_q)) ? TRUE : FALSE;
+            //Loop = (Q != last_q) || (last_zbin_oq != cpi->zbin_over_quant);
+            Loop = Q != last_q;
             last_zbin_oq = cpi->zbin_over_quant;
         }
         else
 #endif
-            Loop = FALSE;
+            Loop = 0;
 
         if (cpi->is_src_frame_alt_ref)
-            Loop = FALSE;
+            Loop = 0;
 
-        if (Loop == TRUE)
+        if (Loop == 1)
         {
             vp8_restore_coding_context(cpi);
             loop_count++;
@@ -4314,7 +3938,7 @@
 #endif
         }
     }
-    while (Loop == TRUE);
+    while (Loop == 1);
 
 #if 0
     // Experimental code for lagged and one pass
@@ -4456,7 +4080,7 @@
 
     if (cpi->oxcf.number_of_layers > 1)
     {
-        int i;
+        unsigned int i;
         for (i=cpi->current_layer+1; i<cpi->oxcf.number_of_layers; i++)
           cpi->layer_context[i].total_byte_count += (*size);
     }
@@ -4523,7 +4147,7 @@
         (cpi->buffer_level < cpi->oxcf.drop_frames_water_mark * cpi->oxcf.optimal_buffer_level / 100) &&
         (cpi->projected_frame_size > (4 * cpi->this_frame_target)))
     {
-        cpi->drop_frame = TRUE;
+        cpi->drop_frame = 1;
     }
 
 #endif
@@ -4567,7 +4191,7 @@
     // Propagate values to higher temporal layers
     if (cpi->oxcf.number_of_layers > 1)
     {
-        int i;
+        unsigned int i;
 
         for (i=cpi->current_layer+1; i<cpi->oxcf.number_of_layers; i++)
         {
@@ -4882,12 +4506,11 @@
 #endif
 
 
-int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time)
+int vp8_receive_raw_frame(VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time)
 {
 #if HAVE_ARMV7
     int64_t store_reg[8];
 #endif
-    VP8_COMP              *cpi = (VP8_COMP *) ptr;
     VP8_COMMON            *cm = &cpi->common;
     struct vpx_usec_timer  timer;
     int                    res = 0;
@@ -4936,12 +4559,11 @@
 }
 
 
-int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush)
+int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush)
 {
 #if HAVE_ARMV7
     int64_t store_reg[8];
 #endif
-    VP8_COMP *cpi = (VP8_COMP *) ptr;
     VP8_COMMON *cm;
     struct vpx_usec_timer  tsctimer;
     struct vpx_usec_timer  ticktimer;
@@ -4995,7 +4617,7 @@
             cm->refresh_golden_frame = 0;
             cm->refresh_last_frame = 0;
             cm->show_frame = 0;
-            cpi->source_alt_ref_pending = FALSE;  // Clear Pending alt Ref flag.
+            cpi->source_alt_ref_pending = 0;  // Clear Pending alt Ref flag.
             cpi->is_src_frame_alt_ref = 0;
         }
     }
@@ -5216,6 +4838,17 @@
         vpx_memcpy(&cm->fc, &cm->lfc, sizeof(cm->fc));
     }
 
+    // Save the contexts separately for alt ref, gold and last.
+    // (TODO jbb -> Optimize this with pointers to avoid extra copies. )
+    if(cm->refresh_alt_ref_frame)
+        vpx_memcpy(&cpi->lfc_a, &cm->fc, sizeof(cm->fc));
+
+    if(cm->refresh_golden_frame)
+        vpx_memcpy(&cpi->lfc_g, &cm->fc, sizeof(cm->fc));
+
+    if(cm->refresh_last_frame)
+        vpx_memcpy(&cpi->lfc_n, &cm->fc, sizeof(cm->fc));
+
     // if its a dropped frame honor the requests on subsequent frames
     if (*size > 0)
     {
@@ -5416,10 +5049,8 @@
     return 0;
 }
 
-int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags)
+int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags)
 {
-    VP8_COMP *cpi = (VP8_COMP *) comp;
-
     if (cpi->common.refresh_alt_ref_frame)
         return -1;
     else
@@ -5448,9 +5079,8 @@
     }
 }
 
-int vp8_set_roimap(VP8_PTR comp, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4])
+int vp8_set_roimap(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4])
 {
-    VP8_COMP *cpi = (VP8_COMP *) comp;
     signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS];
 
     if (cpi->common.mb_rows != rows || cpi->common.mb_cols != cols)
@@ -5458,15 +5088,15 @@
 
     if (!map)
     {
-        disable_segmentation((VP8_PTR)cpi);
+        disable_segmentation(cpi);
         return 0;
     }
 
     // Set the segmentation Map
-    set_segmentation_map((VP8_PTR)cpi, map);
+    set_segmentation_map(cpi, map);
 
     // Activate segmentation.
-    enable_segmentation((VP8_PTR)cpi);
+    enable_segmentation(cpi);
 
     // Set up the quant segment data
     feature_data[MB_LVL_ALT_Q][0] = delta_q[0];
@@ -5487,15 +5117,13 @@
 
     // Initialise the feature data structure
     // SEGMENT_DELTADATA    0, SEGMENT_ABSDATA      1
-    set_segment_data((VP8_PTR)cpi, &feature_data[0][0], SEGMENT_DELTADATA);
+    set_segment_data(cpi, &feature_data[0][0], SEGMENT_DELTADATA);
 
     return 0;
 }
 
-int vp8_set_active_map(VP8_PTR comp, unsigned char *map, unsigned int rows, unsigned int cols)
+int vp8_set_active_map(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols)
 {
-    VP8_COMP *cpi = (VP8_COMP *) comp;
-
     if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols)
     {
         if (map)
@@ -5515,10 +5143,8 @@
     }
 }
 
-int vp8_set_internal_size(VP8_PTR comp, VPX_SCALING horiz_mode, VPX_SCALING vert_mode)
+int vp8_set_internal_size(VP8_COMP *cpi, VPX_SCALING horiz_mode, VPX_SCALING vert_mode)
 {
-    VP8_COMP *cpi = (VP8_COMP *) comp;
-
     if (horiz_mode <= ONETWO)
         cpi->common.horiz_scale = horiz_mode;
     else
@@ -5560,8 +5186,7 @@
 }
 
 
-int vp8_get_quantizer(VP8_PTR c)
+int vp8_get_quantizer(VP8_COMP *cpi)
 {
-    VP8_COMP   *cpi = (VP8_COMP *) c;
     return cpi->common.base_qindex;
 }

diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 68ec3ab..46951e3 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h

@@ -259,7 +259,7 @@
     int buffer_level;
     int bits_off_target;
 
-    long long total_actual_bits;
+    int64_t total_actual_bits;
     int total_target_vs_actual;
 
     int worst_quality;
@@ -279,7 +279,7 @@
     int zbin_over_quant;
 
     int inter_frame_target;
-    INT64 total_byte_count;
+    int64_t total_byte_count;
 
     int filter_level;
 
@@ -568,16 +568,21 @@
 
     int base_skip_false_prob[128];
 
+    FRAME_CONTEXT lfc_n; /* last frame entropy */
+    FRAME_CONTEXT lfc_a; /* last alt ref entropy */
+    FRAME_CONTEXT lfc_g; /* last gold ref entropy */
+
+
     struct twopass_rc
     {
         unsigned int section_intra_rating;
         double section_max_qfactor;
         unsigned int next_iiratio;
         unsigned int this_iiratio;
-        FIRSTPASS_STATS *total_stats;
-        FIRSTPASS_STATS *this_frame_stats;
+        FIRSTPASS_STATS total_stats;
+        FIRSTPASS_STATS this_frame_stats;
         FIRSTPASS_STATS *stats_in, *stats_in_end, *stats_in_start;
-        FIRSTPASS_STATS *total_left_stats;
+        FIRSTPASS_STATS total_left_stats;
         int first_pass_done;
         int64_t bits_left;
         int64_t clip_bits_total;
@@ -668,8 +673,8 @@
     unsigned int current_layer;
     LAYER_CONTEXT layer_context[MAX_LAYERS];
 
-    long long frames_in_layer[MAX_LAYERS];
-    long long bytes_in_layer[MAX_LAYERS];
+    int64_t frames_in_layer[MAX_LAYERS];
+    int64_t bytes_in_layer[MAX_LAYERS];
     double sum_psnr[MAX_LAYERS];
     double sum_psnr_p[MAX_LAYERS];
     double total_error2[MAX_LAYERS];

diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index d9c8975..46f53a1 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c

@@ -21,7 +21,6 @@
 #include "vp8/common/reconinter.h"
 #include "vp8/common/reconintra.h"
 #include "vp8/common/reconintra4x4.h"
-#include "vp8/common/g_common.h"
 #include "variance.h"
 #include "mcomp.h"
 #include "rdopt.h"
@@ -402,9 +401,68 @@
     }
 }
 
+
+#if CONFIG_MULTI_RES_ENCODING
+static
+void get_lower_res_motion_info(VP8_COMP *cpi, MACROBLOCKD *xd, int *dissim,
+                               int *parent_ref_frame,
+                               MB_PREDICTION_MODE *parent_mode,
+                               int_mv *parent_ref_mv, int mb_row, int mb_col)
+{
+    LOWER_RES_INFO* store_mode_info
+                          = (LOWER_RES_INFO*)cpi->oxcf.mr_low_res_mode_info;
+    unsigned int parent_mb_index;
+    //unsigned int parent_mb_index = map_640x480_to_320x240[mb_row][mb_col];
+
+    /* Consider different down_sampling_factor.  */
+    {
+        /* TODO: Removed the loop that supports special down_sampling_factor
+         * such as 2, 4, 8. Will revisit it if needed.
+         * Should also try using a look-up table to see if it helps
+         * performance. */
+        int round = cpi->oxcf.mr_down_sampling_factor.num/2;
+        int parent_mb_row, parent_mb_col;
+
+        parent_mb_row = (mb_row*cpi->oxcf.mr_down_sampling_factor.den+round)
+                    /cpi->oxcf.mr_down_sampling_factor.num;
+        parent_mb_col = (mb_col*cpi->oxcf.mr_down_sampling_factor.den+round)
+                    /cpi->oxcf.mr_down_sampling_factor.num;
+        parent_mb_index = parent_mb_row*cpi->mr_low_res_mb_cols + parent_mb_col;
+    }
+
+    /* Read lower-resolution mode & motion result from memory.*/
+    *parent_ref_frame = store_mode_info[parent_mb_index].ref_frame;
+    *parent_mode =  store_mode_info[parent_mb_index].mode;
+    *dissim = store_mode_info[parent_mb_index].dissim;
+
+    /* For highest-resolution encoder, adjust dissim value. Lower its quality
+     * for good performance. */
+    if (cpi->oxcf.mr_encoder_id == (cpi->oxcf.mr_total_resolutions - 1))
+        *dissim>>=1;
+
+    if(*parent_ref_frame != INTRA_FRAME)
+    {
+        /* Consider different down_sampling_factor.
+         * The result can be rounded to be more precise, but it takes more time.
+         */
+        //int round = cpi->oxcf.mr_down_sampling_factor.den/2;
+        (*parent_ref_mv).as_mv.row = store_mode_info[parent_mb_index].mv.as_mv.row
+                                  *cpi->oxcf.mr_down_sampling_factor.num
+                                  /cpi->oxcf.mr_down_sampling_factor.den;
+        (*parent_ref_mv).as_mv.col = store_mode_info[parent_mb_index].mv.as_mv.col
+                                  *cpi->oxcf.mr_down_sampling_factor.num
+                                  /cpi->oxcf.mr_down_sampling_factor.den;
+
+        vp8_clamp_mv2(parent_ref_mv, xd);
+    }
+}
+#endif
+
+
 void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
                          int recon_uvoffset, int *returnrate,
-                         int *returndistortion, int *returnintra)
+                         int *returndistortion, int *returnintra, int mb_row,
+                         int mb_col)
 {
     BLOCK *b = &x->block[0];
     BLOCKD *d = &x->e_mbd.block[0];
@@ -422,33 +480,38 @@
     int rate;
     int rate2;
     int distortion2;
-    int bestsme;
-    //int all_rds[MAX_MODES];         // Experimental debug code.
+    int bestsme = INT_MAX;
     int best_mode_index = 0;
     unsigned int sse = INT_MAX, best_sse = INT_MAX;
 
     int_mv mvp;
+
     int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
     int saddone=0;
     int sr=0;    //search range got from mv_pred(). It uses step_param levels. (0-7)
 
-    int_mv nearest_mv[4];
-    int_mv near_mv[4];
-    int_mv frame_best_ref_mv[4];
-    int MDCounts[4][4];
     unsigned char *y_buffer[4];
     unsigned char *u_buffer[4];
     unsigned char *v_buffer[4];
     int i;
     int ref_frame_map[4];
+    int sign_bias = 0;
 
-    int found_near_mvs[4] = {0, 0, 0, 0};
+    int have_subp_search = cpi->sf.half_pixel_search;  /* In real-time mode,
+                                       when Speed >= 15, no sub-pixel search. */
 
-    int have_subp_search = cpi->sf.half_pixel_search;  /* In real-time mode, when Speed >= 15, no sub-pixel search. */
+#if CONFIG_MULTI_RES_ENCODING
+    int dissim = INT_MAX;
+    int parent_ref_frame = 0;
+    int_mv parent_ref_mv;
+    MB_PREDICTION_MODE parent_mode = 0;
+
+    if (cpi->oxcf.mr_encoder_id)
+        get_lower_res_motion_info(cpi, xd, &dissim, &parent_ref_frame,
+                                  &parent_mode, &parent_ref_mv, mb_row, mb_col);
+#endif
 
     vpx_memset(mode_mv, 0, sizeof(mode_mv));
-    vpx_memset(nearest_mv, 0, sizeof(nearest_mv));
-    vpx_memset(near_mv, 0, sizeof(near_mv));
     vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
 
     /* Setup search priorities */
@@ -463,6 +526,22 @@
     for(; i<4; i++)
         ref_frame_map[i] = -1;
 
+    /* Check to see if there is at least 1 valid reference frame that we need
+     * to calculate near_mvs.
+     */
+    if (ref_frame_map[1] > 0)
+    {
+        vp8_find_near_mvs(&x->e_mbd,
+                          x->e_mbd.mode_info_context,
+                          &mode_mv[NEARESTMV], &mode_mv[NEARMV],
+                          &best_ref_mv,
+                          mdcounts,
+                          ref_frame_map[1],
+                          cpi->common.ref_frame_sign_bias);
+
+        sign_bias = cpi->common.ref_frame_sign_bias[ref_frame_map[1]];
+    }
+
     // set up all the refframe dependent pointers.
     if (cpi->ref_frame_flags & VP8_LAST_FLAG)
     {
@@ -488,7 +567,7 @@
         v_buffer[ALTREF_FRAME] = alt_yv12->v_buffer + recon_uvoffset;
     }
 
-    cpi->mbs_tested_so_far++;          // Count of the number of MBs tested so far this frame
+    cpi->mbs_tested_so_far++; // Count of the number of MBs tested so far this frame
 
     *returnintra = INT_MAX;
     x->skip = 0;
@@ -511,59 +590,21 @@
 
         x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
 
-
-        // Check to see if the testing frequency for this mode is at its max
-        // If so then prevent it from being tested and increase the threshold for its testing
-        if (cpi->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1))
+#if CONFIG_MULTI_RES_ENCODING
+        if (cpi->oxcf.mr_encoder_id)
         {
-            //if ( (cpi->mbs_tested_so_far / cpi->mode_test_hit_counts[mode_index]) <= cpi->mode_check_freq[mode_index] )
-            if (cpi->mbs_tested_so_far <= (cpi->mode_check_freq[mode_index] * cpi->mode_test_hit_counts[mode_index]))
-            {
-                // Increase the threshold for coding this mode to make it less likely to be chosen
-                cpi->rd_thresh_mult[mode_index] += 4;
-
-                if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
-                    cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
-
-                cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
-
+            /* If parent MB is intra, child MB is intra. */
+            if (!parent_ref_frame && this_ref_frame)
                 continue;
-            }
+
+            /* If parent MB is inter, and it is unlikely there are multiple
+             * objects in parent MB, we use parent ref frame as child MB's
+             * ref frame. */
+            if (parent_ref_frame && dissim < 8
+                && parent_ref_frame != this_ref_frame)
+                continue;
         }
-
-        // If nearby MVs haven't been found for this reference frame then do it now.
-        if (x->e_mbd.mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
-            !found_near_mvs[x->e_mbd.mode_info_context->mbmi.ref_frame])
-        {
-            int ref_frame = x->e_mbd.mode_info_context->mbmi.ref_frame;
-            vp8_find_near_mvs(&x->e_mbd,
-                              x->e_mbd.mode_info_context,
-                              &nearest_mv[ref_frame], &near_mv[ref_frame],
-                              &frame_best_ref_mv[ref_frame],
-                              MDCounts[ref_frame],
-                              ref_frame,
-                              cpi->common.ref_frame_sign_bias);
-            found_near_mvs[ref_frame] = 1;
-        }
-
-        // We have now reached the point where we are going to test the current mode so increment the counter for the number of times it has been tested
-        cpi->mode_test_hit_counts[mode_index] ++;
-
-        rate2 = 0;
-        distortion2 = 0;
-
-        this_mode = vp8_mode_order[mode_index];
-
-        // Experimental debug code.
-        //all_rds[mode_index] = -1;
-
-        x->e_mbd.mode_info_context->mbmi.mode = this_mode;
-        x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
-
-        // Work out the cost assosciated with selecting the reference frame
-        frame_cost =
-            x->e_mbd.ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
-        rate2 += frame_cost;
+#endif
 
         // everything but intra
         if (x->e_mbd.mode_info_context->mbmi.ref_frame)
@@ -571,25 +612,95 @@
             x->e_mbd.pre.y_buffer = y_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
             x->e_mbd.pre.u_buffer = u_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
             x->e_mbd.pre.v_buffer = v_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
-            mode_mv[NEARESTMV] = nearest_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
-            mode_mv[NEARMV] = near_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
-            best_ref_mv = frame_best_ref_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
-            memcpy(mdcounts, MDCounts[x->e_mbd.mode_info_context->mbmi.ref_frame], sizeof(mdcounts));
+
+            if (sign_bias !=
+                cpi->common.ref_frame_sign_bias[x->e_mbd.mode_info_context->mbmi.ref_frame])
+            {
+                mode_mv[NEARESTMV].as_mv.row *= -1;
+                mode_mv[NEARESTMV].as_mv.col *= -1;
+                mode_mv[NEARMV].as_mv.row *= -1;
+                mode_mv[NEARMV].as_mv.col *= -1;
+                best_ref_mv.as_mv.row *= -1;
+                best_ref_mv.as_mv.col *= -1;
+                sign_bias
+                = cpi->common.ref_frame_sign_bias[x->e_mbd.mode_info_context->mbmi.ref_frame];
+            }
+
+#if CONFIG_MULTI_RES_ENCODING
+            if (cpi->oxcf.mr_encoder_id)
+            {
+                if (vp8_mode_order[mode_index] == NEARESTMV &&
+                    mode_mv[NEARESTMV].as_int ==0)
+                    continue;
+                if (vp8_mode_order[mode_index] == NEARMV &&
+                    mode_mv[NEARMV].as_int ==0)
+                    continue;
+
+                if (vp8_mode_order[mode_index] == NEWMV && parent_mode == ZEROMV
+                    && best_ref_mv.as_int==0) //&& dissim==0
+                    continue;
+                else if(vp8_mode_order[mode_index] == NEWMV && dissim==0
+                    && best_ref_mv.as_int==parent_ref_mv.as_int)
+                    continue;
+            }
+#endif
         }
 
-        // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
-        // unless ARNR filtering is enabled in which case we want
-        // an unfiltered alternative
+        /* Check to see if the testing frequency for this mode is at its max
+         * If so then prevent it from being tested and increase the threshold
+         * for its testing */
+        if (cpi->mode_test_hit_counts[mode_index] &&
+                                         (cpi->mode_check_freq[mode_index] > 1))
+        {
+            if (cpi->mbs_tested_so_far <= (cpi->mode_check_freq[mode_index] *
+                                         cpi->mode_test_hit_counts[mode_index]))
+            {
+                /* Increase the threshold for coding this mode to make it less
+                 * likely to be chosen */
+                cpi->rd_thresh_mult[mode_index] += 4;
+
+                if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
+                    cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
+
+                cpi->rd_threshes[mode_index] =
+                                 (cpi->rd_baseline_thresh[mode_index] >> 7) *
+                                 cpi->rd_thresh_mult[mode_index];
+                continue;
+            }
+        }
+
+        /* We have now reached the point where we are going to test the current
+         * mode so increment the counter for the number of times it has been
+         * tested */
+        cpi->mode_test_hit_counts[mode_index] ++;
+
+        rate2 = 0;
+        distortion2 = 0;
+
+        this_mode = vp8_mode_order[mode_index];
+
+        x->e_mbd.mode_info_context->mbmi.mode = this_mode;
+        x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
+
+        /* Work out the cost assosciated with selecting the reference frame */
+        frame_cost =
+            x->e_mbd.ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
+        rate2 += frame_cost;
+
+        /* Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
+         * unless ARNR filtering is enabled in which case we want
+         * an unfiltered alternative */
         if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
         {
-            if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME)
+            if (this_mode != ZEROMV ||
+                x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME)
                 continue;
         }
 
         switch (this_mode)
         {
         case B_PRED:
-            // Pass best so far to pick_intra4x4mby_modes to use as breakout
+            /* Pass best so far to pick_intra4x4mby_modes to use as breakout */
             distortion2 = best_sse;
             pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate, &distortion2);
 
@@ -648,10 +759,12 @@
             int sadpb = x->sadperbit16;
             int_mv mvp_full;
 
-            int col_min = (best_ref_mv.as_mv.col>>3) - MAX_FULL_PEL_VAL + ((best_ref_mv.as_mv.col & 7)?1:0);
-            int row_min = (best_ref_mv.as_mv.row>>3) - MAX_FULL_PEL_VAL + ((best_ref_mv.as_mv.row & 7)?1:0);
-            int col_max = (best_ref_mv.as_mv.col>>3) + MAX_FULL_PEL_VAL;
-            int row_max = (best_ref_mv.as_mv.row>>3) + MAX_FULL_PEL_VAL;
+            int col_min = ((best_ref_mv.as_mv.col+7)>>3) - MAX_FULL_PEL_VAL;
+            int row_min = ((best_ref_mv.as_mv.row+7)>>3) - MAX_FULL_PEL_VAL;
+            int col_max = (best_ref_mv.as_mv.col>>3)
+                         + MAX_FULL_PEL_VAL;
+            int row_max = (best_ref_mv.as_mv.row>>3)
+                         + MAX_FULL_PEL_VAL;
 
             int tmp_col_min = x->mv_col_min;
             int tmp_col_max = x->mv_col_max;
@@ -663,118 +776,156 @@
             // Further step/diamond searches as necessary
             step_param = cpi->sf.first_step + speed_adjust;
 
-            if(cpi->sf.improved_mv_pred)
+#if CONFIG_MULTI_RES_ENCODING
+            if (cpi->oxcf.mr_encoder_id)
             {
-                if(!saddone)
-                {
-                    vp8_cal_sad(cpi,xd,x, recon_yoffset ,&near_sadidx[0] );
-                    saddone = 1;
-                }
+                // Use parent MV as predictor. Adjust search range accordingly.
+                mvp.as_int = parent_ref_mv.as_int;
+                mvp_full.as_mv.col = parent_ref_mv.as_mv.col>>3;
+                mvp_full.as_mv.row = parent_ref_mv.as_mv.row>>3;
 
-                vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp,
-                            x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
-
-                sr += speed_adjust;
-                //adjust search range according to sr from mv prediction
-                if(sr > step_param)
-                    step_param = sr;
-
-                mvp_full.as_mv.col = mvp.as_mv.col>>3;
-                mvp_full.as_mv.row = mvp.as_mv.row>>3;
-
+                if(dissim <=32) step_param += 3;
+                else if(dissim <=128) step_param += 2;
+                else step_param += 1;
             }else
+#endif
             {
-                mvp.as_int = best_ref_mv.as_int;
-                mvp_full.as_mv.col = best_ref_mv.as_mv.col>>3;
-                mvp_full.as_mv.row = best_ref_mv.as_mv.row>>3;
+                if(cpi->sf.improved_mv_pred)
+                {
+                    if(!saddone)
+                    {
+                        vp8_cal_sad(cpi,xd,x, recon_yoffset ,&near_sadidx[0] );
+                        saddone = 1;
+                    }
+
+                    vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context,
+                                &mvp,x->e_mbd.mode_info_context->mbmi.ref_frame,
+                                cpi->common.ref_frame_sign_bias, &sr,
+                                &near_sadidx[0]);
+
+                    sr += speed_adjust;
+                    //adjust search range according to sr from mv prediction
+                    if(sr > step_param)
+                        step_param = sr;
+
+                    mvp_full.as_mv.col = mvp.as_mv.col>>3;
+                    mvp_full.as_mv.row = mvp.as_mv.row>>3;
+                }else
+                {
+                    mvp.as_int = best_ref_mv.as_int;
+                    mvp_full.as_mv.col = best_ref_mv.as_mv.col>>3;
+                    mvp_full.as_mv.row = best_ref_mv.as_mv.row>>3;
+                }
             }
 
-            // Get intersection of UMV window and valid MV window to reduce # of checks in diamond search.
-            if (x->mv_col_min < col_min )
-                x->mv_col_min = col_min;
-            if (x->mv_col_max > col_max )
-                x->mv_col_max = col_max;
-            if (x->mv_row_min < row_min )
-                x->mv_row_min = row_min;
-            if (x->mv_row_max > row_max )
-                x->mv_row_max = row_max;
-
-            further_steps = (cpi->Speed >= 8)? 0: (cpi->sf.max_step_search_steps - 1 - step_param);
-
-            if (cpi->sf.search_method == HEX)
+#if CONFIG_MULTI_RES_ENCODING
+            if (cpi->oxcf.mr_encoder_id && dissim <= 2 &&
+                MAX(abs(best_ref_mv.as_mv.row - parent_ref_mv.as_mv.row),
+                    abs(best_ref_mv.as_mv.col - parent_ref_mv.as_mv.col)) <= 4)
             {
+                d->bmi.mv.as_int = mvp_full.as_int;
+                mode_mv[NEWMV].as_int = mvp_full.as_int;
+
+                cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv, &best_ref_mv,
+                                             x->errorperbit,
+                                             &cpi->fn_ptr[BLOCK_16X16],
+                                             cpi->mb.mvcost,
+                                             &distortion2,&sse);
+            }else
+#endif
+            {
+                /* Get intersection of UMV window and valid MV window to
+                 * reduce # of checks in diamond search. */
+                if (x->mv_col_min < col_min )
+                    x->mv_col_min = col_min;
+                if (x->mv_col_max > col_max )
+                    x->mv_col_max = col_max;
+                if (x->mv_row_min < row_min )
+                    x->mv_row_min = row_min;
+                if (x->mv_row_max > row_max )
+                    x->mv_row_max = row_max;
+
+                further_steps = (cpi->Speed >= 8)?
+                           0: (cpi->sf.max_step_search_steps - 1 - step_param);
+
+                if (cpi->sf.search_method == HEX)
+                {
 #if CONFIG_MULTI_RES_ENCODING
                 /* TODO: In higher-res pick_inter_mode, step_param is used to
                  * modify hex search range. Here, set step_param to 0 not to
                  * change the behavior in lowest-resolution encoder.
                  * Will improve it later.
                  */
-                step_param = 0;
+                if (!cpi->oxcf.mr_encoder_id)
+                    step_param = 0;
 #endif
-                bestsme = vp8_hex_search(x, b, d, &mvp_full, &d->bmi.mv, step_param,
-                                      sadpb, &cpi->fn_ptr[BLOCK_16X16],
-                                      x->mvsadcost, x->mvcost, &best_ref_mv);
-                mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
-            }
-            else
-            {
-                bestsme = cpi->diamond_search_sad(x, b, d, &mvp_full, &d->bmi.mv,
-                                      step_param, sadpb, &num00,
-                                      &cpi->fn_ptr[BLOCK_16X16],
-                                      x->mvcost, &best_ref_mv);
-                mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
-
-                // Further step/diamond searches as necessary
-                n = 0;
-                //further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
-
-                n = num00;
-                num00 = 0;
-
-                while (n < further_steps)
+                    bestsme = vp8_hex_search(x, b, d, &mvp_full, &d->bmi.mv,
+                                          step_param, sadpb,
+                                          &cpi->fn_ptr[BLOCK_16X16],
+                                          x->mvsadcost, x->mvcost, &best_ref_mv);
+                    mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
+                }
+                else
                 {
-                    n++;
+                    bestsme = cpi->diamond_search_sad(x, b, d, &mvp_full,
+                                          &d->bmi.mv, step_param, sadpb, &num00,
+                                          &cpi->fn_ptr[BLOCK_16X16],
+                                          x->mvcost, &best_ref_mv);
+                    mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
 
-                    if (num00)
-                        num00--;
-                    else
+                    // Further step/diamond searches as necessary
+                    n = 0;
+                    //further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
+
+                    n = num00;
+                    num00 = 0;
+
+                    while (n < further_steps)
                     {
-                        thissme =
-                        cpi->diamond_search_sad(x, b, d, &mvp_full,
-                                                &d->bmi.mv,
-                                                step_param + n,
-                                                sadpb, &num00,
-                                                &cpi->fn_ptr[BLOCK_16X16],
-                                                x->mvcost, &best_ref_mv);
-                        if (thissme < bestsme)
-                        {
-                            bestsme = thissme;
-                            mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
-                        }
+                        n++;
+
+                        if (num00)
+                            num00--;
                         else
                         {
-                            d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
+                            thissme =
+                            cpi->diamond_search_sad(x, b, d, &mvp_full,
+                                                    &d->bmi.mv,
+                                                    step_param + n,
+                                                    sadpb, &num00,
+                                                    &cpi->fn_ptr[BLOCK_16X16],
+                                                    x->mvcost, &best_ref_mv);
+                            if (thissme < bestsme)
+                            {
+                                bestsme = thissme;
+                                mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
+                            }
+                            else
+                            {
+                                d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
+                            }
                         }
                     }
                 }
-            }
 
-            x->mv_col_min = tmp_col_min;
-            x->mv_col_max = tmp_col_max;
-            x->mv_row_min = tmp_row_min;
-            x->mv_row_max = tmp_row_max;
+                x->mv_col_min = tmp_col_min;
+                x->mv_col_max = tmp_col_max;
+                x->mv_row_min = tmp_row_min;
+                x->mv_row_max = tmp_row_max;
 
-            if (bestsme < INT_MAX)
-                cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv, &best_ref_mv,
-                                             x->errorperbit,
+                if (bestsme < INT_MAX)
+                    cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv,
+                                             &best_ref_mv, x->errorperbit,
                                              &cpi->fn_ptr[BLOCK_16X16],
                                              cpi->mb.mvcost,
                                              &distortion2,&sse);
+            }
 
             mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
 
             // mv cost;
-            rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, cpi->mb.mvcost, 128);
+            rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv,
+                                     cpi->mb.mvcost, 128);
         }
 
         case NEARESTMV:
@@ -785,18 +936,23 @@
 
         case ZEROMV:
 
-            // Trap vectors that reach beyond the UMV borders
-            // Note that ALL New MV, Nearest MV Near MV and Zero MV code drops through to this point
-            // because of the lack of break statements in the previous two cases.
-            if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
-                ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max))
+            /* Trap vectors that reach beyond the UMV borders
+             * Note that ALL New MV, Nearest MV Near MV and Zero MV code drops
+             * through to this point because of the lack of break statements
+             * in the previous two cases.
+             */
+            if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
+                ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
+                ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
+                ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max))
                 continue;
 
             rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
             x->e_mbd.mode_info_context->mbmi.mv.as_int =
                                                     mode_mv[this_mode].as_int;
 
-            /* Exit early and don't compute the distortion if this macroblock is marked inactive. */
+            /* Exit early and don't compute the distortion if this macroblock
+             * is marked inactive. */
             if (cpi->active_map_enabled && x->active_ptr[0] == 0)
             {
                 sse = 0;
@@ -831,9 +987,6 @@
             break;
         }
 
-        // Experimental debug code.
-        //all_rds[mode_index] = this_rd;
-
         if (this_rd < best_rd || x->skip)
         {
             // Note index of best mode
@@ -843,14 +996,23 @@
             *returndistortion = distortion2;
             best_sse = sse;
             best_rd = this_rd;
-            vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
+            vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi,
+                       sizeof(MB_MODE_INFO));
 
-            // Testing this mode gave rise to an improvement in best error score. Lower threshold a bit for next time
-            cpi->rd_thresh_mult[mode_index] = (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
-            cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
+            /* Testing this mode gave rise to an improvement in best error
+             * score. Lower threshold a bit for next time
+             */
+            cpi->rd_thresh_mult[mode_index] =
+                     (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?
+                     cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
+            cpi->rd_threshes[mode_index] =
+                                   (cpi->rd_baseline_thresh[mode_index] >> 7) *
+                                   cpi->rd_thresh_mult[mode_index];
         }
 
-        // If the mode did not help improve the best error case then raise the threshold for testing that mode next time around.
+        /* If the mode did not help improve the best error case then raise the
+         * threshold for testing that mode next time around.
+         */
         else
         {
             cpi->rd_thresh_mult[mode_index] += 4;
@@ -858,7 +1020,9 @@
             if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
                 cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
 
-            cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
+            cpi->rd_threshes[mode_index] =
+                         (cpi->rd_baseline_thresh[mode_index] >> 7) *
+                         cpi->rd_thresh_mult[mode_index];
         }
 
         if (x->skip)
@@ -870,8 +1034,14 @@
     {
         int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 3);
 
-        cpi->rd_thresh_mult[best_mode_index] = (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ? cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT;
-        cpi->rd_threshes[best_mode_index] = (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index];
+        cpi->rd_thresh_mult[best_mode_index] =
+                        (cpi->rd_thresh_mult[best_mode_index]
+                        >= (MIN_THRESHMULT + best_adjustment)) ?
+                        cpi->rd_thresh_mult[best_mode_index] - best_adjustment :
+                        MIN_THRESHMULT;
+        cpi->rd_threshes[best_mode_index] =
+                        (cpi->rd_baseline_thresh[best_mode_index] >> 7) *
+                        cpi->rd_thresh_mult[best_mode_index];
     }
 
 
@@ -894,15 +1064,17 @@
         x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
         x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
         x->e_mbd.mode_info_context->mbmi.mb_skip_coeff =
-                                        (cpi->common.mb_no_coeff_skip) ? 1 : 0;
+                                        (cpi->common.mb_no_coeff_skip);
         x->e_mbd.mode_info_context->mbmi.partitioning = 0;
 
         return;
     }
 
-    /* set to the best mb mode, this copy can be skip if x->skip since it already has the right content */
+    /* set to the best mb mode, this copy can be skip if x->skip since it
+     * already has the right content */
     if (!x->skip)
-        vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+        vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode,
+                   sizeof(MB_MODE_INFO));
 
     if (best_mbmode.mode <= B_PRED)
     {
@@ -910,7 +1082,14 @@
         pick_intra_mbuv_mode(x);
     }
 
-    update_mvcount(cpi, &x->e_mbd, &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame]);
+    if (sign_bias
+      != cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame])
+    {
+        best_ref_mv.as_mv.row *= -1;
+        best_ref_mv.as_mv.col *= -1;
+    }
+
+    update_mvcount(cpi, &x->e_mbd, &best_ref_mv);
 }
 
 
@@ -957,568 +1136,3 @@
 
     *rate_ = best_rate;
 }
-
-#if CONFIG_MULTI_RES_ENCODING
-void vp8_mr_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
-                         int recon_uvoffset, int *returnrate,
-                         int *returndistortion, int *returnintra, int mb_row,
-                         int mb_col)
-{
-    BLOCK *b = &x->block[0];
-    BLOCKD *d = &x->e_mbd.block[0];
-    MACROBLOCKD *xd = &x->e_mbd;
-    MB_MODE_INFO best_mbmode;
-
-    int_mv best_ref_mv;
-    int_mv mode_mv[MB_MODE_COUNT];
-    MB_PREDICTION_MODE this_mode;
-    int num00;
-    int mdcounts[4];
-    int best_rd = INT_MAX; // 1 << 30;
-    int best_intra_rd = INT_MAX;
-    int mode_index;
-    int rate;
-    int rate2;
-    int distortion2;
-    int bestsme;
-    int best_mode_index = 0;
-    unsigned int sse = INT_MAX, best_sse = INT_MAX;
-
-    int_mv mvp;
-    int_mv nearest_mv[4];
-    int_mv near_mv[4];
-    int_mv frame_best_ref_mv[4];
-    int MDCounts[4][4];
-    unsigned char *y_buffer[4];
-    unsigned char *u_buffer[4];
-    unsigned char *v_buffer[4];
-    int skip_mode[4] = {0, 0, 0, 0};
-    int have_subp_search = cpi->sf.half_pixel_search;  /* In real-time mode,
-                                       when Speed >= 15, no sub-pixel search. */
-    int lfdone=0, gfdone=0, afdone=0;
-
-    LOWER_RES_INFO* store_mode_info
-                              = (LOWER_RES_INFO*)cpi->oxcf.mr_low_res_mode_info;
-    unsigned int parent_mb_index;
-    //unsigned int parent_mb_index = map_640x480_to_320x240[mb_row][mb_col];
-    int dissim;
-    int parent_ref_frame;
-    int_mv parent_ref_mv;
-    MB_PREDICTION_MODE parent_mode;
-
-    /* Consider different down_sampling_factor.  */
-    {
-        /* TODO: Removed the loop that supports special down_sampling_factor
-         * such as 2, 4, 8. Will revisit it if needed.
-         * Should also try using a look-up table to see if it helps
-         * performance. */
-        int round = cpi->oxcf.mr_down_sampling_factor.num/2;
-        int parent_mb_row, parent_mb_col;
-
-        parent_mb_row = (mb_row*cpi->oxcf.mr_down_sampling_factor.den+round)
-                        /cpi->oxcf.mr_down_sampling_factor.num;
-        parent_mb_col = (mb_col*cpi->oxcf.mr_down_sampling_factor.den+round)
-                        /cpi->oxcf.mr_down_sampling_factor.num;
-        parent_mb_index = parent_mb_row*cpi->mr_low_res_mb_cols + parent_mb_col;
-    }
-
-    /* Read lower-resolution mode & motion result from memory.*/
-    parent_ref_frame = store_mode_info[parent_mb_index].ref_frame;
-    parent_mode =  store_mode_info[parent_mb_index].mode;
-    dissim = store_mode_info[parent_mb_index].dissim;
-
-    /* For highest-resolution encoder, adjust dissim value. Lower its quality
-     * for good performance. */
-    if (cpi->oxcf.mr_encoder_id == (cpi->oxcf.mr_total_resolutions - 1))
-        dissim>>=1;
-
-    if(parent_ref_frame != INTRA_FRAME)
-    {
-        /* Consider different down_sampling_factor.
-         * The result can be rounded to be more precise, but it takes more time.
-         */
-        //int round = cpi->oxcf.mr_down_sampling_factor.den/2;
-        parent_ref_mv.as_mv.row = store_mode_info[parent_mb_index].mv.as_mv.row
-                                  *cpi->oxcf.mr_down_sampling_factor.num
-                                  /cpi->oxcf.mr_down_sampling_factor.den;
-        parent_ref_mv.as_mv.col = store_mode_info[parent_mb_index].mv.as_mv.col
-                                  *cpi->oxcf.mr_down_sampling_factor.num
-                                  /cpi->oxcf.mr_down_sampling_factor.den;
-
-        vp8_clamp_mv2(&parent_ref_mv, xd);
-    }
-
-    vpx_memset(mode_mv, 0, sizeof(mode_mv));
-    vpx_memset(nearest_mv, 0, sizeof(nearest_mv));
-    vpx_memset(near_mv, 0, sizeof(near_mv));
-    vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
-
-    cpi->mbs_tested_so_far++;
-
-    *returnintra = INT_MAX;
-    x->skip = 0;
-
-    x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
-
-    // if we encode a new mv this is important
-    // find the best new motion vector
-    for (mode_index = 0; mode_index < MAX_MODES; mode_index++)
-    {
-        int frame_cost;
-        int this_rd = INT_MAX;
-
-        if (best_rd <= cpi->rd_threshes[mode_index])
-            continue;
-
-        /* If parent MB is intra, child MB is intra. */
-        if (!parent_ref_frame && vp8_ref_frame_order[mode_index])
-            continue;
-
-        /* If parent MB is inter, and it is unlikely there are multiple objects
-         * in parent MB, we use parent ref frame as child MB's ref frame. */
-        if (parent_ref_frame && dissim < 8
-            && parent_ref_frame != vp8_ref_frame_order[mode_index])
-            continue;
-
-        x->e_mbd.mode_info_context->mbmi.ref_frame = vp8_ref_frame_order[mode_index];
-
-        if(x->e_mbd.mode_info_context->mbmi.ref_frame)
-        {
-            if(x->e_mbd.mode_info_context->mbmi.ref_frame==LAST_FRAME && !lfdone)
-            {
-                // set up all the refframe dependent pointers.
-                //if (x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME
-                //&& (cpi->ref_frame_flags & VP8_LAST_FLAG))
-                if (cpi->ref_frame_flags & VP8_LAST_FLAG)
-                {
-                    YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
-
-                    vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context,
-                        &nearest_mv[LAST_FRAME], &near_mv[LAST_FRAME],
-                        &frame_best_ref_mv[LAST_FRAME], MDCounts[LAST_FRAME],
-                        LAST_FRAME, cpi->common.ref_frame_sign_bias);
-
-                    y_buffer[LAST_FRAME] = lst_yv12->y_buffer + recon_yoffset;
-                    u_buffer[LAST_FRAME] = lst_yv12->u_buffer + recon_uvoffset;
-                    v_buffer[LAST_FRAME] = lst_yv12->v_buffer + recon_uvoffset;
-                }
-                else
-                    skip_mode[LAST_FRAME] = 1;
-
-                lfdone = 1;
-            }
-
-            if(x->e_mbd.mode_info_context->mbmi.ref_frame==GOLDEN_FRAME && !gfdone)
-            {
-                //if (x->e_mbd.mode_info_context->mbmi.ref_frame == GOLDEN_FRAME
-                //&& (cpi->ref_frame_flags & VP8_GOLD_FLAG))
-                if (cpi->ref_frame_flags & VP8_GOLD_FLAG)
-                {
-                    YV12_BUFFER_CONFIG *gld_yv12 = &cpi->common.yv12_fb[cpi->common.gld_fb_idx];
-
-                    vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context,
-                        &nearest_mv[GOLDEN_FRAME], &near_mv[GOLDEN_FRAME],
-                        &frame_best_ref_mv[GOLDEN_FRAME],MDCounts[GOLDEN_FRAME],
-                        GOLDEN_FRAME, cpi->common.ref_frame_sign_bias);
-
-                    y_buffer[GOLDEN_FRAME] = gld_yv12->y_buffer + recon_yoffset;
-                    u_buffer[GOLDEN_FRAME] = gld_yv12->u_buffer + recon_uvoffset;
-                    v_buffer[GOLDEN_FRAME] = gld_yv12->v_buffer + recon_uvoffset;
-                }
-                else
-                    skip_mode[GOLDEN_FRAME] = 1;
-
-                gfdone = 1;
-            }
-
-            if(x->e_mbd.mode_info_context->mbmi.ref_frame==ALTREF_FRAME && !afdone)
-            {
-                //if (x->e_mbd.mode_info_context->mbmi.ref_frame == ALTREF_FRAME
-                //&& (cpi->ref_frame_flags & VP8_ALT_FLAG && cpi->source_alt_ref_active))
-                if (cpi->ref_frame_flags & VP8_ALT_FLAG && cpi->source_alt_ref_active)
-                {
-                    YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx];
-
-                    vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context,
-                        &nearest_mv[ALTREF_FRAME], &near_mv[ALTREF_FRAME],
-                        &frame_best_ref_mv[ALTREF_FRAME],MDCounts[ALTREF_FRAME],
-                        ALTREF_FRAME, cpi->common.ref_frame_sign_bias);
-
-                    y_buffer[ALTREF_FRAME] = alt_yv12->y_buffer + recon_yoffset;
-                    u_buffer[ALTREF_FRAME] = alt_yv12->u_buffer + recon_uvoffset;
-                    v_buffer[ALTREF_FRAME] = alt_yv12->v_buffer + recon_uvoffset;
-                }
-                else
-                    skip_mode[ALTREF_FRAME] = 1;
-
-                afdone = 1;
-            }
-
-            if (skip_mode[x->e_mbd.mode_info_context->mbmi.ref_frame])
-                continue;
-
-            x->e_mbd.pre.y_buffer = y_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
-            x->e_mbd.pre.u_buffer = u_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
-            x->e_mbd.pre.v_buffer = v_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
-            mode_mv[NEARESTMV] = nearest_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
-            mode_mv[NEARMV] = near_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
-            best_ref_mv = frame_best_ref_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
-            memcpy(mdcounts, MDCounts[x->e_mbd.mode_info_context->mbmi.ref_frame], sizeof(mdcounts));
-
-            if (vp8_mode_order[mode_index] == NEARESTMV && mode_mv[NEARESTMV].as_int ==0)
-                continue;
-            if (vp8_mode_order[mode_index] == NEARMV && mode_mv[NEARMV].as_int ==0)
-                continue;
-
-            if (vp8_mode_order[mode_index] == NEWMV && parent_mode == ZEROMV
-                && best_ref_mv.as_int==0) //&& dissim==0
-                continue;
-            else if(vp8_mode_order[mode_index] == NEWMV && dissim==0
-                && best_ref_mv.as_int==parent_ref_mv.as_int)
-                continue;
-        }
-
-        // Check to see if the testing frequency for this mode is at its max
-        // If so then prevent it from being tested and increase the threshold for its testing
-        if (cpi->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1))
-        {
-            //if ( (cpi->mbs_tested_so_far / cpi->mode_test_hit_counts[mode_index]) <= cpi->mode_check_freq[mode_index] )
-            if (cpi->mbs_tested_so_far <= (cpi->mode_check_freq[mode_index] * cpi->mode_test_hit_counts[mode_index]))
-            {
-                // Increase the threshold for coding this mode to make it less likely to be chosen
-                cpi->rd_thresh_mult[mode_index] += 4;
-
-                if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
-                    cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
-
-                cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
-
-                continue;
-            }
-        }
-
-        // We have now reached the point where we are going to test the current
-        //mode so increment the counter for the number of times it has been tested
-        cpi->mode_test_hit_counts[mode_index] ++;
-
-        rate2 = 0;
-        distortion2 = 0;
-
-        this_mode = vp8_mode_order[mode_index];
-
-        x->e_mbd.mode_info_context->mbmi.mode = this_mode;
-        x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
-
-        // Work out the cost assosciated with selecting the reference frame
-        frame_cost =
-            x->e_mbd.ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
-        rate2 += frame_cost;
-
-        // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
-        // unless ARNR filtering is enabled in which case we want
-        // an unfiltered alternative
-        if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
-        {
-            if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME)
-                continue;
-        }
-
-        switch (this_mode)
-        {
-          case B_PRED:
-              // Pass best so far to pick_intra4x4mby_modes to use as breakout
-              distortion2 = best_sse;
-              pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate, &distortion2);
-
-              if (distortion2 == INT_MAX)
-              {
-                  this_rd = INT_MAX;
-              }
-              else
-              {
-                  rate2 += rate;
-                  distortion2 = VARIANCE_INVOKE
-                                  (&cpi->rtcd.variance, var16x16)(
-                                      *(b->base_src), b->src_stride,
-                                      x->e_mbd.predictor, 16, &sse);
-                  this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
-
-                  if (this_rd < best_intra_rd)
-                  {
-                      best_intra_rd = this_rd;
-                      *returnintra = distortion2;
-                  }
-              }
-
-              break;
-
-          case DC_PRED:
-          case V_PRED:
-          case H_PRED:
-          case TM_PRED:
-              RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
-                  (&x->e_mbd);
-              distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
-                                            (*(b->base_src), b->src_stride,
-                                            x->e_mbd.predictor, 16, &sse);
-              rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
-              this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
-
-              if (this_rd < best_intra_rd)
-              {
-                  best_intra_rd = this_rd;
-                  *returnintra = distortion2;
-              }
-              break;
-
-        case NEWMV:
-        {
-            int thissme;
-            int step_param;
-            int further_steps;
-            int n = 0;
-            int sadpb = x->sadperbit16;
-            int_mv mvp_full;
-
-            int col_min = (best_ref_mv.as_mv.col>>3) - MAX_FULL_PEL_VAL
-                        + ((best_ref_mv.as_mv.col & 7)?1:0);
-            int row_min = (best_ref_mv.as_mv.row>>3) - MAX_FULL_PEL_VAL
-                        + ((best_ref_mv.as_mv.row & 7)?1:0);
-            int col_max = (best_ref_mv.as_mv.col>>3) + MAX_FULL_PEL_VAL;
-            int row_max = (best_ref_mv.as_mv.row>>3) + MAX_FULL_PEL_VAL;
-
-            int tmp_col_min = x->mv_col_min;
-            int tmp_col_max = x->mv_col_max;
-            int tmp_row_min = x->mv_row_min;
-            int tmp_row_max = x->mv_row_max;
-
-            int speed_adjust = (cpi->Speed > 5) ? ((cpi->Speed >= 8)? 3 : 2) : 1;
-            int diff_mv = MAX(abs(best_ref_mv.as_mv.row - parent_ref_mv.as_mv.row),
-                              abs(best_ref_mv.as_mv.col - parent_ref_mv.as_mv.col));
-
-            // Further step/diamond searches as necessary
-            step_param = cpi->sf.first_step + speed_adjust;   //sf->first_step = 1; for -6 step_param =3;
-
-            // Use parent MV as predictor. Adjust search range accordingly.
-            mvp.as_int = parent_ref_mv.as_int;
-            mvp_full.as_mv.col = parent_ref_mv.as_mv.col>>3;
-            mvp_full.as_mv.row = parent_ref_mv.as_mv.row>>3;
-
-            if(dissim <=32) step_param += 3;
-            else if(dissim <=128) step_param += 2;
-            else step_param += 1;
-
-            if(dissim >2 || diff_mv >4)
-            {
-                /* Get intersection of UMV window and valid MV window to
-                 * reduce # of checks in diamond search. */
-                if (x->mv_col_min < col_min )
-                    x->mv_col_min = col_min;
-                if (x->mv_col_max > col_max )
-                    x->mv_col_max = col_max;
-                if (x->mv_row_min < row_min )
-                    x->mv_row_min = row_min;
-                if (x->mv_row_max > row_max )
-                    x->mv_row_max = row_max;
-
-                further_steps = (cpi->Speed >= 8)?
-                           0: (cpi->sf.max_step_search_steps - 1 - step_param);
-
-                if (cpi->sf.search_method == HEX)
-                {
-                    bestsme = vp8_hex_search(x, b, d, &mvp_full, &d->bmi.mv, step_param,
-                                          sadpb, &cpi->fn_ptr[BLOCK_16X16],
-                                          x->mvsadcost, x->mvcost, &best_ref_mv);
-                    mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
-                }
-                else
-                {
-                    bestsme = cpi->diamond_search_sad(x, b, d, &mvp_full, &d->bmi.mv,
-                                          step_param, sadpb, &num00,
-                                          &cpi->fn_ptr[BLOCK_16X16],
-                                          x->mvcost, &best_ref_mv);
-                    mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
-
-                    // Further step/diamond searches as necessary
-                    n = 0;
-                    //further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
-
-                    n = num00;
-                    num00 = 0;
-
-                    while (n < further_steps)
-                    {
-                        n++;
-
-                        if (num00)
-                            num00--;
-                        else
-                        {
-                            thissme =
-                            cpi->diamond_search_sad(x, b, d, &mvp_full,
-                                                    &d->bmi.mv,
-                                                    step_param + n,
-                                                    sadpb, &num00,
-                                                    &cpi->fn_ptr[BLOCK_16X16],
-                                                    x->mvcost, &best_ref_mv);
-                            if (thissme < bestsme)
-                            {
-                                bestsme = thissme;
-                                mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
-                            }
-                            else
-                            {
-                                d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
-                            }
-                        }
-                    }
-                }
-
-                x->mv_col_min = tmp_col_min;
-                x->mv_col_max = tmp_col_max;
-                x->mv_row_min = tmp_row_min;
-                x->mv_row_max = tmp_row_max;
-            }else
-            {
-                d->bmi.mv.as_int = mvp_full.as_int;
-                mode_mv[NEWMV].as_int = mvp_full.as_int;
-            }
-
-            // This is not needed.
-            //if (bestsme < INT_MAX)
-                cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv, &best_ref_mv,
-                                             x->errorperbit,
-                                             &cpi->fn_ptr[BLOCK_16X16],
-                                             cpi->mb.mvcost,
-                                             &distortion2,&sse);
-
-            mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
-
-            // mv cost;
-            rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, cpi->mb.mvcost, 128);
-        }
-
-        case NEARESTMV:
-        case NEARMV:
-          // Trap vectors that reach beyond the UMV borders
-          // Note that ALL New MV, Nearest MV Near MV and Zero MV code drops
-          // through to this point because of the lack of break statements
-          // in the previous two cases.
-          if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
-              ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
-              ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
-              ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max))
-              continue;
-
-        case ZEROMV:
-            rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
-            x->e_mbd.mode_info_context->mbmi.mv.as_int =
-                                                    mode_mv[this_mode].as_int;
-
-            if((this_mode != NEWMV) ||
-                !(have_subp_search) || cpi->common.full_pixel==1)
-                distortion2 = get_inter_mbpred_error(x,
-                                                     &cpi->fn_ptr[BLOCK_16X16],
-                                                     &sse, mode_mv[this_mode]);
-
-            this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
-
-            if (cpi->active_map_enabled && x->active_ptr[0] == 0)
-            {
-                x->skip = 1;
-            }
-            else if (sse < x->encode_breakout)
-            {
-                // Check u and v to make sure skip is ok
-                int sse2 = 0;
-
-                sse2 = VP8_UVSSE(x, IF_RTCD(&cpi->rtcd.variance));
-
-                if (sse2 * 2 < x->encode_breakout)
-                    x->skip = 1;
-                else
-                    x->skip = 0;
-            }
-
-            break;
-        default:
-            break;
-        }
-
-        if (this_rd < best_rd || x->skip)
-        {
-            // Note index of best mode
-            best_mode_index = mode_index;
-
-            *returnrate = rate2;
-            *returndistortion = distortion2;
-            best_sse = sse;
-            best_rd = this_rd;
-            vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
-
-            // Testing this mode gave rise to an improvement in best error score. Lower threshold a bit for next time
-            cpi->rd_thresh_mult[mode_index] = (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
-            cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
-        }
-        // If the mode did not help improve the best error case then raise the threshold for testing that mode next time around.
-        else
-        {
-            cpi->rd_thresh_mult[mode_index] += 4;
-
-            if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
-                cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
-
-            cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
-        }
-
-        if (x->skip)
-            break;
-    }
-
-    // Reduce the activation RD thresholds for the best choice mode
-    if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2)))
-    {
-        int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 3);
-
-        cpi->rd_thresh_mult[best_mode_index] = (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ? cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT;
-        cpi->rd_threshes[best_mode_index] = (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index];
-    }
-
-
-    {
-        int this_rdbin = (*returndistortion >> 7);
-
-        if (this_rdbin >= 1024)
-        {
-            this_rdbin = 1023;
-        }
-
-        cpi->error_bins[this_rdbin] ++;
-    }
-
-    if (cpi->is_src_frame_alt_ref &&
-        (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME))
-    {
-        x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
-        x->e_mbd.mode_info_context->mbmi.ref_frame = ALTREF_FRAME;
-        x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
-        x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
-        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff =
-                                        (cpi->common.mb_no_coeff_skip) ? 1 : 0;
-        x->e_mbd.mode_info_context->mbmi.partitioning = 0;
-
-        return;
-    }
-
-    /* set to the best mb mode */
-    vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
-
-    if (best_mbmode.mode <= B_PRED)
-    {
-        /* set mode_info_context->mbmi.uv_mode */
-        pick_intra_mbuv_mode(x);
-    }
-
-    update_mvcount(cpi, &x->e_mbd, &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame]);
-}
-#endif

diff --git a/vp8/encoder/pickinter.h b/vp8/encoder/pickinter.h
index df6042f..3d83782 100644
--- a/vp8/encoder/pickinter.h
+++ b/vp8/encoder/pickinter.h

@@ -16,14 +16,8 @@
 
 extern void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
                                 int recon_uvoffset, int *returnrate,
-                                int *returndistortion, int *returnintra);
+                                int *returndistortion, int *returnintra,
+                                int mb_row, int mb_col);
 extern void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate);
 
-#if CONFIG_MULTI_RES_ENCODING
-extern void vp8_mr_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x,
-                                   int recon_yoffset, int recon_uvoffset,
-                                   int *returnrate, int *returndistortion,
-                                   int *returnintra, int mb_row, int mb_col);
-#endif
-
 #endif

diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c
index e97a560..2449ae5 100644
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c

@@ -186,7 +186,7 @@
     best_err = calc_partial_ssl_err(sd, cm->frame_to_show,
                                     IF_RTCD(&cpi->rtcd.variance));
 
-    filt_val -= (1 + ((filt_val > 10) ? 1 : 0));
+    filt_val -= 1 + (filt_val > 10);
 
     // Search lower filter levels
     while (filt_val >= min_filter_level)
@@ -209,11 +209,11 @@
             break;
 
         // Adjust filter level
-        filt_val -= (1 + ((filt_val > 10) ? 1 : 0));
+        filt_val -= 1 + (filt_val > 10);
     }
 
     // Search up (note that we have already done filt_val = cm->filter_level)
-    filt_val = cm->filter_level + (1 + ((filt_val > 10) ? 1 : 0));
+    filt_val = cm->filter_level + 1 + (filt_val > 10);
 
     if (best_filt_val == cm->filter_level)
     {
@@ -243,7 +243,7 @@
                 break;
 
             // Adjust filter level
-            filt_val += (1 + ((filt_val > 10) ? 1 : 0));
+            filt_val += 1 + (filt_val > 10);
         }
     }
 
@@ -289,8 +289,12 @@
 
     int Bias = 0;                       // Bias against raising loop filter and in favor of lowering it
 
+    int ss_err[MAX_LOOP_FILTER + 1];
+
     YV12_BUFFER_CONFIG * saved_frame = cm->frame_to_show;
 
+    vpx_memset(ss_err, 0, sizeof(ss_err));
+
     /* Replace unfiltered frame buffer with a new one */
     cm->frame_to_show = &cpi->pick_lf_lvl_frame;
 
@@ -320,6 +324,9 @@
 
     best_err = vp8_calc_ss_err(sd, cm->frame_to_show,
                                IF_RTCD(&cpi->rtcd.variance));
+
+    ss_err[filt_mid] = best_err;
+
     filt_best = filt_mid;
 
     while (filter_step > 0)
@@ -335,13 +342,19 @@
 
         if ((filt_direction <= 0) && (filt_low != filt_mid))
         {
-            // Get Low filter error score
-            vp8_yv12_copy_y_ptr(saved_frame, cm->frame_to_show);
-            vp8cx_set_alt_lf_level(cpi, filt_low);
-            vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low);
+            if(ss_err[filt_low] == 0)
+            {
+                // Get Low filter error score
+                vp8_yv12_copy_y_ptr(saved_frame, cm->frame_to_show);
+                vp8cx_set_alt_lf_level(cpi, filt_low);
+                vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low);
 
-            filt_err = vp8_calc_ss_err(sd, cm->frame_to_show,
-                                       IF_RTCD(&cpi->rtcd.variance));
+                filt_err = vp8_calc_ss_err(sd, cm->frame_to_show,
+                                           IF_RTCD(&cpi->rtcd.variance));
+                ss_err[filt_low] = filt_err;
+            }
+            else
+                filt_err = ss_err[filt_low];
 
             // If value is close to the best so far then bias towards a lower loop filter value.
             if ((filt_err - Bias) < best_err)
@@ -357,12 +370,18 @@
         // Now look at filt_high
         if ((filt_direction >= 0) && (filt_high != filt_mid))
         {
-            vp8_yv12_copy_y_ptr(saved_frame, cm->frame_to_show);
-            vp8cx_set_alt_lf_level(cpi, filt_high);
-            vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_high);
+            if(ss_err[filt_high] == 0)
+            {
+                vp8_yv12_copy_y_ptr(saved_frame, cm->frame_to_show);
+                vp8cx_set_alt_lf_level(cpi, filt_high);
+                vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_high);
 
-            filt_err = vp8_calc_ss_err(sd, cm->frame_to_show,
-                                       IF_RTCD(&cpi->rtcd.variance));
+                filt_err = vp8_calc_ss_err(sd, cm->frame_to_show,
+                                           IF_RTCD(&cpi->rtcd.variance));
+                ss_err[filt_high] = filt_err;
+            }
+            else
+                filt_err = ss_err[filt_high];
 
             // Was it better than the previous best?
             if (filt_err < (best_err - Bias))

diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 3ca8758..ce04212 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c

@@ -436,7 +436,8 @@
     int quant_val;
     int Q;
 
-    int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};
+    int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44,
+                          44, 44};
 
     for (Q = 0; Q < QINDEX_RANGE; Q++)
     {
@@ -469,36 +470,61 @@
         cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 
         // all the ac values = ;
-        for (i = 1; i < 16; i++)
+        quant_val = vp8_ac_yquant(Q);
+        cpi->Y1quant_fast[Q][1] = (1 << 16) / quant_val;
+        invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 1,
+                     cpi->Y1quant_shift[Q] + 1, quant_val);
+        cpi->Y1zbin[Q][1] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+        cpi->Y1round[Q][1] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->common.Y1dequant[Q][1] = quant_val;
+        cpi->zrun_zbin_boost_y1[Q][1] = (quant_val * zbin_boost[1]) >> 7;
+
+        quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
+        cpi->Y2quant_fast[Q][1] = (1 << 16) / quant_val;
+        invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 1,
+                     cpi->Y2quant_shift[Q] + 1, quant_val);
+        cpi->Y2zbin[Q][1] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+        cpi->Y2round[Q][1] = (qrounding_factors_y2[Q] * quant_val) >> 7;
+        cpi->common.Y2dequant[Q][1] = quant_val;
+        cpi->zrun_zbin_boost_y2[Q][1] = (quant_val * zbin_boost[1]) >> 7;
+
+        quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
+        cpi->UVquant_fast[Q][1] = (1 << 16) / quant_val;
+        invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 1,
+                     cpi->UVquant_shift[Q] + 1, quant_val);
+        cpi->UVzbin[Q][1] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+        cpi->UVround[Q][1] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->common.UVdequant[Q][1] = quant_val;
+        cpi->zrun_zbin_boost_uv[Q][1] = (quant_val * zbin_boost[1]) >> 7;
+
+        for (i = 2; i < 16; i++)
         {
-            int rc = vp8_default_zig_zag1d[i];
+            cpi->Y1quant_fast[Q][i] = cpi->Y1quant_fast[Q][1];
+            cpi->Y1quant[Q][i] = cpi->Y1quant[Q][1];
+            cpi->Y1quant_shift[Q][i] = cpi->Y1quant_shift[Q][1];
+            cpi->Y1zbin[Q][i] = cpi->Y1zbin[Q][1];
+            cpi->Y1round[Q][i] = cpi->Y1round[Q][1];
+            cpi->common.Y1dequant[Q][i] = cpi->common.Y1dequant[Q][1];
+            cpi->zrun_zbin_boost_y1[Q][i] = (cpi->common.Y1dequant[Q][1] *
+                                             zbin_boost[i]) >> 7;
 
-            quant_val = vp8_ac_yquant(Q);
-            cpi->Y1quant_fast[Q][rc] = (1 << 16) / quant_val;
-            invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
-                         cpi->Y1quant_shift[Q] + rc, quant_val);
-            cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-            cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
-            cpi->common.Y1dequant[Q][rc] = quant_val;
-            cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
+            cpi->Y2quant_fast[Q][i] = cpi->Y2quant_fast[Q][1];
+            cpi->Y2quant[Q][i] = cpi->Y2quant[Q][1];
+            cpi->Y2quant_shift[Q][i] = cpi->Y2quant_shift[Q][1];
+            cpi->Y2zbin[Q][i] = cpi->Y2zbin[Q][1];
+            cpi->Y2round[Q][i] = cpi->Y2round[Q][1];
+            cpi->common.Y2dequant[Q][i] = cpi->common.Y2dequant[Q][1];
+            cpi->zrun_zbin_boost_y2[Q][i] = (cpi->common.Y2dequant[Q][1] *
+                                             zbin_boost[i]) >> 7;
 
-            quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
-            cpi->Y2quant_fast[Q][rc] = (1 << 16) / quant_val;
-            invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
-                         cpi->Y2quant_shift[Q] + rc, quant_val);
-            cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
-            cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
-            cpi->common.Y2dequant[Q][rc] = quant_val;
-            cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
-
-            quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
-            cpi->UVquant_fast[Q][rc] = (1 << 16) / quant_val;
-            invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
-                         cpi->UVquant_shift[Q] + rc, quant_val);
-            cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-            cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
-            cpi->common.UVdequant[Q][rc] = quant_val;
-            cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
+            cpi->UVquant_fast[Q][i] = cpi->UVquant_fast[Q][1];
+            cpi->UVquant[Q][i] = cpi->UVquant[Q][1];
+            cpi->UVquant_shift[Q][i] = cpi->UVquant_shift[Q][1];
+            cpi->UVzbin[Q][i] = cpi->UVzbin[Q][1];
+            cpi->UVround[Q][i] = cpi->UVround[Q][1];
+            cpi->common.UVdequant[Q][i] = cpi->common.UVdequant[Q][1];
+            cpi->zrun_zbin_boost_uv[Q][i] = (cpi->common.UVdequant[Q][1] *
+                                             zbin_boost[i]) >> 7;
         }
     }
 }

diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index db1a327..1c43c11 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c

@@ -304,6 +304,8 @@
     // Setup for Key frame:
 
     vp8_default_coef_probs(& cpi->common);
+
+
     vp8_kf_default_bmode_probs(cpi->common.kf_bmode_prob);
 
     vpx_memcpy(cpi->common.fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
@@ -314,6 +316,12 @@
 
     vpx_memset(cpi->common.fc.pre_mvc, 0, sizeof(cpi->common.fc.pre_mvc));  //initialize pre_mvc to all zero.
 
+    // Make sure we initialize separate contexts for altref,gold, and normal.
+    // TODO shouldn't need 3 different copies of structure to do this!
+    vpx_memcpy(&cpi->lfc_a, &cpi->common.fc, sizeof(cpi->common.fc));
+    vpx_memcpy(&cpi->lfc_g, &cpi->common.fc, sizeof(cpi->common.fc));
+    vpx_memcpy(&cpi->lfc_n, &cpi->common.fc, sizeof(cpi->common.fc));
+
     //cpi->common.filter_level = 0;      // Reset every key frame.
     cpi->common.filter_level = cpi->common.base_qindex * 3 / 8 ;
 
@@ -324,8 +332,8 @@
     else
         cpi->frames_till_gf_update_due = cpi->goldfreq;
 
-    cpi->common.refresh_golden_frame = TRUE;
-    cpi->common.refresh_alt_ref_frame = TRUE;
+    cpi->common.refresh_golden_frame = 1;
+    cpi->common.refresh_alt_ref_frame = 1;
 }
 
 
@@ -463,7 +471,7 @@
     if (cpi->pass != 2)
     {
         // Single Pass lagged mode: TBD
-        if (FALSE)
+        if (0)
         {
         }
 
@@ -590,14 +598,14 @@
     if (cpi->pass != 2)
     {
         // For now Alt ref is not allowed except in 2 pass modes.
-        cpi->source_alt_ref_pending = FALSE;
+        cpi->source_alt_ref_pending = 0;
 
         /*if ( cpi->oxcf.fixed_q == -1)
         {
             if ( cpi->oxcf.play_alternate && (cpi->last_boost > (100 + (AF_THRESH*cpi->frames_till_gf_update_due)) ) )
-                cpi->source_alt_ref_pending = TRUE;
+                cpi->source_alt_ref_pending = 1;
             else
-                cpi->source_alt_ref_pending = FALSE;
+                cpi->source_alt_ref_pending = 0;
         }*/
     }
 }
@@ -932,6 +940,8 @@
             if (cpi->active_worst_quality <= cpi->active_best_quality)
                 cpi->active_worst_quality = cpi->active_best_quality + 1;
 
+            if(cpi->active_worst_quality > 127)
+                cpi->active_worst_quality = 127;
         }
         // Unbuffered mode (eg. video conferencing)
         else
@@ -972,7 +982,7 @@
 #endif
             //vpx_log("Decoder: Drop frame due to bandwidth: %d \n",cpi->buffer_level, cpi->av_per_frame_bandwidth);
 
-            cpi->drop_frame = TRUE;
+            cpi->drop_frame = 1;
         }
 
 #if 0
@@ -980,7 +990,7 @@
         else if ((cpi->buffer_level < cpi->oxcf.drop_frames_water_mark * cpi->oxcf.optimal_buffer_level / 100) &&
                  (cpi->drop_count < cpi->max_drop_count) && (cpi->pass == 0))
         {
-            cpi->drop_frame = TRUE;
+            cpi->drop_frame = 1;
         }
 
 #endif
@@ -1026,11 +1036,11 @@
         {
             // For one pass throw a GF if recent frame intra useage is low or the GF useage is high
             if ((cpi->pass == 0) && (cpi->this_frame_percent_intra < 15 || gf_frame_useage >= 5))
-                cpi->common.refresh_golden_frame = TRUE;
+                cpi->common.refresh_golden_frame = 1;
 
             // Two pass GF descision
             else if (cpi->pass == 2)
-                cpi->common.refresh_golden_frame = TRUE;
+                cpi->common.refresh_golden_frame = 1;
         }
 
 #if 0
@@ -1048,7 +1058,7 @@
 
 #endif
 
-        if (cpi->common.refresh_golden_frame == TRUE)
+        if (cpi->common.refresh_golden_frame == 1)
         {
 #if 0
 
@@ -1533,7 +1543,7 @@
         // Check if we're dropping the frame:
         if (cpi->drop_frame)
         {
-            cpi->drop_frame = FALSE;
+            cpi->drop_frame = 0;
             cpi->drop_count++;
             return 0;
         }

diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 62a2cfc..ce97961 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c

@@ -28,7 +28,6 @@
 #include "encodemb.h"
 #include "quantize.h"
 #include "vp8/common/idct.h"
-#include "vp8/common/g_common.h"
 #include "variance.h"
 #include "mcomp.h"
 #include "rdopt.h"
@@ -288,12 +287,33 @@
         }
     }
 
-    fill_token_costs(
-        cpi->mb.token_costs,
-        (const vp8_prob( *)[8][3][11]) cpi->common.fc.coef_probs
-    );
+    {
+      // build token cost array for the type of frame we have now
+      FRAME_CONTEXT *l = &cpi->lfc_n;
 
-    vp8_init_mode_costs(cpi);
+      if(cpi->common.refresh_alt_ref_frame)
+          l = &cpi->lfc_a;
+      else if(cpi->common.refresh_golden_frame)
+          l = &cpi->lfc_g;
+
+      fill_token_costs(
+          cpi->mb.token_costs,
+          (const vp8_prob( *)[8][3][11]) l->coef_probs
+      );
+      /*
+      fill_token_costs(
+          cpi->mb.token_costs,
+          (const vp8_prob( *)[8][3][11]) cpi->common.fc.coef_probs);
+      */
+
+
+      // TODO make these mode costs depend on last,alt or gold too.  (jbb)
+      vp8_init_mode_costs(cpi);
+
+      // TODO figure onnnnuut why making mv cost frame type dependent didn't help (jbb)
+      //vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) l->mvc, flags);
+
+    }
 
 }
 
@@ -1364,8 +1384,8 @@
 
         if (bsi.segment_rd < best_rd)
         {
-            int col_min = (best_ref_mv->as_mv.col>>3) - MAX_FULL_PEL_VAL + ((best_ref_mv->as_mv.col & 7)?1:0);
-            int row_min = (best_ref_mv->as_mv.row>>3) - MAX_FULL_PEL_VAL + ((best_ref_mv->as_mv.row & 7)?1:0);
+            int col_min = ((best_ref_mv->as_mv.col+7)>>3) - MAX_FULL_PEL_VAL;
+            int row_min = ((best_ref_mv->as_mv.row+7)>>3) - MAX_FULL_PEL_VAL;
             int col_max = (best_ref_mv->as_mv.col>>3) + MAX_FULL_PEL_VAL;
             int row_max = (best_ref_mv->as_mv.row>>3) + MAX_FULL_PEL_VAL;
 
@@ -1695,7 +1715,9 @@
     }
 }
 
-void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra)
+void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
+                            int recon_uvoffset, int *returnrate,
+                            int *returndistortion, int *returnintra)
 {
     BLOCK *b = &x->block[0];
     BLOCKD *d = &x->e_mbd.block[0];
@@ -1722,27 +1744,20 @@
     int distortion_uv;
     int best_yrd = INT_MAX;
 
-    //int all_rds[MAX_MODES];        // Experimental debug code.
-    //int all_rates[MAX_MODES];
-    //int all_dist[MAX_MODES];
-    //int intermodecost[MAX_MODES];
-
     MB_PREDICTION_MODE uv_intra_mode;
     int_mv mvp;
     int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
     int saddone=0;
     int sr=0;    //search range got from mv_pred(). It uses step_param levels. (0-7)
 
-    int_mv frame_nearest_mv[4];
-    int_mv frame_near_mv[4];
-    int_mv frame_best_ref_mv[4];
-    int frame_mdcounts[4][4];
     int frame_lf_or_gf[4];
     unsigned char *y_buffer[4];
     unsigned char *u_buffer[4];
     unsigned char *v_buffer[4];
     int ref_frame_map[4];
+    int sign_bias = 0;
 
+    vpx_memset(mode_mv, 0, sizeof(mode_mv));
     vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
     vpx_memset(&best_bmodes, 0, sizeof(best_bmodes));
 
@@ -1758,17 +1773,29 @@
     for(; i<4; i++)
         ref_frame_map[i] = -1;
 
+    /* Check to see if there is at least 1 valid reference frame that we need
+     * to calculate near_mvs.
+     */
+    if (ref_frame_map[1] > 0)
+    {
+        vp8_find_near_mvs(&x->e_mbd,
+                          x->e_mbd.mode_info_context,
+                          &mode_mv[NEARESTMV], &mode_mv[NEARMV],
+                          &best_ref_mv,
+                          mdcounts,
+                          ref_frame_map[1],
+                          cpi->common.ref_frame_sign_bias);
+
+        sign_bias = cpi->common.ref_frame_sign_bias[ref_frame_map[1]];
+    }
+
     if (cpi->ref_frame_flags & VP8_LAST_FLAG)
     {
         YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
 
-        vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &frame_nearest_mv[LAST_FRAME], &frame_near_mv[LAST_FRAME],
-                          &frame_best_ref_mv[LAST_FRAME], frame_mdcounts[LAST_FRAME], LAST_FRAME, cpi->common.ref_frame_sign_bias);
-
         y_buffer[LAST_FRAME] = lst_yv12->y_buffer + recon_yoffset;
         u_buffer[LAST_FRAME] = lst_yv12->u_buffer + recon_uvoffset;
         v_buffer[LAST_FRAME] = lst_yv12->v_buffer + recon_uvoffset;
-
         frame_lf_or_gf[LAST_FRAME] = 0;
     }
 
@@ -1776,13 +1803,9 @@
     {
         YV12_BUFFER_CONFIG *gld_yv12 = &cpi->common.yv12_fb[cpi->common.gld_fb_idx];
 
-        vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &frame_nearest_mv[GOLDEN_FRAME], &frame_near_mv[GOLDEN_FRAME],
-                          &frame_best_ref_mv[GOLDEN_FRAME], frame_mdcounts[GOLDEN_FRAME], GOLDEN_FRAME, cpi->common.ref_frame_sign_bias);
-
         y_buffer[GOLDEN_FRAME] = gld_yv12->y_buffer + recon_yoffset;
         u_buffer[GOLDEN_FRAME] = gld_yv12->u_buffer + recon_uvoffset;
         v_buffer[GOLDEN_FRAME] = gld_yv12->v_buffer + recon_uvoffset;
-
         frame_lf_or_gf[GOLDEN_FRAME] = 1;
     }
 
@@ -1790,13 +1813,9 @@
     {
         YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx];
 
-        vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &frame_nearest_mv[ALTREF_FRAME], &frame_near_mv[ALTREF_FRAME],
-                          &frame_best_ref_mv[ALTREF_FRAME], frame_mdcounts[ALTREF_FRAME], ALTREF_FRAME, cpi->common.ref_frame_sign_bias);
-
         y_buffer[ALTREF_FRAME] = alt_yv12->y_buffer + recon_yoffset;
         u_buffer[ALTREF_FRAME] = alt_yv12->u_buffer + recon_uvoffset;
         v_buffer[ALTREF_FRAME] = alt_yv12->v_buffer + recon_uvoffset;
-
         frame_lf_or_gf[ALTREF_FRAME] = 1;
     }
 
@@ -1805,8 +1824,6 @@
 
     x->skip = 0;
 
-    vpx_memset(mode_mv, 0, sizeof(mode_mv));
-
     x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
     rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, &uv_intra_rate_tokenonly, &uv_intra_distortion);
     uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
@@ -1819,13 +1836,6 @@
         int other_cost = 0;
         int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]];
 
-        // Experimental debug code.
-        // Record of rd values recorded for this MB. -1 indicates not measured
-        //all_rds[mode_index] = -1;
-        //all_rates[mode_index] = -1;
-        //all_dist[mode_index] = -1;
-        //intermodecost[mode_index] = -1;
-
         // Test best rd so far against threshold for trying this mode.
         if (best_rd <= cpi->rd_threshes[mode_index])
             continue;
@@ -1858,10 +1868,20 @@
             x->e_mbd.pre.y_buffer = y_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
             x->e_mbd.pre.u_buffer = u_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
             x->e_mbd.pre.v_buffer = v_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
-            mode_mv[NEARESTMV] = frame_nearest_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
-            mode_mv[NEARMV] = frame_near_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
-            best_ref_mv = frame_best_ref_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
-            vpx_memcpy(mdcounts, frame_mdcounts[x->e_mbd.mode_info_context->mbmi.ref_frame], sizeof(mdcounts));
+
+            if (sign_bias !=
+                cpi->common.ref_frame_sign_bias[x->e_mbd.mode_info_context->mbmi.ref_frame])
+            {
+                mode_mv[NEARESTMV].as_mv.row *= -1;
+                mode_mv[NEARESTMV].as_mv.col *= -1;
+                mode_mv[NEARMV].as_mv.row *= -1;
+                mode_mv[NEARMV].as_mv.col *= -1;
+                best_ref_mv.as_mv.row *= -1;
+                best_ref_mv.as_mv.col *= -1;
+                sign_bias
+                = cpi->common.ref_frame_sign_bias[x->e_mbd.mode_info_context->mbmi.ref_frame];
+            }
+
             lf_or_gf = frame_lf_or_gf[x->e_mbd.mode_info_context->mbmi.ref_frame];
         }
 
@@ -1995,8 +2015,8 @@
             int sadpb = x->sadperbit16;
             int_mv mvp_full;
 
-            int col_min = (best_ref_mv.as_mv.col>>3) - MAX_FULL_PEL_VAL + ((best_ref_mv.as_mv.col & 7)?1:0);
-            int row_min = (best_ref_mv.as_mv.row>>3) - MAX_FULL_PEL_VAL + ((best_ref_mv.as_mv.row & 7)?1:0);
+            int col_min = ((best_ref_mv.as_mv.col+7)>>3) - MAX_FULL_PEL_VAL;
+            int row_min = ((best_ref_mv.as_mv.row+7)>>3) - MAX_FULL_PEL_VAL;
             int col_max = (best_ref_mv.as_mv.col>>3) + MAX_FULL_PEL_VAL;
             int row_max = (best_ref_mv.as_mv.row>>3) + MAX_FULL_PEL_VAL;
 
@@ -2145,7 +2165,7 @@
                 continue;
 
             vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
-            vp8_build_inter16x16_predictors_mby(&x->e_mbd);
+            vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16);
 
             if (cpi->active_map_enabled && x->active_ptr[0] == 0) {
                 x->skip = 1;
@@ -2265,11 +2285,6 @@
             this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
         }
 
-        // Experimental debug code.
-        //all_rds[mode_index] = this_rd;
-        //all_rates[mode_index] = rate2;
-        //all_dist[mode_index] = distortion2;
-
         // Keep record of best intra distortion
         if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
             (this_rd < best_intra_rd) )
@@ -2370,7 +2385,7 @@
         x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
         x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
         x->e_mbd.mode_info_context->mbmi.mb_skip_coeff =
-                                        (cpi->common.mb_no_coeff_skip) ? 1 : 0;
+                                        (cpi->common.mb_no_coeff_skip);
         x->e_mbd.mode_info_context->mbmi.partitioning = 0;
 
         return;
@@ -2397,10 +2412,14 @@
                                       x->partition_info->bmi[15].mv.as_int;
     }
 
-    rd_update_mvcount(cpi, x, &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame]);
+    if (sign_bias
+        != cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame])
+    {
+        best_ref_mv.as_mv.row *= -1;
+        best_ref_mv.as_mv.col *= -1;
+    }
 
-
-
+    rd_update_mvcount(cpi, x, &best_ref_mv);
 }
 
 void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)

diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
index b9ade1c..545e4f2 100644
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c

@@ -22,7 +22,6 @@
 #include "ratectrl.h"
 #include "vp8/common/quant_common.h"
 #include "segmentation.h"
-#include "vp8/common/g_common.h"
 #include "vpx_scale/yv12extend.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vp8/common/swapyv12buffer.h"
@@ -98,7 +97,7 @@
     unsigned short *count
 )
 {
-    int i, j, k;
+    unsigned int i, j, k;
     int modifier;
     int byte = 0;
 
@@ -186,7 +185,7 @@
     if (cpi->Speed < 8)
     {
         step_param = cpi->sf.first_step +
-                    ((cpi->Speed > 5) ? 1 : 0);
+                    (cpi->Speed > 5);
         further_steps =
             (cpi->sf.max_step_search_steps - 1)-step_param;
     }

diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index e819485..8bfc47f 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c

@@ -514,17 +514,19 @@
     TOKENEXTRA **tp,
     ENTROPY_CONTEXT *a,
     ENTROPY_CONTEXT *l,
+    int type,
     VP8_COMP *cpi
 )
 {
     int pt; /* near block/prev token context index */
+    int band;
     TOKENEXTRA *t = *tp;        /* store tokens starting here */
     VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
-
+    band = type ? 0 : 1;
     t->Token = DCT_EOB_TOKEN;
-    t->context_tree = cpi->common.fc.coef_probs [0] [1] [pt];
+    t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
     t->skip_eob_node = 0;
-    ++cpi->coef_counts       [0] [1] [pt] [DCT_EOB_TOKEN];
+    ++cpi->coef_counts       [type] [band] [pt] [DCT_EOB_TOKEN];
     ++t;
     *tp = t;
     pt = 0; /* 0 <-> all coeff data is zero */
@@ -561,15 +563,19 @@
     ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context;
     int plane_type;
     int b;
-
-    stuff2nd_order_b(t,
+    plane_type = 3;
+    if((x->mode_info_context->mbmi.mode != B_PRED
+                        && x->mode_info_context->mbmi.mode != SPLITMV))
+    {
+        stuff2nd_order_b(t,
                      A + vp8_block2above[24], L + vp8_block2left[24], cpi);
-    plane_type = 0;
+        plane_type = 0;
+    }
 
     for (b = 0; b < 16; b++)
         stuff1st_order_b(t,
                          A + vp8_block2above[b],
-                         L + vp8_block2left[b], cpi);
+                         L + vp8_block2left[b], plane_type, cpi);
 
     for (b = 16; b < 24; b++)
         stuff1st_order_buv(t,

diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index 683af34..0e56432 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk

@@ -9,7 +9,6 @@
 ##
 
 VP8_COMMON_SRCS-yes += vp8_common.mk
-VP8_COMMON_SRCS-yes += common/type_aliases.h
 VP8_COMMON_SRCS-yes += common/pragmas.h
 VP8_COMMON_SRCS-yes += common/ppflags.h
 VP8_COMMON_SRCS-yes += common/onyx.h
@@ -20,6 +19,8 @@
 VP8_COMMON_SRCS-yes += common/coefupdateprobs.h
 VP8_COMMON_SRCS-yes += common/debugmodes.c
 VP8_COMMON_SRCS-yes += common/default_coef_probs.h
+VP8_COMMON_SRCS-yes += common/dequantize.c
+VP8_COMMON_SRCS-yes += common/dequantize.h
 VP8_COMMON_SRCS-yes += common/entropy.c
 VP8_COMMON_SRCS-yes += common/entropymode.c
 VP8_COMMON_SRCS-yes += common/entropymv.c
@@ -28,17 +29,16 @@
 VP8_COMMON_SRCS-yes += common/filter.h
 VP8_COMMON_SRCS-yes += common/findnearmv.c
 VP8_COMMON_SRCS-yes += common/generic/systemdependent.c
+VP8_COMMON_SRCS-yes += common/idct_blk.c
 VP8_COMMON_SRCS-yes += common/idctllm.c
 VP8_COMMON_SRCS-yes += common/alloccommon.h
 VP8_COMMON_SRCS-yes += common/blockd.h
 VP8_COMMON_SRCS-yes += common/common.h
-VP8_COMMON_SRCS-yes += common/common_types.h
 VP8_COMMON_SRCS-yes += common/entropy.h
 VP8_COMMON_SRCS-yes += common/entropymode.h
 VP8_COMMON_SRCS-yes += common/entropymv.h
 VP8_COMMON_SRCS-yes += common/extend.h
 VP8_COMMON_SRCS-yes += common/findnearmv.h
-VP8_COMMON_SRCS-yes += common/g_common.h
 VP8_COMMON_SRCS-yes += common/header.h
 VP8_COMMON_SRCS-yes += common/idct.h
 VP8_COMMON_SRCS-yes += common/invtrans.h
@@ -57,7 +57,6 @@
 VP8_COMMON_SRCS-yes += common/systemdependent.h
 VP8_COMMON_SRCS-yes += common/threading.h
 VP8_COMMON_SRCS-yes += common/treecoder.h
-VP8_COMMON_SRCS-yes += common/invtrans.c
 VP8_COMMON_SRCS-yes += common/loopfilter.c
 VP8_COMMON_SRCS-yes += common/loopfilter_filters.c
 VP8_COMMON_SRCS-yes += common/mbpitch.c
@@ -69,9 +68,13 @@
 VP8_COMMON_SRCS-yes += common/reconintra4x4.c
 VP8_COMMON_SRCS-yes += common/setupintrarecon.c
 VP8_COMMON_SRCS-yes += common/swapyv12buffer.c
+
+
+
 VP8_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER) += common/textblit.c
 VP8_COMMON_SRCS-yes += common/treecoder.c
 
+VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/dequantize_x86.h
 VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.c
 VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.h
 VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/idct_x86.h
@@ -84,11 +87,14 @@
 VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/loopfilter_x86.c
 VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.h
 VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.c
+VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/dequantize_mmx.asm
+VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idct_blk_mmx.c
 VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx.asm
 VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/iwalsh_mmx.asm
 VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/recon_mmx.asm
 VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/subpixel_mmx.asm
 VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/loopfilter_mmx.asm
+VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_blk_sse2.c
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idctllm_sse2.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_sse2.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_wrapper_sse2.c
@@ -106,8 +112,6 @@
 
 # common (c)
 VP8_COMMON_SRCS-$(ARCH_ARM)  += common/arm/arm_systemdependent.c
-VP8_COMMON_SRCS-$(ARCH_ARM)  += common/arm/bilinearfilter_arm.c
-VP8_COMMON_SRCS-$(ARCH_ARM)  += common/arm/bilinearfilter_arm.h
 VP8_COMMON_SRCS-$(ARCH_ARM)  += common/arm/filter_arm.c
 VP8_COMMON_SRCS-$(ARCH_ARM)  += common/arm/idct_arm.h
 VP8_COMMON_SRCS-$(ARCH_ARM)  += common/arm/loopfilter_arm.c
@@ -115,8 +119,12 @@
 VP8_COMMON_SRCS-$(ARCH_ARM)  += common/arm/recon_arm.h
 VP8_COMMON_SRCS-$(ARCH_ARM)  += common/arm/reconintra_arm.c
 VP8_COMMON_SRCS-$(ARCH_ARM)  += common/arm/subpixel_arm.h
+VP8_COMMON_SRCS-$(ARCH_ARM)  += common/arm/dequantize_arm.c
+VP8_COMMON_SRCS-$(ARCH_ARM)  += common/arm/dequantize_arm.h
 
 # common (armv6)
+VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/bilinearfilter_arm.c
+VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/bilinearfilter_arm.h
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/bilinearfilter_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/copymem8x4_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/copymem8x8_v6$(ASM)
@@ -129,6 +137,9 @@
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/simpleloopfilter_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/sixtappredict8x4_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/intra4x4_predict_v6$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/dequant_idct_v6$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/dequantize_v6$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/idct_blk_v6.c
 
 # common (neon)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/bilinearpredict4x4_neon$(ASM)
@@ -151,3 +162,8 @@
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/sixtappredict16x16_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/buildintrapredictorsmby_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/save_neon_reg$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/dequant_idct_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/idct_dequant_full_2x_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/idct_dequant_0_2x_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/dequantizeb_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/idct_blk_neon.c

diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 5bb6b40..6181ee8 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c

@@ -83,7 +83,7 @@
     vpx_codec_enc_cfg_t     cfg;
     struct vp8_extracfg     vp8_cfg;
     VP8_CONFIG              oxcf;
-    VP8_PTR             cpi;
+    struct VP8_COMP        *cpi;
     unsigned char          *cx_data;
     unsigned int            cx_data_sz;
     vpx_image_t             preview_img;
@@ -137,7 +137,8 @@
 
 static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t      *ctx,
                                        const vpx_codec_enc_cfg_t *cfg,
-                                       const struct vp8_extracfg *vp8_cfg)
+                                       const struct vp8_extracfg *vp8_cfg,
+                                       int                        finalize)
 {
     RANGE_CHECK(cfg, g_w,                   1, 16383); /* 14 bits available */
     RANGE_CHECK(cfg, g_h,                   1, 16383); /* 14 bits available */
@@ -193,6 +194,9 @@
     RANGE_CHECK_HI(vp8_cfg, arnr_strength,   6);
     RANGE_CHECK(vp8_cfg, arnr_type,       1, 3);
     RANGE_CHECK(vp8_cfg, cq_level, 0, 63);
+    if(finalize && cfg->rc_end_usage == VPX_CQ)
+        RANGE_CHECK(vp8_cfg, cq_level,
+                    cfg->rc_min_quantizer, cfg->rc_max_quantizer);
 
 #if !(CONFIG_REALTIME_ONLY)
     if (cfg->g_pass == VPX_RC_LAST_PASS)
@@ -357,6 +361,10 @@
     }
 
 #if CONFIG_MULTI_RES_ENCODING
+    /* When mr_cfg is NULL, oxcf->mr_total_resolutions and oxcf->mr_encoder_id
+     * are both memset to 0, which ensures the correct logic under this
+     * situation.
+     */
     if(mr_cfg)
     {
         oxcf->mr_total_resolutions        = mr_cfg->mr_total_resolutions;
@@ -439,7 +447,7 @@
     if ((cfg->g_lag_in_frames > ctx->cfg.g_lag_in_frames))
         ERROR("Cannot increase lag_in_frames");
 
-    res = validate_config(ctx, cfg, &ctx->vp8_cfg);
+    res = validate_config(ctx, cfg, &ctx->vp8_cfg, 0);
 
     if (!res)
     {
@@ -505,7 +513,7 @@
 
     }
 
-    res = validate_config(ctx, &ctx->cfg, &xcfg);
+    res = validate_config(ctx, &ctx->cfg, &xcfg, 0);
 
     if (!res)
     {
@@ -548,7 +556,7 @@
     vpx_codec_enc_cfg_t       *cfg;
     unsigned int               i;
 
-    VP8_PTR optr;
+    struct VP8_COMP *optr;
 
     if (!ctx->priv)
     {
@@ -602,7 +610,7 @@
 
         vp8_initialize();
 
-        res = validate_config(priv, &priv->cfg, &priv->vp8_cfg);
+        res = validate_config(priv, &priv->cfg, &priv->vp8_cfg, 0);
 
         if (!res)
         {
@@ -732,6 +740,9 @@
     if (img)
         res = validate_img(ctx, img);
 
+    if (!res)
+        res = validate_config(ctx, &ctx->cfg, &ctx->vp8_cfg, 1);
+
     pick_quickcompress_mode(ctx, duration, deadline);
     vpx_codec_pkt_list_init(&ctx->pkt_list);
 

diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 54bdb85..fbe5817 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c

@@ -57,7 +57,7 @@
     vp8_stream_info_t       si;
     int                     defer_alloc;
     int                     decoder_init;
-    VP8D_PTR                pbi;
+    struct VP8D_COMP       *pbi;
     int                     postproc_cfg_set;
     vp8_postproc_cfg_t      postproc_cfg;
 #if CONFIG_POSTPROC_VISUALIZER
@@ -389,7 +389,7 @@
         if (!res)
         {
             VP8D_CONFIG oxcf;
-            VP8D_PTR optr;
+            struct VP8D_COMP* optr;
 
             vp8dx_initialize();
 

diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk
index d88b595..d6dc153 100644
--- a/vp8/vp8dx.mk
+++ b/vp8/vp8dx.mk

@@ -52,7 +52,6 @@
 VP8_DX_SRCS-yes += decoder/dboolhuff.c
 VP8_DX_SRCS-yes += decoder/decodemv.c
 VP8_DX_SRCS-yes += decoder/decodframe.c
-VP8_DX_SRCS-yes += decoder/dequantize.c
 VP8_DX_SRCS-yes += decoder/detokenize.c
 VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT) += decoder/ec_types.h
 VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT) += decoder/error_concealment.h
@@ -61,20 +60,14 @@
 VP8_DX_SRCS-yes += decoder/dboolhuff.h
 VP8_DX_SRCS-yes += decoder/decodemv.h
 VP8_DX_SRCS-yes += decoder/decoderthreading.h
-VP8_DX_SRCS-yes += decoder/dequantize.h
 VP8_DX_SRCS-yes += decoder/detokenize.h
 VP8_DX_SRCS-yes += decoder/onyxd_int.h
 VP8_DX_SRCS-yes += decoder/treereader.h
 VP8_DX_SRCS-yes += decoder/onyxd_if.c
 VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/threading.c
-VP8_DX_SRCS-yes += decoder/idct_blk.c
 VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/reconintra_mt.h
 VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/reconintra_mt.c
 
 VP8_DX_SRCS-yes := $(filter-out $(VP8_DX_SRCS_REMOVE-yes),$(VP8_DX_SRCS-yes))
 
-VP8_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/dequantize_x86.h
 VP8_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/x86_dsystemdependent.c
-VP8_DX_SRCS-$(HAVE_MMX) += decoder/x86/dequantize_mmx.asm
-VP8_DX_SRCS-$(HAVE_MMX) += decoder/x86/idct_blk_mmx.c
-VP8_DX_SRCS-$(HAVE_SSE2) += decoder/x86/idct_blk_sse2.c

diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk
index b08f946..fa1aaea 100644
--- a/vp8/vp8dx_arm.mk
+++ b/vp8/vp8dx_arm.mk

@@ -12,17 +12,3 @@
 #VP8_DX_SRCS list is modified according to different platforms.
 
 VP8_DX_SRCS-$(ARCH_ARM)  += decoder/arm/arm_dsystemdependent.c
-VP8_DX_SRCS-$(ARCH_ARM)  += decoder/arm/dequantize_arm.c
-VP8_DX_SRCS-$(ARCH_ARM)  += decoder/arm/dequantize_arm.h
-
-#File list for armv6
-VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequant_idct_v6$(ASM)
-VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequantize_v6$(ASM)
-VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/idct_blk_v6.c
-
-#File list for neon
-VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequant_idct_neon$(ASM)
-VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/idct_dequant_full_2x_neon$(ASM)
-VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/idct_dequant_0_2x_neon$(ASM)
-VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequantizeb_neon$(ASM)
-VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/idct_blk_neon.c

diff --git a/vp8_multi_resolution_encoder.c b/vp8_multi_resolution_encoder.c
index 732f96e..11c33d6 100644
--- a/vp8_multi_resolution_encoder.c
+++ b/vp8_multi_resolution_encoder.c

@@ -78,6 +78,8 @@
     exit(EXIT_FAILURE);
 }
 
+int (*read_frame_p)(FILE *f, vpx_image_t *img);
+
 static int read_frame(FILE *f, vpx_image_t *img) {
     size_t nbytes, to_read;
     int    res = 1;
@@ -92,6 +94,55 @@
     return res;
 }
 
+static int read_frame_by_row(FILE *f, vpx_image_t *img) {
+    size_t nbytes, to_read;
+    int    res = 1;
+    int plane;
+
+    for (plane = 0; plane < 3; plane++)
+    {
+        unsigned char *ptr;
+        int w = (plane ? (1 + img->d_w) / 2 : img->d_w);
+        int h = (plane ? (1 + img->d_h) / 2 : img->d_h);
+        int r;
+
+        /* Determine the correct plane based on the image format. The for-loop
+         * always counts in Y,U,V order, but this may not match the order of
+         * the data on disk.
+         */
+        switch (plane)
+        {
+        case 1:
+            ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12? VPX_PLANE_V : VPX_PLANE_U];
+            break;
+        case 2:
+            ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12?VPX_PLANE_U : VPX_PLANE_V];
+            break;
+        default:
+            ptr = img->planes[plane];
+        }
+
+        for (r = 0; r < h; r++)
+        {
+            to_read = w;
+
+            nbytes = fread(ptr, 1, to_read, f);
+            if(nbytes != to_read) {
+                res = 0;
+                if(nbytes > 0)
+                    printf("Warning: Read partial frame. Check your width & height!\n");
+                break;
+            }
+
+            ptr += img->stride[plane];
+        }
+        if (!res)
+            break;
+    }
+
+    return res;
+}
+
 static void write_ivf_file_header(FILE *outfile,
                                   const vpx_codec_enc_cfg_t *cfg,
                                   int frame_cnt) {
@@ -262,9 +313,14 @@
 
     /* Allocate image for each encoder */
     for (i=0; i< NUM_ENCODERS; i++)
-        if(!vpx_img_alloc(&raw[i], VPX_IMG_FMT_I420, cfg[i].g_w, cfg[i].g_h, 1))
+        if(!vpx_img_alloc(&raw[i], VPX_IMG_FMT_I420, cfg[i].g_w, cfg[i].g_h, 32))
             die("Failed to allocate image", cfg[i].g_w, cfg[i].g_h);
 
+    if (raw[0].stride[VPX_PLANE_Y] == raw[0].d_w)
+        read_frame_p = read_frame;
+    else
+        read_frame_p = read_frame_by_row;
+
     for (i=0; i< NUM_ENCODERS; i++)
         write_ivf_file_header(outfile[i], &cfg[i], 0);
 
@@ -305,35 +361,22 @@
         const vpx_codec_cx_pkt_t *pkt[NUM_ENCODERS];
 
         flags = 0;
-        frame_avail = read_frame(infile, &raw[0]);
+        frame_avail = read_frame_p(infile, &raw[0]);
 
-        for ( i=1; i<NUM_ENCODERS; i++)
+        if(frame_avail)
         {
-            if(frame_avail)
+            for ( i=1; i<NUM_ENCODERS; i++)
             {
                 /*Scale the image down a number of times by downsampling factor*/
-                int src_uvwidth = (raw[i-1].d_w + 1) >> 1;
-                int src_uvheight = (raw[i-1].d_h + 1) >> 1;
-                const unsigned char* src_y = raw[i-1].planes[VPX_PLANE_Y];
-                const unsigned char* src_u = raw[i-1].planes[VPX_PLANE_Y]
-                                             + raw[i-1].d_w*raw[i-1].d_h;
-                const unsigned char* src_v = raw[i-1].planes[VPX_PLANE_Y]
-                                             + raw[i-1].d_w*raw[i-1].d_h
-                                             + src_uvwidth*src_uvheight;
-                int dst_uvwidth = (raw[i].d_w + 1) >> 1;
-                int dst_uvheight = (raw[i].d_h + 1) >> 1;
-                unsigned char* dst_y = raw[i].planes[VPX_PLANE_Y];
-                unsigned char* dst_u = raw[i].planes[VPX_PLANE_Y]
-                                       + raw[i].d_w*raw[i].d_h;
-                unsigned char* dst_v = raw[i].planes[VPX_PLANE_Y]
-                                       + raw[i].d_w*raw[i].d_h
-                                       + dst_uvwidth*dst_uvheight;
-
                 /* FilterMode 1 or 2 give better psnr than FilterMode 0. */
-                I420Scale(src_y, raw[i-1].d_w, src_u, src_uvwidth, src_v,
-                          src_uvwidth, raw[i-1].d_w, raw[i-1].d_h,
-                          dst_y, raw[i].d_w, dst_u, dst_uvwidth,
-                          dst_v, dst_uvwidth, raw[i].d_w, raw[i].d_h, 1);
+                I420Scale(raw[i-1].planes[VPX_PLANE_Y], raw[i-1].stride[VPX_PLANE_Y],
+                          raw[i-1].planes[VPX_PLANE_U], raw[i-1].stride[VPX_PLANE_U],
+                          raw[i-1].planes[VPX_PLANE_V], raw[i-1].stride[VPX_PLANE_V],
+                          raw[i-1].d_w, raw[i-1].d_h,
+                          raw[i].planes[VPX_PLANE_Y], raw[i].stride[VPX_PLANE_Y],
+                          raw[i].planes[VPX_PLANE_U], raw[i].stride[VPX_PLANE_U],
+                          raw[i].planes[VPX_PLANE_V], raw[i].stride[VPX_PLANE_V],
+                          raw[i].d_w, raw[i].d_h, 1);
             }
         }
 

diff --git a/vpx/src/vpx_encoder.c b/vpx/src/vpx_encoder.c
index bddad23..03ddc62 100644
--- a/vpx/src/vpx_encoder.c
+++ b/vpx/src/vpx_encoder.c

@@ -243,6 +243,7 @@
                 ctx--;
                 if (img) img--;
             }
+            ctx++;
         }
 
         FLOATING_POINT_RESTORE();

diff --git a/vpx/vpx_integer.h b/vpx/vpx_integer.h
index 608760f..218bca7 100644
--- a/vpx/vpx_integer.h
+++ b/vpx/vpx_integer.h

@@ -29,16 +29,8 @@
 typedef unsigned __int64 uint64_t;
 #endif
 
-#ifdef HAVE_ARMV6
-typedef unsigned int int_fast16_t;
-#else
-typedef signed short int_fast16_t;
-#endif
-typedef signed char int_fast8_t;
-typedef unsigned char uint_fast8_t;
-
 #ifndef _UINTPTR_T_DEFINED
-typedef unsigned int   uintptr_t;
+typedef size_t uintptr_t;
 #endif
 
 #else

diff --git a/vpxenc.c b/vpxenc.c
index 2e14f24..3637acb 100644
--- a/vpxenc.c
+++ b/vpxenc.c

@@ -807,7 +807,7 @@
 
     {
         EbmlLoc start;
-        int i;
+        unsigned int i;
 
         glob->cue_pos = ftello(glob->stream);
         Ebml_StartSubElement(glob, &start, Cues);