Merge "support changing resolution with vpx_codec_enc_config_set"
diff --git a/build/make/configure.sh b/build/make/configure.sh
index 15134ab..78a1cee 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -391,6 +391,7 @@
 ASFLAGS = ${ASFLAGS}
 extralibs = ${extralibs}
 AS_SFX    = ${AS_SFX:-.asm}
+EXE_SFX   = ${EXE_SFX}
 RTCD_OPTIONS = ${RTCD_OPTIONS}
 EOF
 
@@ -540,6 +541,7 @@
     STRIP=${STRIP:-${CROSS}strip}
     NM=${NM:-${CROSS}nm}
         AS_SFX=.s
+        EXE_SFX=
 }
 
 process_common_toolchain() {
@@ -593,6 +595,9 @@
             *solaris2.10)
                 tgt_os=solaris
                 ;;
+            *os2*)
+                tgt_os=os2
+                ;;
         esac
 
         if [ -n "$tgt_isa" ] && [ -n "$tgt_os" ]; then
@@ -919,6 +924,9 @@
                 LD=${LD:-${CROSS}gcc}
                 CROSS=${CROSS:-g}
                 ;;
+            os2)
+                AS=${AS:-nasm}
+                ;;
         esac
 
         AS="${alt_as:-${AS:-auto}}"
@@ -989,6 +997,11 @@
                 # enabled icc && ! enabled pic && add_cflags -fno-pic -mdynamic-no-pic
                 enabled icc && ! enabled pic && add_cflags -fno-pic
             ;;
+            os2)
+                add_asflags -f aout
+                enabled debug && add_asflags -g
+                EXE_SFX=.exe
+            ;;
             *) log "Warning: Unknown os $tgt_os while setting up $AS flags"
             ;;
         esac
diff --git a/configure b/configure
index 7ecd72e..4ac97b1 100755
--- a/configure
+++ b/configure
@@ -109,6 +109,7 @@
 all_platforms="${all_platforms} x86-darwin10-gcc"
 all_platforms="${all_platforms} x86-linux-gcc"
 all_platforms="${all_platforms} x86-linux-icc"
+all_platforms="${all_platforms} x86-os2-gcc"
 all_platforms="${all_platforms} x86-solaris-gcc"
 all_platforms="${all_platforms} x86-win32-gcc"
 all_platforms="${all_platforms} x86-win32-vs7"
diff --git a/examples.mk b/examples.mk
index f6c9045..518608d 100644
--- a/examples.mk
+++ b/examples.mk
@@ -168,12 +168,12 @@
 # Create build/install dependencies for all examples. The common case
 # is handled here. The MSVS case is handled below.
 NOT_MSVS = $(if $(CONFIG_MSVS),,yes)
-DIST-BINS-$(NOT_MSVS)      += $(addprefix bin/,$(ALL_EXAMPLES:.c=))
-INSTALL-BINS-$(NOT_MSVS)   += $(addprefix bin/,$(UTILS:.c=))
+DIST-BINS-$(NOT_MSVS)      += $(addprefix bin/,$(ALL_EXAMPLES:.c=$(EXE_SFX)))
+INSTALL-BINS-$(NOT_MSVS)   += $(addprefix bin/,$(UTILS:.c=$(EXE_SFX)))
 DIST-SRCS-yes              += $(ALL_SRCS)
 INSTALL-SRCS-yes           += $(UTIL_SRCS)
 OBJS-$(NOT_MSVS)           += $(if $(BUILD_OBJS),$(call objs,$(ALL_SRCS)))
-BINS-$(NOT_MSVS)           += $(addprefix $(BUILD_PFX),$(ALL_EXAMPLES:.c=))
+BINS-$(NOT_MSVS)           += $(addprefix $(BUILD_PFX),$(ALL_EXAMPLES:.c=$(EXE_SFX)))
 
 
 # Instantiate linker template for all examples.
@@ -183,7 +183,7 @@
     $(if $(BUILD_OBJS),$(eval $(bin):\
         $(LIB_PATH)/lib$(CODEC_LIB)$(CODEC_LIB_SUF)))\
     $(if $(BUILD_OBJS),$(eval $(call linker_template,$(bin),\
-        $(call objs,$($(notdir $(bin)).SRCS)) \
+        $(call objs,$($(notdir $(bin:$(EXE_SFX)=)).SRCS)) \
         -l$(CODEC_LIB) $(addprefix -l,$(CODEC_EXTRA_LIBS))\
         )))\
     $(if $(LIPO_OBJS),$(eval $(call lipo_bin_template,$(bin))))\
diff --git a/tools_common.c b/tools_common.c
index d188bbe..6f95028 100644
--- a/tools_common.c
+++ b/tools_common.c
@@ -9,15 +9,21 @@
  */
 #include <stdio.h>
 #include "tools_common.h"
-#ifdef _WIN32
+#if defined(_WIN32) || defined(__OS2__)
 #include <io.h>
 #include <fcntl.h>
+
+#ifdef __OS2__
+#define _setmode    setmode
+#define _fileno     fileno
+#define _O_BINARY   O_BINARY
+#endif
 #endif
 
 FILE* set_binary_mode(FILE *stream)
 {
     (void)stream;
-#ifdef _WIN32
+#if defined(_WIN32) || defined(__OS2__)
     _setmode(_fileno(stream), _O_BINARY);
 #endif
     return stream;
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index c009cbd..39660ab 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -19,11 +19,15 @@
 #include "vp8/common/onyxc_int.h"
 
 #if CONFIG_MULTITHREAD
-#if HAVE_UNISTD_H
+#if HAVE_UNISTD_H && !defined(__OS2__)
 #include <unistd.h>
 #elif defined(_WIN32)
 #include <windows.h>
 typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO);
+#elif defined(__OS2__)
+#define INCL_DOS
+#define INCL_DOSSPINLOCK
+#include <os2.h>
 #endif
 #endif
 
@@ -32,7 +36,7 @@
 {
     int core_count = 16;
 
-#if HAVE_UNISTD_H
+#if HAVE_UNISTD_H && !defined(__OS2__)
 #if defined(_SC_NPROCESSORS_ONLN)
     core_count = sysconf(_SC_NPROCESSORS_ONLN);
 #elif defined(_SC_NPROC_ONLN)
@@ -55,6 +59,21 @@
 
         core_count = sysinfo.dwNumberOfProcessors;
     }
+#elif defined(__OS2__)
+    {
+        ULONG proc_id;
+        ULONG status;
+
+        core_count = 0;
+        for (proc_id = 1; ; proc_id++)
+        {
+            if (DosGetProcessorStatus(proc_id, &status))
+                break;
+
+            if (status == PROC_ONLINE)
+                core_count++;
+        }
+    }
 #else
     /* other platforms */
 #endif
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 0dc14b8..401989b 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -19,6 +19,7 @@
 #include "systemdependent.h"
 #include "../encoder/variance.h"
 
+#include <limits.h>
 #include <math.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -729,17 +730,17 @@
     if (blksize == 16)
     {
         act = (vp8_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
-        sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, 0)+128)>>8;
+        sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, INT_MAX)+128)>>8;
     }
     else if (blksize == 8)
     {
         act = (vp8_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
-        sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, 0)+32)>>6;
+        sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, INT_MAX)+32)>>6;
     }
     else
     {
         act = (vp8_variance4x4(yd, yd_stride, VP8_ZEROS, 0, &sse)+8)>>4;
-        sad = (vp8_sad4x4(y, y_stride, yd, yd_stride, 0)+8)>>4;
+        sad = (vp8_sad4x4(y, y_stride, yd, yd_stride, INT_MAX)+8)>>4;
     }
     /* thr = qdiff/8 + log2(act) + log4(qprev) */
     thr = (qdiff>>3);
@@ -930,10 +931,16 @@
     {
         if ((flags & VP8D_DEBLOCK) || (flags & VP8D_DEMACROBLOCK))
         {
-            if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer_int, oci->Width, oci->Height, VP8BORDERINPIXELS) >= 0)
-            {
-                oci->post_proc_buffer_int_used = 1;
-            }
+            int width = (oci->Width + 15) & ~15;
+            int height = (oci->Height + 15) & ~15;
+
+            if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer_int,
+                                            width, height, VP8BORDERINPIXELS))
+                vpx_internal_error(&oci->error, VPX_CODEC_MEM_ERROR,
+                                   "Failed to allocate MFQE framebuffer");
+
+            oci->post_proc_buffer_int_used = 1;
+
             // insure that postproc is set to all 0's so that post proc
             // doesn't pull random data in from edge
             vpx_memset((&oci->post_proc_buffer_int)->buffer_alloc,126,(&oci->post_proc_buffer)->frame_size);
diff --git a/vp8/common/threading.h b/vp8/common/threading.h
index da6347d..ed9e3e6 100644
--- a/vp8/common/threading.h
+++ b/vp8/common/threading.h
@@ -33,6 +33,29 @@
 #define pthread_getspecific(ts_key) TlsGetValue(ts_key)
 #define pthread_setspecific(ts_key, value) TlsSetValue(ts_key, (void *)value)
 #define pthread_self() GetCurrentThreadId()
+
+#elif defined(__OS2__)
+/* OS/2 */
+#define INCL_DOS
+#include <os2.h>
+
+#include <stdlib.h>
+#define THREAD_FUNCTION void
+#define THREAD_FUNCTION_RETURN void
+#define THREAD_SPECIFIC_INDEX PULONG
+#define pthread_t TID
+#define pthread_attr_t ULONG
+#define pthread_create(thhandle,attr,thfunc,tharg) \
+    ((int)((*(thhandle)=_beginthread(thfunc,NULL,1024*1024,tharg))==-1))
+#define pthread_join(thread, result) ((int)DosWaitThread(&(thread),0))
+#define pthread_detach(thread) 0
+#define thread_sleep(nms) DosSleep(nms)
+#define pthread_cancel(thread) DosKillThread(thread)
+#define ts_key_create(ts_key, destructor) \
+    DosAllocThreadLocalMemory(1, &(ts_key));
+#define pthread_getspecific(ts_key) ((void *)(*(ts_key)))
+#define pthread_setspecific(ts_key, value) (*(ts_key)=(ULONG)(value))
+#define pthread_self() _gettid()
 #else
 #ifdef __APPLE__
 #include <mach/mach_init.h>
@@ -64,6 +87,76 @@
 #define sem_destroy(sem) if(*sem)((int)(CloseHandle(*sem))==TRUE)
 #define thread_sleep(nms) Sleep(nms)
 
+#elif defined(__OS2__)
+typedef struct
+{
+    HEV  event;
+    HMTX wait_mutex;
+    HMTX count_mutex;
+    int  count;
+} sem_t;
+
+static inline int sem_init(sem_t *sem, int pshared, unsigned int value)
+{
+    DosCreateEventSem(NULL, &sem->event, pshared ? DC_SEM_SHARED : 0,
+                      value > 0 ? TRUE : FALSE);
+    DosCreateMutexSem(NULL, &sem->wait_mutex, 0, FALSE);
+    DosCreateMutexSem(NULL, &sem->count_mutex, 0, FALSE);
+
+    sem->count = value;
+
+    return 0;
+}
+
+static inline int sem_wait(sem_t * sem)
+{
+    DosRequestMutexSem(sem->wait_mutex, -1);
+
+    DosWaitEventSem(sem->event, -1);
+
+    DosRequestMutexSem(sem->count_mutex, -1);
+
+    sem->count--;
+    if (sem->count == 0)
+    {
+        ULONG post_count;
+
+        DosResetEventSem(sem->event, &post_count);
+    }
+
+    DosReleaseMutexSem(sem->count_mutex);
+
+    DosReleaseMutexSem(sem->wait_mutex);
+
+    return 0;
+}
+
+static inline int sem_post(sem_t * sem)
+{
+    DosRequestMutexSem(sem->count_mutex, -1);
+
+    if (sem->count < 32768)
+    {
+        sem->count++;
+        DosPostEventSem(sem->event);
+    }
+
+    DosReleaseMutexSem(sem->count_mutex);
+
+    return 0;
+}
+
+static inline int sem_destroy(sem_t * sem)
+{
+    DosCloseEventSem(sem->event);
+    DosCloseMutexSem(sem->wait_mutex);
+    DosCloseMutexSem(sem->count_mutex);
+
+    return 0;
+}
+
+#define thread_sleep(nms) DosSleep(nms)
+
 #else
 
 #ifdef __APPLE__
diff --git a/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm b/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
index 5feaa8b..dc84c30 100644
--- a/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
+++ b/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
@@ -144,7 +144,7 @@
     ldr     r6, [sp, #40]       ; get address of sse
     mul     r0, r8, r8          ; sum * sum
     str     r11, [r6]           ; store sse
-    sub     r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8))
+    sub     r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
 
     ldmfd   sp!, {r4-r12, pc}
 
diff --git a/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm b/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
index 1b54897..dd2ce68 100644
--- a/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
+++ b/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
@@ -169,7 +169,7 @@
     ldr     r6, [sp, #40]       ; get address of sse
     mul     r0, r8, r8          ; sum * sum
     str     r11, [r6]           ; store sse
-    sub     r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8))
+    sub     r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
 
     ldmfd   sp!, {r4-r12, pc}
 
diff --git a/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm b/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
index 38c55ed..f972d9b 100644
--- a/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
+++ b/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
@@ -210,7 +210,7 @@
     ldr     r6, [sp, #40]       ; get address of sse
     mul     r0, r8, r8          ; sum * sum
     str     r11, [r6]           ; store sse
-    sub     r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8))
+    sub     r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
 
     ldmfd   sp!, {r4-r12, pc}
 
diff --git a/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm b/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
index 22a50eb..f5da9c0 100644
--- a/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
+++ b/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
@@ -171,7 +171,7 @@
     ldr     r6, [sp, #40]       ; get address of sse
     mul     r0, r8, r8          ; sum * sum
     str     r11, [r6]           ; store sse
-    sub     r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8))
+    sub     r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
 
     ldmfd   sp!, {r4-r12, pc}
 
diff --git a/vp8/encoder/arm/neon/variance_neon.asm b/vp8/encoder/arm/neon/variance_neon.asm
index e1a4686..e3b4832 100644
--- a/vp8/encoder/arm/neon/variance_neon.asm
+++ b/vp8/encoder/arm/neon/variance_neon.asm
@@ -77,14 +77,14 @@
     ;vmov.32        r1, d1[0]
     ;mul            r0, r0, r0
     ;str            r1, [r12]
-    ;sub            r0, r1, r0, asr #8
+    ;sub            r0, r1, r0, lsr #8
 
-    ;sum is in [-255x256, 255x256]. sumxsum is 32-bit. Shift to right should
-    ;have sign-bit exension, which is vshr.s. Have to use s32 to make it right.
+    ; while sum is signed, sum * sum is always positive and must be treated as
+    ; unsigned to avoid propagating the sign bit.
     vmull.s32       q5, d0, d0
     vst1.32         {d1[0]}, [r12]              ;store sse
-    vshr.s32        d10, d10, #8
-    vsub.s32        d0, d1, d10
+    vshr.u32        d10, d10, #8
+    vsub.u32        d0, d1, d10
 
     vmov.32         r0, d0[0]                   ;return
     bx              lr
@@ -145,8 +145,8 @@
 
     vmull.s32       q5, d0, d0
     vst1.32         {d1[0]}, [r12]              ;store sse
-    vshr.s32        d10, d10, #7
-    vsub.s32        d0, d1, d10
+    vshr.u32        d10, d10, #7
+    vsub.u32        d0, d1, d10
 
     vmov.32         r0, d0[0]                   ;return
     bx              lr
@@ -200,8 +200,8 @@
 
     vmull.s32       q5, d0, d0
     vst1.32         {d1[0]}, [r12]              ;store sse
-    vshr.s32        d10, d10, #7
-    vsub.s32        d0, d1, d10
+    vshr.u32        d10, d10, #7
+    vsub.u32        d0, d1, d10
 
     vmov.32         r0, d0[0]                   ;return
     bx              lr
@@ -265,8 +265,8 @@
 
     vmull.s32       q5, d0, d0
     vst1.32         {d1[0]}, [r12]              ;store sse
-    vshr.s32        d10, d10, #6
-    vsub.s32        d0, d1, d10
+    vshr.u32        d10, d10, #6
+    vsub.u32        d0, d1, d10
 
     vmov.32         r0, d0[0]                   ;return
     bx              lr
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
index 5107d8b..d753ad1 100644
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
@@ -405,8 +405,8 @@
 
     vmull.s32       q5, d0, d0
     vst1.32         {d1[0]}, [r6]               ;store sse
-    vshr.s32        d10, d10, #8
-    vsub.s32        d0, d1, d10
+    vshr.u32        d10, d10, #8
+    vsub.u32        d0, d1, d10
 
     add             sp, sp, #528
     vmov.32         r0, d0[0]                   ;return
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
index 0a2b71c..155be4f 100644
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
@@ -112,8 +112,8 @@
 
     vmull.s32       q5, d0, d0
     vst1.32         {d1[0]}, [lr]               ;store sse
-    vshr.s32        d10, d10, #8
-    vsub.s32        d0, d1, d10
+    vshr.u32        d10, d10, #8
+    vsub.u32        d0, d1, d10
 
     vmov.32         r0, d0[0]                   ;return
     pop             {pc}
@@ -208,8 +208,8 @@
 
     vmull.s32       q5, d0, d0
     vst1.32         {d1[0]}, [lr]               ;store sse
-    vshr.s32        d10, d10, #8
-    vsub.s32        d0, d1, d10
+    vshr.u32        d10, d10, #8
+    vsub.u32        d0, d1, d10
 
     vmov.32         r0, d0[0]                   ;return
     pop             {pc}
@@ -327,8 +327,8 @@
 
     vmull.s32       q5, d0, d0
     vst1.32         {d1[0]}, [lr]               ;store sse
-    vshr.s32        d10, d10, #8
-    vsub.s32        d0, d1, d10
+    vshr.u32        d10, d10, #8
+    vsub.u32        d0, d1, d10
 
     vmov.32         r0, d0[0]                   ;return
     pop             {pc}
@@ -560,8 +560,8 @@
 
     vmull.s32       q5, d0, d0
     vst1.32         {d1[0]}, [lr]               ;store sse
-    vshr.s32        d10, d10, #8
-    vsub.s32        d0, d1, d10
+    vshr.u32        d10, d10, #8
+    vsub.u32        d0, d1, d10
 
     add             sp, sp, #256
     vmov.32         r0, d0[0]                   ;return
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
index 38b5878..cc7ae52 100644
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
@@ -206,8 +206,8 @@
 
     vmull.s32       q5, d0, d0
     vst1.32         {d1[0]}, [lr]               ;store sse
-    vshr.s32        d10, d10, #6
-    vsub.s32        d0, d1, d10
+    vshr.u32        d10, d10, #6
+    vsub.u32        d0, d1, d10
 
     vmov.32         r0, d0[0]                   ;return
     pop             {r4-r5, pc}
diff --git a/vp8/encoder/ppc/variance_altivec.asm b/vp8/encoder/ppc/variance_altivec.asm
index a1ebf66..fb8d5bb 100644
--- a/vp8/encoder/ppc/variance_altivec.asm
+++ b/vp8/encoder/ppc/variance_altivec.asm
@@ -98,7 +98,7 @@
     stw     r4, 0(r7)           ;# sse
 
     mullw   r3, r3, r3          ;# sum*sum
-    srawi   r3, r3, \DS         ;# (sum*sum) >> DS
+    srlwi   r3, r3, \DS         ;# (sum*sum) >> DS
     subf    r3, r3, r4          ;# sse - ((sum*sum) >> DS)
 .endm
 
@@ -142,7 +142,7 @@
     stw     r4, 0(r7)           ;# sse
 
     mullw   r3, r3, r3          ;# sum*sum
-    srawi   r3, r3, \DS         ;# (sum*sum) >> 8
+    srlwi   r3, r3, \DS         ;# (sum*sum) >> 8
     subf    r3, r3, r4          ;# sse - ((sum*sum) >> 8)
 .endm
 
@@ -367,7 +367,7 @@
     stw     r4, 0(r7)           ;# sse
 
     mullw   r3, r3, r3          ;# sum*sum
-    srawi   r3, r3, 4           ;# (sum*sum) >> 4
+    srlwi   r3, r3, 4           ;# (sum*sum) >> 4
     subf    r3, r3, r4          ;# sse - ((sum*sum) >> 4)
 
     epilogue
diff --git a/vp8/encoder/ppc/variance_subpixel_altivec.asm b/vp8/encoder/ppc/variance_subpixel_altivec.asm
index 301360b..2308373 100644
--- a/vp8/encoder/ppc/variance_subpixel_altivec.asm
+++ b/vp8/encoder/ppc/variance_subpixel_altivec.asm
@@ -157,7 +157,7 @@
     stw     r4, 0(r9)           ;# sse
 
     mullw   r3, r3, r3          ;# sum*sum
-    srawi   r3, r3, \DS         ;# (sum*sum) >> 8
+    srlwi   r3, r3, \DS         ;# (sum*sum) >> 8
     subf    r3, r3, r4          ;# sse - ((sum*sum) >> 8)
 .endm
 
diff --git a/vp8/encoder/variance_c.c b/vp8/encoder/variance_c.c
index c7b9c22..dbcbc7a 100644
--- a/vp8/encoder/variance_c.c
+++ b/vp8/encoder/variance_c.c
@@ -75,7 +75,7 @@
 
     variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
     *sse = var;
-    return (var - ((avg * avg) >> 8));
+    return (var - ((unsigned int)(avg * avg) >> 8));
 }
 
 unsigned int vp8_variance8x16_c(
@@ -91,7 +91,7 @@
 
     variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
     *sse = var;
-    return (var - ((avg * avg) >> 7));
+    return (var - ((unsigned int)(avg * avg) >> 7));
 }
 
 unsigned int vp8_variance16x8_c(
@@ -107,7 +107,7 @@
 
     variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
     *sse = var;
-    return (var - ((avg * avg) >> 7));
+    return (var - ((unsigned int)(avg * avg) >> 7));
 }
 
 
@@ -124,7 +124,7 @@
 
     variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
     *sse = var;
-    return (var - ((avg * avg) >> 6));
+    return (var - ((unsigned int)(avg * avg) >> 6));
 }
 
 unsigned int vp8_variance4x4_c(
@@ -140,7 +140,7 @@
 
     variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
     *sse = var;
-    return (var - ((avg * avg) >> 4));
+    return (var - ((unsigned int)(avg * avg) >> 4));
 }
 
 
diff --git a/vp8/encoder/x86/variance_mmx.c b/vp8/encoder/x86/variance_mmx.c
index e2524b4..2d07df9 100644
--- a/vp8/encoder/x86/variance_mmx.c
+++ b/vp8/encoder/x86/variance_mmx.c
@@ -91,7 +91,7 @@
 
     vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
     *sse = var;
-    return (var - ((avg * avg) >> 4));
+    return (var - ((unsigned int)(avg * avg) >> 4));
 
 }
 
@@ -108,7 +108,7 @@
     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
     *sse = var;
 
-    return (var - ((avg * avg) >> 6));
+    return (var - ((unsigned int)(avg * avg) >> 6));
 
 }
 
@@ -153,7 +153,7 @@
     var = sse0 + sse1 + sse2 + sse3;
     avg = sum0 + sum1 + sum2 + sum3;
     *sse = var;
-    return (var - ((avg * avg) >> 8));
+    return (var - ((unsigned int)(avg * avg) >> 8));
 }
 
 unsigned int vp8_variance16x8_mmx(
@@ -172,7 +172,7 @@
     var = sse0 + sse1;
     avg = sum0 + sum1;
     *sse = var;
-    return (var - ((avg * avg) >> 7));
+    return (var - ((unsigned int)(avg * avg) >> 7));
 
 }
 
@@ -194,7 +194,7 @@
     avg = sum0 + sum1;
     *sse = var;
 
-    return (var - ((avg * avg) >> 7));
+    return (var - ((unsigned int)(avg * avg) >> 7));
 
 }
 
@@ -219,7 +219,7 @@
         &xsum, &xxsum
     );
     *sse = xxsum;
-    return (xxsum - ((xsum * xsum) >> 4));
+    return (xxsum - ((unsigned int)(xsum * xsum) >> 4));
 }
 
 
@@ -244,7 +244,7 @@
         &xsum, &xxsum
     );
     *sse = xxsum;
-    return (xxsum - ((xsum * xsum) >> 6));
+    return (xxsum - ((unsigned int)(xsum * xsum) >> 6));
 }
 
 unsigned int vp8_sub_pixel_variance16x16_mmx
@@ -282,7 +282,7 @@
     xxsum0 += xxsum1;
 
     *sse = xxsum0;
-    return (xxsum0 - ((xsum0 * xsum0) >> 8));
+    return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 8));
 
 
 }
@@ -335,7 +335,7 @@
     xxsum0 += xxsum1;
 
     *sse = xxsum0;
-    return (xxsum0 - ((xsum0 * xsum0) >> 7));
+    return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 7));
 }
 
 unsigned int vp8_sub_pixel_variance8x16_mmx
@@ -358,7 +358,7 @@
         &xsum, &xxsum
     );
     *sse = xxsum;
-    return (xxsum - ((xsum * xsum) >> 7));
+    return (xxsum - ((unsigned int)(xsum * xsum) >> 7));
 }
 
 
diff --git a/vp8/encoder/x86/variance_sse2.c b/vp8/encoder/x86/variance_sse2.c
index 39213b0..da0bd48 100644
--- a/vp8/encoder/x86/variance_sse2.c
+++ b/vp8/encoder/x86/variance_sse2.c
@@ -148,7 +148,7 @@
 
     vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
     *sse = var;
-    return (var - ((avg * avg) >> 4));
+    return (var - ((unsigned int)(avg * avg) >> 4));
 
 }
 
@@ -165,7 +165,7 @@
 
     vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
     *sse = var;
-    return (var - ((avg * avg) >> 6));
+    return (var - ((unsigned int)(avg * avg) >> 6));
 
 }
 
@@ -184,7 +184,7 @@
 
     vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
     *sse = sse0;
-    return (sse0 - ((sum0 * sum0) >> 8));
+    return (sse0 - ((unsigned int)(sum0 * sum0) >> 8));
 }
 unsigned int vp8_mse16x16_wmt(
     const unsigned char *src_ptr,
@@ -220,7 +220,7 @@
     var = sse0 + sse1;
     avg = sum0 + sum1;
     *sse = var;
-    return (var - ((avg * avg) >> 7));
+    return (var - ((unsigned int)(avg * avg) >> 7));
 
 }
 
@@ -241,7 +241,7 @@
     var = sse0 + sse1;
     avg = sum0 + sum1;
     *sse = var;
-    return (var - ((avg * avg) >> 7));
+    return (var - ((unsigned int)(avg * avg) >> 7));
 
 }
 
@@ -265,7 +265,7 @@
         &xsum, &xxsum
     );
     *sse = xxsum;
-    return (xxsum - ((xsum * xsum) >> 4));
+    return (xxsum - ((unsigned int)(xsum * xsum) >> 4));
 }
 
 
@@ -314,7 +314,7 @@
     }
 
     *sse = xxsum;
-    return (xxsum - ((xsum * xsum) >> 6));
+    return (xxsum - ((unsigned int)(xsum * xsum) >> 6));
 }
 
 unsigned int vp8_sub_pixel_variance16x16_wmt
@@ -375,7 +375,7 @@
     }
 
     *sse = xxsum0;
-    return (xxsum0 - ((xsum0 * xsum0) >> 8));
+    return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 8));
 }
 
 unsigned int vp8_sub_pixel_mse16x16_wmt(
@@ -446,7 +446,7 @@
     }
 
     *sse = xxsum0;
-    return (xxsum0 - ((xsum0 * xsum0) >> 7));
+    return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 7));
 }
 
 unsigned int vp8_sub_pixel_variance8x16_wmt
@@ -494,7 +494,7 @@
     }
 
     *sse = xxsum;
-    return (xxsum - ((xsum * xsum) >> 7));
+    return (xxsum - ((unsigned int)(xsum * xsum) >> 7));
 }
 
 
@@ -514,7 +514,7 @@
         &xsum0, &xxsum0);
 
     *sse = xxsum0;
-    return (xxsum0 - ((xsum0 * xsum0) >> 8));
+    return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 8));
 }
 
 
@@ -533,7 +533,7 @@
         &xsum0, &xxsum0);
 
     *sse = xxsum0;
-    return (xxsum0 - ((xsum0 * xsum0) >> 8));
+    return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 8));
 }
 
 
@@ -553,5 +553,5 @@
         &xsum0, &xxsum0);
 
     *sse = xxsum0;
-    return (xxsum0 - ((xsum0 * xsum0) >> 8));
+    return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 8));
 }
diff --git a/vp8/encoder/x86/variance_ssse3.c b/vp8/encoder/x86/variance_ssse3.c
index 73f2e01..8b8b87e 100644
--- a/vp8/encoder/x86/variance_ssse3.c
+++ b/vp8/encoder/x86/variance_ssse3.c
@@ -112,7 +112,7 @@
     }
 
     *sse = xxsum0;
-    return (xxsum0 - ((xsum0 * xsum0) >> 8));
+    return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 8));
 }
 
 unsigned int vp8_sub_pixel_variance16x8_ssse3
@@ -161,5 +161,5 @@
     }
 
     *sse = xxsum0;
-    return (xxsum0 - ((xsum0 * xsum0) >> 7));
+    return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 7));
 }
diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index 7382a91..cef6a0b 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -22,6 +22,8 @@
 %define ABI_IS_32BIT 1
 %elifidn __OUTPUT_FORMAT__,win32
 %define ABI_IS_32BIT 1
+%elifidn __OUTPUT_FORMAT__,aout
+%define ABI_IS_32BIT 1
 %else
 %define ABI_IS_32BIT 0
 %endif
@@ -314,6 +316,8 @@
 %macro SECTION_RODATA 0
 section .text
 %endmacro
+%elifidn __OUTPUT_FORMAT__,aout
+%define SECTION_RODATA section .data
 %else
 %define SECTION_RODATA section .rodata
 %endif