Merge "Removed unused vp8_recon_intra4x4mb function"
diff --git a/vp8/common/arm/arm_systemdependent.c b/vp8/common/arm/arm_systemdependent.c
index 83921f8..69e1bdf 100644
--- a/vp8/common/arm/arm_systemdependent.c
+++ b/vp8/common/arm/arm_systemdependent.c
@@ -11,13 +11,13 @@
 
 #include "vpx_ports/config.h"
 #include "vpx_ports/arm.h"
-#include "g_common.h"
-#include "pragmas.h"
-#include "subpixel.h"
-#include "loopfilter.h"
-#include "recon.h"
-#include "idct.h"
-#include "onyxc_int.h"
+#include "vp8/common/g_common.h"
+#include "vp8/common/pragmas.h"
+#include "vp8/common/subpixel.h"
+#include "vp8/common/loopfilter.h"
+#include "vp8/common/recon.h"
+#include "vp8/common/idct.h"
+#include "vp8/common/onyxc_int.h"
 
 extern void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
 extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
diff --git a/vp8/common/arm/bilinearfilter_arm.c b/vp8/common/arm/bilinearfilter_arm.c
index 6a46ef6..7340e20 100644
--- a/vp8/common/arm/bilinearfilter_arm.c
+++ b/vp8/common/arm/bilinearfilter_arm.c
@@ -10,9 +10,9 @@
 
 
 #include <math.h>
-#include "filter.h"
-#include "subpixel.h"
-#include "arm/bilinearfilter_arm.h"
+#include "vp8/common/filter.h"
+#include "vp8/common/subpixel.h"
+#include "bilinearfilter_arm.h"
 
 void vp8_filter_block2d_bil_armv6
 (
diff --git a/vp8/common/arm/filter_arm.c b/vp8/common/arm/filter_arm.c
index 2612fc1..fe3c5a5 100644
--- a/vp8/common/arm/filter_arm.c
+++ b/vp8/common/arm/filter_arm.c
@@ -11,8 +11,8 @@
 
 #include "vpx_ports/config.h"
 #include <math.h>
-#include "filter.h"
-#include "subpixel.h"
+#include "vp8/common/filter.h"
+#include "vp8/common/subpixel.h"
 #include "vpx_ports/mem.h"
 
 extern void vp8_filter_block2d_first_pass_armv6
diff --git a/vp8/common/arm/loopfilter_arm.c b/vp8/common/arm/loopfilter_arm.c
index 684a7f1..3532a03 100644
--- a/vp8/common/arm/loopfilter_arm.c
+++ b/vp8/common/arm/loopfilter_arm.c
@@ -11,8 +11,8 @@
 
 #include "vpx_ports/config.h"
 #include <math.h>
-#include "loopfilter.h"
-#include "onyxc_int.h"
+#include "vp8/common/loopfilter.h"
+#include "vp8/common/onyxc_int.h"
 
 extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
 extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6);
diff --git a/vp8/common/arm/neon/recon_neon.c b/vp8/common/arm/neon/recon_neon.c
index f7930ee..09fd2a5 100644
--- a/vp8/common/arm/neon/recon_neon.c
+++ b/vp8/common/arm/neon/recon_neon.c
@@ -10,8 +10,8 @@
 
 
 #include "vpx_ports/config.h"
-#include "recon.h"
-#include "blockd.h"
+#include "vp8/common/recon.h"
+#include "vp8/common/blockd.h"
 
 extern void vp8_recon16x16mb_neon(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int ystride, unsigned char *udst_ptr, unsigned char *vdst_ptr);
 
diff --git a/vp8/common/arm/reconintra_arm.c b/vp8/common/arm/reconintra_arm.c
index 4cc93d1..f8f4dca 100644
--- a/vp8/common/arm/reconintra_arm.c
+++ b/vp8/common/arm/reconintra_arm.c
@@ -10,10 +10,10 @@
 
 
 #include "vpx_ports/config.h"
-#include "blockd.h"
-#include "reconintra.h"
+#include "vp8/common/blockd.h"
+#include "vp8/common/reconintra.h"
 #include "vpx_mem/vpx_mem.h"
-#include "recon.h"
+#include "vp8/common/recon.h"
 
 #if HAVE_ARMV7
 extern void vp8_build_intra_predictors_mby_neon_func(
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index b3eadaf..c843d86 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -10,12 +10,12 @@
 
 
 #include "vpx_ports/config.h"
-#include "g_common.h"
-#include "subpixel.h"
-#include "loopfilter.h"
-#include "recon.h"
-#include "idct.h"
-#include "onyxc_int.h"
+#include "vp8/common/g_common.h"
+#include "vp8/common/subpixel.h"
+#include "vp8/common/loopfilter.h"
+#include "vp8/common/recon.h"
+#include "vp8/common/idct.h"
+#include "vp8/common/onyxc_int.h"
 
 extern void vp8_arch_x86_common_init(VP8_COMMON *ctx);
 extern void vp8_arch_arm_common_init(VP8_COMMON *ctx);
diff --git a/vp8/common/x86/loopfilter_x86.c b/vp8/common/x86/loopfilter_x86.c
index 3621fe1..5837bc0 100644
--- a/vp8/common/x86/loopfilter_x86.c
+++ b/vp8/common/x86/loopfilter_x86.c
@@ -10,7 +10,7 @@
 
 
 #include "vpx_ports/config.h"
-#include "loopfilter.h"
+#include "vp8/common/loopfilter.h"
 
 prototype_loopfilter(vp8_loop_filter_horizontal_edge_c);
 prototype_loopfilter(vp8_loop_filter_vertical_edge_c);
diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c
index 8dd07c9..7904006 100644
--- a/vp8/common/x86/vp8_asm_stubs.c
+++ b/vp8/common/x86/vp8_asm_stubs.c
@@ -11,7 +11,7 @@
 
 #include "vpx_ports/config.h"
 #include "vpx_ports/mem.h"
-#include "subpixel.h"
+#include "vp8/common/subpixel.h"
 
 extern const short vp8_six_tap_mmx[8][6*8];
 extern const short vp8_bilinear_filters_mmx[8][2*8];
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index 38500fd..e89c07a 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -11,13 +11,13 @@
 
 #include "vpx_ports/config.h"
 #include "vpx_ports/x86.h"
-#include "g_common.h"
-#include "subpixel.h"
-#include "loopfilter.h"
-#include "recon.h"
-#include "idct.h"
-#include "pragmas.h"
-#include "onyxc_int.h"
+#include "vp8/common/g_common.h"
+#include "vp8/common/subpixel.h"
+#include "vp8/common/loopfilter.h"
+#include "vp8/common/recon.h"
+#include "vp8/common/idct.h"
+#include "vp8/common/pragmas.h"
+#include "vp8/common/onyxc_int.h"
 
 void vp8_arch_x86_common_init(VP8_COMMON *ctx)
 {
diff --git a/vp8/decoder/arm/arm_dsystemdependent.c b/vp8/decoder/arm/arm_dsystemdependent.c
index 02e45d1..51e901d 100644
--- a/vp8/decoder/arm/arm_dsystemdependent.c
+++ b/vp8/decoder/arm/arm_dsystemdependent.c
@@ -11,11 +11,11 @@
 
 #include "vpx_ports/config.h"
 #include "vpx_ports/arm.h"
-#include "blockd.h"
-#include "pragmas.h"
-#include "postproc.h"
-#include "dequantize.h"
-#include "onyxd_int.h"
+#include "vp8/common/blockd.h"
+#include "vp8/common/pragmas.h"
+#include "vp8/common/postproc.h"
+#include "vp8/decoder/dequantize.h"
+#include "vp8/decoder/onyxd_int.h"
 
 void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
 {
diff --git a/vp8/decoder/arm/armv6/idct_blk_v6.c b/vp8/decoder/arm/armv6/idct_blk_v6.c
index 3c7bc50..57c3446 100644
--- a/vp8/decoder/arm/armv6/idct_blk_v6.c
+++ b/vp8/decoder/arm/armv6/idct_blk_v6.c
@@ -9,8 +9,8 @@
  */
 
 #include "vpx_ports/config.h"
-#include "idct.h"
-#include "dequantize.h"
+#include "vp8/common/idct.h"
+#include "vp8/decoder/dequantize.h"
 
 void vp8_dequant_dc_idct_add_y_block_v6
             (short *q, short *dq, unsigned char *pre,
diff --git a/vp8/decoder/arm/dequantize_arm.c b/vp8/decoder/arm/dequantize_arm.c
index b3e14b7..b26af97 100644
--- a/vp8/decoder/arm/dequantize_arm.c
+++ b/vp8/decoder/arm/dequantize_arm.c
@@ -10,9 +10,9 @@
 
 
 #include "vpx_ports/config.h"
-#include "dequantize.h"
-#include "predictdc.h"
-#include "idct.h"
+#include "vp8/decoder/dequantize.h"
+#include "vp8/common/predictdc.h"
+#include "vp8/common/idct.h"
 #include "vpx_mem/vpx_mem.h"
 
 #if HAVE_ARMV7
diff --git a/vp8/decoder/arm/neon/idct_blk_neon.c b/vp8/decoder/arm/neon/idct_blk_neon.c
index fe4f2e0..ee35004 100644
--- a/vp8/decoder/arm/neon/idct_blk_neon.c
+++ b/vp8/decoder/arm/neon/idct_blk_neon.c
@@ -9,8 +9,8 @@
  */
 
 #include "vpx_ports/config.h"
-#include "idct.h"
-#include "dequantize.h"
+#include "vp8/common/idct.h"
+#include "vp8/decoder/dequantize.h"
 
 /* place these declarations here because we don't want to maintain them
  * outside of this scope
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index 203d72d..ded5fa4 100644
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -10,10 +10,10 @@
 
 
 #include "treereader.h"
-#include "entropymv.h"
-#include "entropymode.h"
+#include "vp8/common/entropymv.h"
+#include "vp8/common/entropymode.h"
 #include "onyxd_int.h"
-#include "findnearmv.h"
+#include "vp8/common/findnearmv.h"
 
 #if CONFIG_DEBUG
 #include <assert.h>
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index f4bb664..5769100 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -10,28 +10,28 @@
 
 
 #include "onyxd_int.h"
-#include "header.h"
-#include "reconintra.h"
-#include "reconintra4x4.h"
-#include "recon.h"
-#include "reconinter.h"
+#include "vp8/common/header.h"
+#include "vp8/common/reconintra.h"
+#include "vp8/common/reconintra4x4.h"
+#include "vp8/common/recon.h"
+#include "vp8/common/reconinter.h"
 #include "dequantize.h"
 #include "detokenize.h"
-#include "invtrans.h"
-#include "alloccommon.h"
-#include "entropymode.h"
-#include "quant_common.h"
+#include "vp8/common/invtrans.h"
+#include "vp8/common/alloccommon.h"
+#include "vp8/common/entropymode.h"
+#include "vp8/common/quant_common.h"
 #include "vpx_scale/vpxscale.h"
 #include "vpx_scale/yv12extend.h"
-#include "setupintrarecon.h"
+#include "vp8/common/setupintrarecon.h"
 
 #include "decodemv.h"
-#include "extend.h"
+#include "vp8/common/extend.h"
 #include "vpx_mem/vpx_mem.h"
-#include "idct.h"
+#include "vp8/common/idct.h"
 #include "dequantize.h"
-#include "predictdc.h"
-#include "threading.h"
+#include "vp8/common/predictdc.h"
+#include "vp8/common/threading.h"
 #include "decoderthreading.h"
 #include "dboolhuff.h"
 
diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c
index 84a9fd9..4e1a5e1 100644
--- a/vp8/decoder/dequantize.c
+++ b/vp8/decoder/dequantize.c
@@ -11,8 +11,8 @@
 
 #include "vpx_ports/config.h"
 #include "dequantize.h"
-#include "predictdc.h"
-#include "idct.h"
+#include "vp8/common/predictdc.h"
+#include "vp8/common/idct.h"
 #include "vpx_mem/vpx_mem.h"
 
 extern void vp8_short_idct4x4llm_c(short *input, short *output, int pitch) ;
diff --git a/vp8/decoder/dequantize.h b/vp8/decoder/dequantize.h
index b78e39c..2e662a5 100644
--- a/vp8/decoder/dequantize.h
+++ b/vp8/decoder/dequantize.h
@@ -11,7 +11,7 @@
 
 #ifndef DEQUANTIZE_H
 #define DEQUANTIZE_H
-#include "blockd.h"
+#include "vp8/common/blockd.h"
 
 #define prototype_dequant_block(sym) \
     void sym(BLOCKD *x)
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index e529db1..83fa14d 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -9,8 +9,8 @@
  */
 
 
-#include "type_aliases.h"
-#include "blockd.h"
+#include "vp8/common/type_aliases.h"
+#include "vp8/common/blockd.h"
 #include "onyxd_int.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/mem.h"
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c
index 709ec59..2406dea 100644
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -10,8 +10,8 @@
 
 
 #include "vpx_ports/config.h"
-#include "dequantize.h"
-#include "onyxd_int.h"
+#include "vp8/decoder/dequantize.h"
+#include "vp8/decoder/onyxd_int.h"
 
 extern void vp8_arch_x86_decode_init(VP8D_COMP *pbi);
 extern void vp8_arch_arm_decode_init(VP8D_COMP *pbi);
diff --git a/vp8/decoder/idct_blk.c b/vp8/decoder/idct_blk.c
index c98bd5b..df01923 100644
--- a/vp8/decoder/idct_blk.c
+++ b/vp8/decoder/idct_blk.c
@@ -9,7 +9,7 @@
  */
 
 #include "vpx_ports/config.h"
-#include "idct.h"
+#include "vp8/common/idct.h"
 #include "dequantize.h"
 
 void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred,
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 9d49c9e..bfb00a5 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -9,25 +9,25 @@
  */
 
 
-#include "onyxc_int.h"
+#include "vp8/common/onyxc_int.h"
 #if CONFIG_POSTPROC
-#include "postproc.h"
+#include "vp8/common/postproc.h"
 #endif
-#include "onyxd.h"
+#include "vp8/common/onyxd.h"
 #include "onyxd_int.h"
 #include "vpx_mem/vpx_mem.h"
-#include "alloccommon.h"
+#include "vp8/common/alloccommon.h"
 #include "vpx_scale/yv12extend.h"
-#include "loopfilter.h"
-#include "swapyv12buffer.h"
-#include "g_common.h"
-#include "threading.h"
+#include "vp8/common/loopfilter.h"
+#include "vp8/common/swapyv12buffer.h"
+#include "vp8/common/g_common.h"
+#include "vp8/common/threading.h"
 #include "decoderthreading.h"
 #include <stdio.h>
 
-#include "quant_common.h"
+#include "vp8/common/quant_common.h"
 #include "vpx_scale/vpxscale.h"
-#include "systemdependent.h"
+#include "vp8/common/systemdependent.h"
 #include "vpx_ports/vpx_timer.h"
 #include "detokenize.h"
 #if ARCH_ARM
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index 09a8976..ac1e332 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -12,10 +12,10 @@
 #ifndef __INC_VP8D_INT_H
 #define __INC_VP8D_INT_H
 #include "vpx_ports/config.h"
-#include "onyxd.h"
+#include "vp8/common/onyxd.h"
 #include "treereader.h"
-#include "onyxc_int.h"
-#include "threading.h"
+#include "vp8/common/onyxc_int.h"
+#include "vp8/common/threading.h"
 #include "dequantize.h"
 
 typedef struct
diff --git a/vp8/decoder/reconintra_mt.c b/vp8/decoder/reconintra_mt.c
index 854aba3..b9d2b37 100644
--- a/vp8/decoder/reconintra_mt.c
+++ b/vp8/decoder/reconintra_mt.c
@@ -10,8 +10,8 @@
 
 
 #include "vpx_ports/config.h"
-#include "recon.h"
-#include "reconintra.h"
+#include "vp8/common/recon.h"
+#include "vp8/common/reconintra.h"
 #include "vpx_mem/vpx_mem.h"
 #include "onyxd_int.h"
 
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 271249a..7fc9010 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -17,13 +17,13 @@
 #endif
 #include "onyxd_int.h"
 #include "vpx_mem/vpx_mem.h"
-#include "threading.h"
+#include "vp8/common/threading.h"
 
-#include "loopfilter.h"
-#include "extend.h"
+#include "vp8/common/loopfilter.h"
+#include "vp8/common/extend.h"
 #include "vpx_ports/vpx_timer.h"
 #include "detokenize.h"
-#include "reconinter.h"
+#include "vp8/common/reconinter.h"
 #include "reconintra_mt.h"
 
 extern void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
@@ -473,22 +473,16 @@
 
     if (pbi->b_multithreaded_rd)
     {
-        if (pbi->mt_current_mb_col)
-        {
             vpx_free(pbi->mt_current_mb_col);
             pbi->mt_current_mb_col = NULL ;
-        }
 
         /* Free above_row buffers. */
         if (pbi->mt_yabove_row)
         {
             for (i=0; i< mb_rows; i++)
             {
-                if (pbi->mt_yabove_row[i])
-                {
                     vpx_free(pbi->mt_yabove_row[i]);
                     pbi->mt_yabove_row[i] = NULL ;
-                }
             }
             vpx_free(pbi->mt_yabove_row);
             pbi->mt_yabove_row = NULL ;
@@ -498,11 +492,8 @@
         {
             for (i=0; i< mb_rows; i++)
             {
-                if (pbi->mt_uabove_row[i])
-                {
                     vpx_free(pbi->mt_uabove_row[i]);
                     pbi->mt_uabove_row[i] = NULL ;
-                }
             }
             vpx_free(pbi->mt_uabove_row);
             pbi->mt_uabove_row = NULL ;
@@ -512,11 +503,8 @@
         {
             for (i=0; i< mb_rows; i++)
             {
-                if (pbi->mt_vabove_row[i])
-                {
                     vpx_free(pbi->mt_vabove_row[i]);
                     pbi->mt_vabove_row[i] = NULL ;
-                }
             }
             vpx_free(pbi->mt_vabove_row);
             pbi->mt_vabove_row = NULL ;
@@ -527,11 +515,8 @@
         {
             for (i=0; i< mb_rows; i++)
             {
-                if (pbi->mt_yleft_col[i])
-                {
                     vpx_free(pbi->mt_yleft_col[i]);
                     pbi->mt_yleft_col[i] = NULL ;
-                }
             }
             vpx_free(pbi->mt_yleft_col);
             pbi->mt_yleft_col = NULL ;
@@ -541,11 +526,8 @@
         {
             for (i=0; i< mb_rows; i++)
             {
-                if (pbi->mt_uleft_col[i])
-                {
                     vpx_free(pbi->mt_uleft_col[i]);
                     pbi->mt_uleft_col[i] = NULL ;
-                }
             }
             vpx_free(pbi->mt_uleft_col);
             pbi->mt_uleft_col = NULL ;
@@ -555,11 +537,8 @@
         {
             for (i=0; i< mb_rows; i++)
             {
-                if (pbi->mt_vleft_col[i])
-                {
                     vpx_free(pbi->mt_vleft_col[i]);
                     pbi->mt_vleft_col[i] = NULL ;
-                }
             }
             vpx_free(pbi->mt_vleft_col);
             pbi->mt_vleft_col = NULL ;
@@ -644,29 +623,17 @@
 
         sem_destroy(&pbi->h_event_end_decoding);
 
-        if (pbi->h_decoding_thread)
-        {
             vpx_free(pbi->h_decoding_thread);
             pbi->h_decoding_thread = NULL;
-        }
 
-        if (pbi->h_event_start_decoding)
-        {
             vpx_free(pbi->h_event_start_decoding);
             pbi->h_event_start_decoding = NULL;
-        }
 
-        if (pbi->mb_row_di)
-        {
             vpx_free(pbi->mb_row_di);
             pbi->mb_row_di = NULL ;
-        }
 
-        if (pbi->de_thread_data)
-        {
             vpx_free(pbi->de_thread_data);
             pbi->de_thread_data = NULL;
-        }
     }
 }
 
diff --git a/vp8/decoder/treereader.h b/vp8/decoder/treereader.h
index 2778428..b50a4d2 100644
--- a/vp8/decoder/treereader.h
+++ b/vp8/decoder/treereader.h
@@ -12,7 +12,7 @@
 #ifndef tree_reader_h
 #define tree_reader_h 1
 
-#include "treecoder.h"
+#include "vp8/common/treecoder.h"
 
 #include "dboolhuff.h"
 
diff --git a/vp8/decoder/x86/idct_blk_mmx.c b/vp8/decoder/x86/idct_blk_mmx.c
index 78c91d3..8f1a363 100644
--- a/vp8/decoder/x86/idct_blk_mmx.c
+++ b/vp8/decoder/x86/idct_blk_mmx.c
@@ -9,8 +9,8 @@
  */
 
 #include "vpx_ports/config.h"
-#include "idct.h"
-#include "dequantize.h"
+#include "vp8/common/idct.h"
+#include "vp8/decoder/dequantize.h"
 
 void vp8_dequant_dc_idct_add_y_block_mmx
             (short *q, short *dq, unsigned char *pre,
diff --git a/vp8/decoder/x86/idct_blk_sse2.c b/vp8/decoder/x86/idct_blk_sse2.c
index 0273d6e..4c88db4 100644
--- a/vp8/decoder/x86/idct_blk_sse2.c
+++ b/vp8/decoder/x86/idct_blk_sse2.c
@@ -9,8 +9,8 @@
  */
 
 #include "vpx_ports/config.h"
-#include "idct.h"
-#include "dequantize.h"
+#include "vp8/common/idct.h"
+#include "vp8/decoder/dequantize.h"
 
 void idct_dequant_dc_0_2x_sse2
             (short *q, short *dq, unsigned char *pre,
diff --git a/vp8/decoder/x86/onyxdxv.c b/vp8/decoder/x86/onyxdxv.c
index 50293c7..ed38e2b 100644
--- a/vp8/decoder/x86/onyxdxv.c
+++ b/vp8/decoder/x86/onyxdxv.c
@@ -21,19 +21,19 @@
 *  Header Files
 ****************************************************************************/
 #include <math.h>   // For Abs()
-#include "pragmas.h"
+#include "vp8/common/pragmas.h"
 
 #include "vpxdxv.h"
 #include "vpxdxv_plugin.h"
 
-#include "onyxd_int.h"
-#include "onyx.h"
-#include "codec_common_interface.h"
+#include "vp8/decoder/onyxd_int.h"
+#include "vp8/common/onyx.h"
+#include "vp8/common/codec_common_interface.h"
 #include "vpx_scale/vpxscale.h"
 #include "vpx_mem/vpx_mem.h"
-#include "postproc.h"
-#include "vpxblit.h"
-#include "g_common.h"
+#include "vp8/common/postproc.h"
+#include "vp8/common/vpxblit.h"
+#include "vp8/common/g_common.h"
 #include "vpx_scale/yv12extend.h"
 
 #include <limits.h>
diff --git a/vp8/decoder/x86/x86_dsystemdependent.c b/vp8/decoder/x86/x86_dsystemdependent.c
index 47e346d..3f1b7d5 100644
--- a/vp8/decoder/x86/x86_dsystemdependent.c
+++ b/vp8/decoder/x86/x86_dsystemdependent.c
@@ -11,7 +11,7 @@
 
 #include "vpx_ports/config.h"
 #include "vpx_ports/x86.h"
-#include "onyxd_int.h"
+#include "vp8/decoder/onyxd_int.h"
 
 
 #if HAVE_MMX
diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c
index 6c17a79..5852afd 100644
--- a/vp8/encoder/arm/arm_csystemdependent.c
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@@ -11,8 +11,8 @@
 
 #include "vpx_ports/config.h"
 #include "vpx_ports/arm.h"
-#include "variance.h"
-#include "onyx_int.h"
+#include "vp8/encoder/variance.h"
+#include "vp8/encoder/onyx_int.h"
 
 extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
 extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
@@ -29,8 +29,8 @@
 #if HAVE_ARMV6
     if (has_media)
     {
-        /*cpi->rtcd.variance.sad16x16              = vp8_sad16x16_c;
-        cpi->rtcd.variance.sad16x8               = vp8_sad16x8_c;
+        cpi->rtcd.variance.sad16x16              = vp8_sad16x16_armv6;
+        /*cpi->rtcd.variance.sad16x8               = vp8_sad16x8_c;
         cpi->rtcd.variance.sad8x16               = vp8_sad8x16_c;
         cpi->rtcd.variance.sad8x8                = vp8_sad8x8_c;
         cpi->rtcd.variance.sad4x4                = vp8_sad4x4_c;*/
diff --git a/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm b/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm
new file mode 100644
index 0000000..c759f7c
--- /dev/null
+++ b/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm
@@ -0,0 +1,84 @@
+;
+;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+    EXPORT  |vp8_sad16x16_armv6|
+
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA ||.text||, CODE, READONLY, ALIGN=2
+
+; r0    const unsigned char *src_ptr
+; r1    int  src_stride
+; r2    const unsigned char *ref_ptr
+; r3    int  ref_stride
+; stack max_sad (not used); the accumulated 16x16 SAD is returned in r0
+|vp8_sad16x16_armv6| PROC
+    stmfd   sp!, {r4-r12, lr}   ; save r4-r12 and lr (lr doubles as scratch below)
+    mov     r4, #0              ; sad = 0;
+    mov     r5, #8              ; loop count: 8 passes, 2 rows of 16 pixels each
+
+loop
+    ; 1st row
+    ldr     r6, [r0, #0x0]      ; load 4 src pixels (1A)
+    ldr     r8, [r2, #0x0]      ; load 4 ref pixels (1A)
+    ldr     r7, [r0, #0x4]      ; load 4 src pixels (1A)
+    ldr     r9, [r2, #0x4]      ; load 4 ref pixels (1A)
+    ldr     r10, [r0, #0x8]     ; load 4 src pixels (1B)
+    ldr     r11, [r0, #0xC]     ; load 4 src pixels (1B)
+
+    usada8  r4, r8, r6, r4      ; sad += abs diffs of pixels 0-3
+    usad8   r8, r7, r9          ; r8 = abs diffs of pixels 4-7 (second accumulator)
+
+    ldr     r12, [r2, #0x8]     ; load 4 ref pixels (1B)
+    ldr     lr, [r2, #0xC]      ; load 4 ref pixels (1B)
+
+    add     r0, r0, r1          ; set src pointer to next row
+    add     r2, r2, r3          ; set ref pointer to next row
+
+    usada8  r4, r10, r12, r4    ; sad += abs diffs of pixels 8-11
+    usada8  r8, r11, lr, r8     ; r8 += abs diffs of pixels 12-15
+
+    ldr     r6, [r0, #0x0]      ; load 4 src pixels (2A), ahead of the 2nd row
+    ldr     r7, [r0, #0x4]      ; load 4 src pixels (2A), ahead of the 2nd row
+    add     r4, r4, r8          ; fold second accumulator into sad
+
+    ; 2nd row
+    ldr     r8, [r2, #0x0]      ; load 4 ref pixels (2A)
+    ldr     r9, [r2, #0x4]      ; load 4 ref pixels (2A)
+    ldr     r10, [r0, #0x8]     ; load 4 src pixels (2B)
+    ldr     r11, [r0, #0xC]     ; load 4 src pixels (2B)
+
+    usada8  r4, r6, r8, r4      ; sad += abs diffs of pixels 0-3
+    usad8   r8, r7, r9          ; r8 = abs diffs of pixels 4-7 (second accumulator)
+
+    ldr     r12, [r2, #0x8]     ; load 4 ref pixels (2B)
+    ldr     lr, [r2, #0xC]      ; load 4 ref pixels (2B)
+
+    add     r0, r0, r1          ; set src pointer to next row
+    add     r2, r2, r3          ; set ref pointer to next row
+
+    usada8  r4, r10, r12, r4    ; sad += abs diffs of pixels 8-11
+    usada8  r8, r11, lr, r8     ; r8 += abs diffs of pixels 12-15
+
+    subs    r5, r5, #1          ; decrement loop counter (sets flags for bne)
+    add     r4, r4, r8          ; fold second accumulator; plain add keeps the flags
+
+    bne     loop                ; repeat until the counter reaches zero
+
+    mov     r0, r4              ; return sad
+    ldmfd   sp!, {r4-r12, pc}   ; restore r4-r12; saved lr is popped into pc to return
+
+    ENDP
+
+    END
+
diff --git a/vp8/encoder/arm/boolhuff_arm.c b/vp8/encoder/arm/boolhuff_arm.c
index fe8e70c..9089663 100644
--- a/vp8/encoder/arm/boolhuff_arm.c
+++ b/vp8/encoder/arm/boolhuff_arm.c
@@ -9,8 +9,8 @@
  */
 
 
-#include "boolhuff.h"
-#include "blockd.h"
+#include "vp8/encoder/boolhuff.h"
+#include "vp8/common/blockd.h"
 
 const unsigned int vp8_prob_cost[256] =
 {
diff --git a/vp8/encoder/arm/encodemb_arm.c b/vp8/encoder/arm/encodemb_arm.c
index cc9e014..88ad3fc 100644
--- a/vp8/encoder/arm/encodemb_arm.c
+++ b/vp8/encoder/arm/encodemb_arm.c
@@ -9,13 +9,13 @@
  */
 
 
-#include "encodemb.h"
-#include "reconinter.h"
-#include "quantize.h"
-#include "invtrans.h"
-#include "recon.h"
-#include "reconintra.h"
-#include "dct.h"
+#include "vp8/encoder/encodemb.h"
+#include "vp8/common/reconinter.h"
+#include "vp8/encoder/quantize.h"
+#include "vp8/common/invtrans.h"
+#include "vp8/common/recon.h"
+#include "vp8/common/reconintra.h"
+#include "vp8/encoder/dct.h"
 #include "vpx_mem/vpx_mem.h"
 
 extern void vp8_subtract_b_neon_func(short *diff, unsigned char *src, unsigned char *pred, int stride, int pitch);
diff --git a/vp8/encoder/arm/picklpf_arm.c b/vp8/encoder/arm/picklpf_arm.c
index b2d8f2b..3fb370c 100644
--- a/vp8/encoder/arm/picklpf_arm.c
+++ b/vp8/encoder/arm/picklpf_arm.c
@@ -9,13 +9,13 @@
  */
 
 
-#include "onyxc_int.h"
-#include "onyx_int.h"
-#include "quantize.h"
+#include "vp8/common/onyxc_int.h"
+#include "vp8/encoder/onyx_int.h"
+#include "vp8/encoder/quantize.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_scale/yv12extend.h"
 #include "vpx_scale/vpxscale.h"
-#include "alloccommon.h"
+#include "vp8/common/alloccommon.h"
 
 extern void vp8_memcpy_neon(unsigned char *dst_ptr, unsigned char *src_ptr, int sz);
 
diff --git a/vp8/encoder/arm/quantize_arm.c b/vp8/encoder/arm/quantize_arm.c
index 225feac..75ec205 100644
--- a/vp8/encoder/arm/quantize_arm.c
+++ b/vp8/encoder/arm/quantize_arm.c
@@ -12,9 +12,9 @@
 #include <math.h>
 #include "vpx_mem/vpx_mem.h"
 
-#include "quantize.h"
-#include "entropy.h"
-#include "predictdc.h"
+#include "vp8/encoder/quantize.h"
+#include "vp8/common/entropy.h"
+#include "vp8/common/predictdc.h"
 
 DECLARE_ALIGNED(16, const short, vp8_rvsplus1_default_zig_zag1d[16]) =
 {
diff --git a/vp8/encoder/arm/variance_arm.c b/vp8/encoder/arm/variance_arm.c
index 9737bef..1cf9fd8 100644
--- a/vp8/encoder/arm/variance_arm.c
+++ b/vp8/encoder/arm/variance_arm.c
@@ -9,9 +9,9 @@
  */
 
 #include "vpx_config.h"
-#include "variance.h"
-#include "filter.h"
-#include "arm/bilinearfilter_arm.h"
+#include "vp8/encoder/variance.h"
+#include "vp8/common/filter.h"
+#include "vp8/common/arm/bilinearfilter_arm.h"
 
 #if HAVE_ARMV6
 
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h
index 06d7287..c807e29 100644
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -14,11 +14,15 @@
 
 #if HAVE_ARMV6
 
+extern prototype_sad(vp8_sad16x16_armv6);
 extern prototype_variance(vp8_variance16x16_armv6);
 extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_armv6);
 
 #if !CONFIG_RUNTIME_CPU_DETECT
 
+#undef  vp8_variance_sad16x16
+#define vp8_variance_sad16x16 vp8_sad16x16_armv6
+
 #undef  vp8_variance_subpixvar16x16
 #define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_armv6
 
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 52ad0d6..2c7f788 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -9,15 +9,15 @@
  */
 
 
-#include "header.h"
+#include "vp8/common/header.h"
 #include "encodemv.h"
-#include "entropymode.h"
-#include "findnearmv.h"
+#include "vp8/common/entropymode.h"
+#include "vp8/common/findnearmv.h"
 #include "mcomp.h"
-#include "systemdependent.h"
+#include "vp8/common/systemdependent.h"
 #include <assert.h>
 #include <stdio.h>
-#include "pragmas.h"
+#include "vp8/common/pragmas.h"
 #include "vpx_mem/vpx_mem.h"
 #include "bitstream.h"
 
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index 3ad40ef..2fd6782 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -12,10 +12,10 @@
 #ifndef __INC_BLOCK_H
 #define __INC_BLOCK_H
 
-#include "onyx.h"
-#include "blockd.h"
-#include "entropymv.h"
-#include "entropy.h"
+#include "vp8/common/onyx.h"
+#include "vp8/common/blockd.h"
+#include "vp8/common/entropymv.h"
+#include "vp8/common/entropy.h"
 #include "vpx_ports/mem.h"
 
 // motion search site
diff --git a/vp8/encoder/boolhuff.c b/vp8/encoder/boolhuff.c
index 82006b1..788d2b0 100644
--- a/vp8/encoder/boolhuff.c
+++ b/vp8/encoder/boolhuff.c
@@ -10,7 +10,7 @@
 
 
 #include "boolhuff.h"
-#include "blockd.h"
+#include "vp8/common/blockd.h"
 
 
 
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index c871e60..0ced6e7 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -12,22 +12,22 @@
 #include "vpx_ports/config.h"
 #include "encodemb.h"
 #include "encodemv.h"
-#include "common.h"
+#include "vp8/common/common.h"
 #include "onyx_int.h"
-#include "extend.h"
-#include "entropymode.h"
-#include "quant_common.h"
+#include "vp8/common/extend.h"
+#include "vp8/common/entropymode.h"
+#include "vp8/common/quant_common.h"
 #include "segmentation.h"
-#include "setupintrarecon.h"
+#include "vp8/common/setupintrarecon.h"
 #include "encodeintra.h"
-#include "reconinter.h"
+#include "vp8/common/reconinter.h"
 #include "rdopt.h"
 #include "pickinter.h"
-#include "findnearmv.h"
-#include "reconintra.h"
+#include "vp8/common/findnearmv.h"
+#include "vp8/common/reconintra.h"
 #include <stdio.h>
 #include <limits.h>
-#include "subpixel.h"
+#include "vp8/common/subpixel.h"
 #include "vpx_ports/vpx_timer.h"
 
 #if CONFIG_RUNTIME_CPU_DETECT
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index e016fa3..cd66016 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -10,15 +10,15 @@
 
 
 #include "vpx_ports/config.h"
-#include "idct.h"
+#include "vp8/common/idct.h"
 #include "quantize.h"
-#include "reconintra.h"
-#include "reconintra4x4.h"
+#include "vp8/common/reconintra.h"
+#include "vp8/common/reconintra4x4.h"
 #include "encodemb.h"
-#include "invtrans.h"
-#include "recon.h"
+#include "vp8/common/invtrans.h"
+#include "vp8/common/recon.h"
 #include "dct.h"
-#include "g_common.h"
+#include "vp8/common/g_common.h"
 #include "encodeintra.h"
 
 #define intra4x4ibias_rate    128
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index efcea74..30ef299 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -11,12 +11,12 @@
 
 #include "vpx_ports/config.h"
 #include "encodemb.h"
-#include "reconinter.h"
+#include "vp8/common/reconinter.h"
 #include "quantize.h"
 #include "tokenize.h"
-#include "invtrans.h"
-#include "recon.h"
-#include "reconintra.h"
+#include "vp8/common/invtrans.h"
+#include "vp8/common/recon.h"
+#include "vp8/common/reconintra.h"
 #include "dct.h"
 #include "vpx_mem/vpx_mem.h"
 
diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c
index 4cb4c6e..6b1e6f9 100644
--- a/vp8/encoder/encodemv.c
+++ b/vp8/encoder/encodemv.c
@@ -9,10 +9,10 @@
  */
 
 
-#include "common.h"
+#include "vp8/common/common.h"
 #include "encodemv.h"
-#include "entropymode.h"
-#include "systemdependent.h"
+#include "vp8/common/entropymode.h"
+#include "vp8/common/systemdependent.h"
 
 #include <math.h>
 
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index 4ce306f..12d5f66 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -9,9 +9,9 @@
  */
 
 #include "onyx_int.h"
-#include "threading.h"
-#include "common.h"
-#include "extend.h"
+#include "vp8/common/threading.h"
+#include "vp8/common/common.h"
+#include "vp8/common/extend.h"
 
 #if CONFIG_MULTITHREAD
 
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index fc6f043..9f0a972 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -14,18 +14,18 @@
 #include "onyx_int.h"
 #include "variance.h"
 #include "encodeintra.h"
-#include "setupintrarecon.h"
+#include "vp8/common/setupintrarecon.h"
 #include "mcomp.h"
 #include "vpx_scale/vpxscale.h"
 #include "encodemb.h"
-#include "extend.h"
-#include "systemdependent.h"
+#include "vp8/common/extend.h"
+#include "vp8/common/systemdependent.h"
 #include "vpx_scale/yv12extend.h"
 #include "vpx_mem/vpx_mem.h"
-#include "swapyv12buffer.h"
+#include "vp8/common/swapyv12buffer.h"
 #include <stdio.h>
 #include "rdopt.h"
-#include "quant_common.h"
+#include "vp8/common/quant_common.h"
 #include "encodemv.h"
 
 //#define OUTPUT_FPF 1
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index 4738a5b..d9f5474 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -10,8 +10,8 @@
 
 
 #include "vpx_ports/config.h"
-#include "variance.h"
-#include "onyx_int.h"
+#include "vp8/encoder/variance.h"
+#include "vp8/encoder/onyx_int.h"
 
 
 void vp8_arch_x86_encoder_init(VP8_COMP *cpi);
diff --git a/vp8/encoder/modecosts.c b/vp8/encoder/modecosts.c
index d23c97e..c636c48 100644
--- a/vp8/encoder/modecosts.c
+++ b/vp8/encoder/modecosts.c
@@ -9,10 +9,10 @@
  */
 
 
-#include "blockd.h"
+#include "vp8/common/blockd.h"
 #include "onyx_int.h"
 #include "treewriter.h"
-#include "entropymode.h"
+#include "vp8/common/entropymode.h"
 
 
 void vp8_init_mode_costs(VP8_COMP *c)
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 3f787d6..6ab1b39 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -9,27 +9,27 @@
  */
 
 
-#include "onyxc_int.h"
+#include "vp8/common/onyxc_int.h"
 #include "onyx_int.h"
-#include "systemdependent.h"
+#include "vp8/common/systemdependent.h"
 #include "quantize.h"
-#include "alloccommon.h"
+#include "vp8/common/alloccommon.h"
 #include "mcomp.h"
 #include "firstpass.h"
 #include "psnr.h"
 #include "vpx_scale/vpxscale.h"
-#include "extend.h"
+#include "vp8/common/extend.h"
 #include "ratectrl.h"
-#include "quant_common.h"
+#include "vp8/common/quant_common.h"
 #include "segmentation.h"
-#include "g_common.h"
+#include "vp8/common/g_common.h"
 #include "vpx_scale/yv12extend.h"
-#include "postproc.h"
+#include "vp8/common/postproc.h"
 #include "vpx_mem/vpx_mem.h"
-#include "swapyv12buffer.h"
-#include "threading.h"
+#include "vp8/common/swapyv12buffer.h"
+#include "vp8/common/threading.h"
 #include "vpx_ports/vpx_timer.h"
-#include "vpxerrors.h"
+#include "vp8/common/vpxerrors.h"
 #include "temporal_filter.h"
 #if ARCH_ARM
 #include "vpx_ports/arm.h"
@@ -262,39 +262,32 @@
 
 void vp8_dealloc_compressor_data(VP8_COMP *cpi)
 {
-    if(cpi->tplist!=0)
         vpx_free(cpi->tplist);
     cpi->tplist = NULL;
 
     // Delete last frame MV storage buffers
-    if (cpi->lfmv != 0)
         vpx_free(cpi->lfmv);
 
     cpi->lfmv = 0;
 
-    if (cpi->lf_ref_frame_sign_bias != 0)
         vpx_free(cpi->lf_ref_frame_sign_bias);
 
     cpi->lf_ref_frame_sign_bias = 0;
 
-    if (cpi->lf_ref_frame != 0)
         vpx_free(cpi->lf_ref_frame);
 
     cpi->lf_ref_frame = 0;
 
     // Delete sementation map
-    if (cpi->segmentation_map != 0)
         vpx_free(cpi->segmentation_map);
 
     cpi->segmentation_map = 0;
 
-    if (cpi->active_map != 0)
         vpx_free(cpi->active_map);
 
     cpi->active_map = 0;
 
     // Delete first pass motion map
-    if (cpi->fp_motion_map != 0)
         vpx_free(cpi->fp_motion_map);
 
     cpi->fp_motion_map = 0;
@@ -319,23 +312,19 @@
     cpi->tok = 0;
 
     // Structure used to monitor GF usage
-    if (cpi->gf_active_flags != 0)
         vpx_free(cpi->gf_active_flags);
 
     cpi->gf_active_flags = 0;
 
-    if(cpi->mb.pip)
         vpx_free(cpi->mb.pip);
 
     cpi->mb.pip = 0;
 
 #if !(CONFIG_REALTIME_ONLY)
-    if(cpi->total_stats)
         vpx_free(cpi->total_stats);
 
     cpi->total_stats = 0;
 
-    if(cpi->this_frame_stats)
         vpx_free(cpi->this_frame_stats);
 
     cpi->this_frame_stats = 0;
@@ -442,7 +431,6 @@
     set_segment_data(ptr, &feature_data[0][0], SEGMENT_DELTADATA);
 
     // Delete sementation map
-    if (seg_map != 0)
         vpx_free(seg_map);
 
     seg_map = 0;
@@ -536,7 +524,6 @@
     set_segment_data((VP8_PTR)cpi, &feature_data[0][0], SEGMENT_DELTADATA);
 
     // Delete sementation map
-    if (seg_map != 0)
         vpx_free(seg_map);
 
     seg_map = 0;
@@ -1280,6 +1267,8 @@
 static void alloc_raw_frame_buffers(VP8_COMP *cpi)
 {
     int i, buffers;
+    /* allocate source_buffer to be multiples of 16 */
+    int width = (cpi->oxcf.Width + 15) & ~15;
 
     buffers = cpi->oxcf.lag_in_frames;
 
@@ -1291,7 +1280,7 @@
 
     for (i = 0; i < buffers; i++)
         if (vp8_yv12_alloc_frame_buffer(&cpi->src_buffer[i].source_buffer,
-                                        cpi->oxcf.Width, cpi->oxcf.Height,
+                                        width, cpi->oxcf.Height,
                                         16))
             vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
                                "Failed to allocate lag buffer");
@@ -1299,7 +1288,7 @@
 #if VP8_TEMPORAL_ALT_REF
 
     if (vp8_yv12_alloc_frame_buffer(&cpi->alt_ref_buffer.source_buffer,
-                                    cpi->oxcf.Width, cpi->oxcf.Height, 16))
+                                    width, cpi->oxcf.Height, 16))
         vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
                            "Failed to allocate altref buffer");
 
@@ -1310,7 +1299,6 @@
 
 static int vp8_alloc_partition_data(VP8_COMP *cpi)
 {
-    if(cpi->mb.pip)
         vpx_free(cpi->mb.pip);
 
     cpi->mb.pip = vpx_calloc((cpi->common.mb_cols + 1) *
@@ -1357,7 +1345,6 @@
                            "Failed to allocate scaled source buffer");
 
 
-    if (cpi->tok != 0)
         vpx_free(cpi->tok);
 
     {
@@ -1373,7 +1360,6 @@
 
 
     // Structures used to minitor GF usage
-    if (cpi->gf_active_flags != 0)
         vpx_free(cpi->gf_active_flags);
 
     CHECK_MEM_ERROR(cpi->gf_active_flags, vpx_calloc(1, cm->mb_rows * cm->mb_cols));
@@ -1381,12 +1367,10 @@
     cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
 
 #if !(CONFIG_REALTIME_ONLY)
-    if(cpi->total_stats)
         vpx_free(cpi->total_stats);
 
     cpi->total_stats = vpx_calloc(1, vp8_firstpass_stats_sz(cpi->common.MBs));
 
-    if(cpi->this_frame_stats)
         vpx_free(cpi->this_frame_stats);
 
     cpi->this_frame_stats = vpx_calloc(1, vp8_firstpass_stats_sz(cpi->common.MBs));
@@ -1407,7 +1391,6 @@
         cpi->mt_sync_range = 16;
 #endif
 
-    if(cpi->tplist);
         vpx_free(cpi->tplist);
 
     CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows));
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index d6e4942..a9ccc89 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -14,21 +14,21 @@
 
 #include <stdio.h>
 #include "vpx_ports/config.h"
-#include "onyx.h"
+#include "vp8/common/onyx.h"
 #include "treewriter.h"
 #include "tokenize.h"
-#include "onyxc_int.h"
+#include "vp8/common/onyxc_int.h"
 #include "variance.h"
 #include "dct.h"
 #include "encodemb.h"
 #include "quantize.h"
-#include "entropy.h"
-#include "threading.h"
+#include "vp8/common/entropy.h"
+#include "vp8/common/threading.h"
 #include "vpx_ports/mem.h"
 #include "vpx/internal/vpx_codec_internal.h"
 #include "mcomp.h"
 #include "temporal_filter.h"
-#include "findnearmv.h"
+#include "vp8/common/findnearmv.h"
 
 //#define SPEEDSTATS 1
 #define MIN_GF_INTERVAL             4
diff --git a/vp8/encoder/parms.cpp b/vp8/encoder/parms.cpp
index 6cc4501..2a39b2c 100644
--- a/vp8/encoder/parms.cpp
+++ b/vp8/encoder/parms.cpp
@@ -16,7 +16,7 @@
 #include <fstream>
 extern "C"
 {
-    #include "onyx.h"
+    #include "vp8/common/onyx.h"
 }
 
 
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 1a58257..6ab85ad 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -14,14 +14,14 @@
 #include "onyx_int.h"
 #include "modecosts.h"
 #include "encodeintra.h"
-#include "entropymode.h"
+#include "vp8/common/entropymode.h"
 #include "pickinter.h"
-#include "findnearmv.h"
+#include "vp8/common/findnearmv.h"
 #include "encodemb.h"
-#include "reconinter.h"
-#include "reconintra.h"
-#include "reconintra4x4.h"
-#include "g_common.h"
+#include "vp8/common/reconinter.h"
+#include "vp8/common/reconintra.h"
+#include "vp8/common/reconintra4x4.h"
+#include "vp8/common/g_common.h"
 #include "variance.h"
 #include "mcomp.h"
 #include "rdopt.h"
diff --git a/vp8/encoder/pickinter.h b/vp8/encoder/pickinter.h
index b80e4c8..af060d7 100644
--- a/vp8/encoder/pickinter.h
+++ b/vp8/encoder/pickinter.h
@@ -12,7 +12,7 @@
 #ifndef __INC_PICKINTER_H
 #define __INC_PICKINTER_H
 #include "vpx_ports/config.h"
-#include "onyxc_int.h"
+#include "vp8/common/onyxc_int.h"
 
 #define RD_ESTIMATE(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
 extern int vp8_pick_intra4x4mby_modes(const VP8_ENCODER_RTCD *, MACROBLOCK *mb, int *Rate, int *Distortion);
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c
index 78aa866..d294af6 100644
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -9,13 +9,13 @@
  */
 
 
-#include "onyxc_int.h"
+#include "vp8/common/onyxc_int.h"
 #include "onyx_int.h"
 #include "quantize.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_scale/yv12extend.h"
 #include "vpx_scale/vpxscale.h"
-#include "alloccommon.h"
+#include "vp8/common/alloccommon.h"
 #if ARCH_ARM
 #include "vpx_ports/arm.h"
 #endif
diff --git a/vp8/encoder/ppc/csystemdependent.c b/vp8/encoder/ppc/csystemdependent.c
index 588656b..8dfd2a5 100644
--- a/vp8/encoder/ppc/csystemdependent.c
+++ b/vp8/encoder/ppc/csystemdependent.c
@@ -9,8 +9,8 @@
  */
 
 
-#include "variance.h"
-#include "onyx_int.h"
+#include "vp8/encoder/variance.h"
+#include "vp8/encoder/onyx_int.h"
 
 SADFunction *vp8_sad16x16;
 SADFunction *vp8_sad16x8;
diff --git a/vp8/encoder/psnr.c b/vp8/encoder/psnr.c
index dc2a03b..96b0ea8 100644
--- a/vp8/encoder/psnr.c
+++ b/vp8/encoder/psnr.c
@@ -11,7 +11,7 @@
 
 #include "vpx_scale/yv12config.h"
 #include "math.h"
-#include "systemdependent.h" /* for vp8_clear_system_state() */
+#include "vp8/common/systemdependent.h" /* for vp8_clear_system_state() */
 
 #define MAX_PSNR 60
 
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 4a2329f..02b9d7b 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -13,8 +13,8 @@
 #include "vpx_mem/vpx_mem.h"
 
 #include "quantize.h"
-#include "entropy.h"
-#include "predictdc.h"
+#include "vp8/common/entropy.h"
+#include "vp8/common/predictdc.h"
 
 #define EXACT_QUANT
 
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index b69a196..bfffe43 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -16,11 +16,11 @@
 #include <assert.h>
 
 #include "math.h"
-#include "common.h"
+#include "vp8/common/common.h"
 #include "ratectrl.h"
-#include "entropymode.h"
+#include "vp8/common/entropymode.h"
 #include "vpx_mem/vpx_mem.h"
-#include "systemdependent.h"
+#include "vp8/common/systemdependent.h"
 #include "encodemv.h"
 
 
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 3449e45..ba5d337 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -13,28 +13,28 @@
 #include <math.h>
 #include <limits.h>
 #include <assert.h>
-#include "pragmas.h"
+#include "vp8/common/pragmas.h"
 
 #include "tokenize.h"
 #include "treewriter.h"
 #include "onyx_int.h"
 #include "modecosts.h"
 #include "encodeintra.h"
-#include "entropymode.h"
-#include "reconinter.h"
-#include "reconintra.h"
-#include "reconintra4x4.h"
-#include "findnearmv.h"
+#include "vp8/common/entropymode.h"
+#include "vp8/common/reconinter.h"
+#include "vp8/common/reconintra.h"
+#include "vp8/common/reconintra4x4.h"
+#include "vp8/common/findnearmv.h"
 #include "encodemb.h"
 #include "quantize.h"
-#include "idct.h"
-#include "g_common.h"
+#include "vp8/common/idct.h"
+#include "vp8/common/g_common.h"
 #include "variance.h"
 #include "mcomp.h"
 
 #include "vpx_mem/vpx_mem.h"
 #include "dct.h"
-#include "systemdependent.h"
+#include "vp8/common/systemdependent.h"
 
 #if CONFIG_RUNTIME_CPU_DETECT
 #define IF_RTCD(x)  (x)
diff --git a/vp8/encoder/segmentation.h b/vp8/encoder/segmentation.h
index 216e194..12815b0 100644
--- a/vp8/encoder/segmentation.h
+++ b/vp8/encoder/segmentation.h
@@ -10,7 +10,7 @@
 
 
 #include "string.h"
-#include "blockd.h"
+#include "vp8/common/blockd.h"
 #include "onyx_int.h"
 
 extern void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x);
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
index e4c3db1..9ff8887 100644
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -9,27 +9,27 @@
  */
 
 
-#include "onyxc_int.h"
+#include "vp8/common/onyxc_int.h"
 #include "onyx_int.h"
-#include "systemdependent.h"
+#include "vp8/common/systemdependent.h"
 #include "quantize.h"
-#include "alloccommon.h"
+#include "vp8/common/alloccommon.h"
 #include "mcomp.h"
 #include "firstpass.h"
 #include "psnr.h"
 #include "vpx_scale/vpxscale.h"
-#include "extend.h"
+#include "vp8/common/extend.h"
 #include "ratectrl.h"
-#include "quant_common.h"
+#include "vp8/common/quant_common.h"
 #include "segmentation.h"
-#include "g_common.h"
+#include "vp8/common/g_common.h"
 #include "vpx_scale/yv12extend.h"
-#include "postproc.h"
+#include "vp8/common/postproc.h"
 #include "vpx_mem/vpx_mem.h"
-#include "swapyv12buffer.h"
-#include "threading.h"
+#include "vp8/common/swapyv12buffer.h"
+#include "vp8/common/threading.h"
 #include "vpx_ports/vpx_timer.h"
-#include "vpxerrors.h"
+#include "vp8/common/vpxerrors.h"
 
 #include <math.h>
 #include <limits.h>
diff --git a/vp8/encoder/tokenize.h b/vp8/encoder/tokenize.h
index ed5eb0c..d87c1a3 100644
--- a/vp8/encoder/tokenize.h
+++ b/vp8/encoder/tokenize.h
@@ -12,7 +12,7 @@
 #ifndef tokenize_h
 #define tokenize_h
 
-#include "entropy.h"
+#include "vp8/common/entropy.h"
 #include "block.h"
 
 void vp8_tokenize_initialize();
diff --git a/vp8/encoder/treewriter.h b/vp8/encoder/treewriter.h
index 88096d8..c28a0fa 100644
--- a/vp8/encoder/treewriter.h
+++ b/vp8/encoder/treewriter.h
@@ -15,7 +15,7 @@
 /* Trees map alphabets into huffman-like codes suitable for an arithmetic
    bit coder.  Timothy S Murphy  11 October 2004 */
 
-#include "treecoder.h"
+#include "vp8/common/treecoder.h"
 
 #include "boolhuff.h"       /* for now */
 
diff --git a/vp8/encoder/x86/variance_mmx.c b/vp8/encoder/x86/variance_mmx.c
index 2df73a6..6eed98e 100644
--- a/vp8/encoder/x86/variance_mmx.c
+++ b/vp8/encoder/x86/variance_mmx.c
@@ -9,8 +9,8 @@
  */
 
 
-#include "variance.h"
-#include "pragmas.h"
+#include "vp8/encoder/variance.h"
+#include "vp8/common/pragmas.h"
 #include "vpx_ports/mem.h"
 
 extern void filter_block1d_h6_mmx
diff --git a/vp8/encoder/x86/variance_sse2.c b/vp8/encoder/x86/variance_sse2.c
index 6f79f0d..7cf6a63 100644
--- a/vp8/encoder/x86/variance_sse2.c
+++ b/vp8/encoder/x86/variance_sse2.c
@@ -9,8 +9,8 @@
  */
 
 
-#include "variance.h"
-#include "pragmas.h"
+#include "vp8/encoder/variance.h"
+#include "vp8/common/pragmas.h"
 #include "vpx_ports/mem.h"
 
 extern void filter_block1d_h6_mmx(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index 31438f9..4e3ce07 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -11,8 +11,8 @@
 
 #include "vpx_ports/config.h"
 #include "vpx_ports/x86.h"
-#include "variance.h"
-#include "onyx_int.h"
+#include "vp8/encoder/variance.h"
+#include "vp8/encoder/onyx_int.h"
 
 
 #if HAVE_MMX
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index af07618..822af83 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -8,24 +8,13 @@
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 
-
-#add this file to the installed sources list
 VP8_COMMON_SRCS-yes += vp8_common.mk
-
-CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)common
 VP8_COMMON_SRCS-yes += common/type_aliases.h
 VP8_COMMON_SRCS-yes += common/pragmas.h
-
-CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)common
 VP8_COMMON_SRCS-yes += common/vpxerrors.h
-
-CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)common
 VP8_COMMON_SRCS-yes += common/ppflags.h
 VP8_COMMON_SRCS-yes += common/onyx.h
 VP8_COMMON_SRCS-yes += common/onyxd.h
-
-CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)common
-
 VP8_COMMON_SRCS-yes += common/alloccommon.c
 VP8_COMMON_SRCS-yes += common/blockd.c
 VP8_COMMON_SRCS-yes += common/coefupdateprobs.h
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index ea99c61..f182b4f 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -12,10 +12,10 @@
 #include "vpx/vpx_codec.h"
 #include "vpx/internal/vpx_codec_internal.h"
 #include "vpx_version.h"
-#include "onyx_int.h"
+#include "vp8/encoder/onyx_int.h"
 #include "vpx/vp8e.h"
 #include "vp8/encoder/firstpass.h"
-#include "onyx.h"
+#include "vp8/common/onyx.h"
 #include <stdlib.h>
 #include <string.h>
 
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 1b1cf3b..ce55c05 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -15,8 +15,8 @@
 #include "vpx/vp8dx.h"
 #include "vpx/internal/vpx_codec_internal.h"
 #include "vpx_version.h"
-#include "onyxd.h"
-#include "onyxd_int.h"
+#include "common/onyxd.h"
+#include "decoder/onyxd_int.h"
 
 #define VP8_CAP_POSTPROC (CONFIG_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0)
 
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index 1ec26e6..a84188f 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -33,8 +33,6 @@
 #INCLUDES += common
 #INCLUDES += encoder
 
-CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)encoder
-
 VP8_CX_SRCS-yes += encoder/bitstream.c
 VP8_CX_SRCS-yes += encoder/boolhuff.c
 VP8_CX_SRCS-yes += encoder/dct.c
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk
index abc5dc8..7980a0f 100644
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -34,6 +34,7 @@
 
 #File list for armv6
 # encoder
+VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/armv6/vp8_sad16x16_armv6$(ASM)
 VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/armv6/vp8_variance16x16_armv6$(ASM)
 VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/armv6/walsh_v6$(ASM)
 
diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk
index 62f6211..5649671 100644
--- a/vp8/vp8dx.mk
+++ b/vp8/vp8dx.mk
@@ -24,9 +24,6 @@
 
 VP8_DX_SRCS-yes += vp8_dx_iface.c
 
-CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)decoder
-
-
 # common
 #define ARM
 #define DISABLE_THREAD
diff --git a/vpx_scale/blackfin/yv12config.c b/vpx_scale/blackfin/yv12config.c
index 42538af..c404202 100644
--- a/vpx_scale/blackfin/yv12config.c
+++ b/vpx_scale/blackfin/yv12config.c
@@ -39,10 +39,7 @@
 {
     if (ybf)
     {
-        if (ybf->buffer_alloc)
-        {
             duck_free(ybf->buffer_alloc);
-        }
 
         ybf->buffer_alloc = 0;
     }
diff --git a/vpx_scale/generic/bicubic_scaler.c b/vpx_scale/generic/bicubic_scaler.c
index 420f719..4468e9d 100644
--- a/vpx_scale/generic/bicubic_scaler.c
+++ b/vpx_scale/generic/bicubic_scaler.c
@@ -271,17 +271,17 @@
 {
     if (!g_first_time)
     {
-        if (g_b_scaler.l_w) vpx_free(g_b_scaler.l_w);
+        vpx_free(g_b_scaler.l_w);
 
-        if (g_b_scaler.l_h) vpx_free(g_b_scaler.l_h);
+        vpx_free(g_b_scaler.l_h);
 
-        if (g_b_scaler.l_h_uv) vpx_free(g_b_scaler.l_h_uv);
+        vpx_free(g_b_scaler.l_h_uv);
 
-        if (g_b_scaler.c_w) vpx_free(g_b_scaler.c_w);
+        vpx_free(g_b_scaler.c_w);
 
-        if (g_b_scaler.c_h) vpx_free(g_b_scaler.c_h);
+        vpx_free(g_b_scaler.c_h);
 
-        if (g_b_scaler.c_h_uv) vpx_free(g_b_scaler.c_h_uv);
+        vpx_free(g_b_scaler.c_h_uv);
 
         vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
     }
@@ -342,21 +342,21 @@
     d_h_uv = (in_height / 2) / gcd_h_uv;
 
     // allocate memory for the coefficents
-    if (g_b_scaler.l_w) vpx_free(g_b_scaler.l_w);
+    vpx_free(g_b_scaler.l_w);
 
-    if (g_b_scaler.l_h) vpx_free(g_b_scaler.l_h);
+    vpx_free(g_b_scaler.l_h);
 
-    if (g_b_scaler.l_h_uv) vpx_free(g_b_scaler.l_h_uv);
+    vpx_free(g_b_scaler.l_h_uv);
 
     g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2);
     g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2);
     g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2);
 
-    if (g_b_scaler.c_w) vpx_free(g_b_scaler.c_w);
+    vpx_free(g_b_scaler.c_w);
 
-    if (g_b_scaler.c_h) vpx_free(g_b_scaler.c_h);
+    vpx_free(g_b_scaler.c_h);
 
-    if (g_b_scaler.c_h_uv) vpx_free(g_b_scaler.c_h_uv);
+    vpx_free(g_b_scaler.c_h_uv);
 
     g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2);
     g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2);
diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c
index e7c5b18..9539dca 100644
--- a/vpx_scale/generic/yv12config.c
+++ b/vpx_scale/generic/yv12config.c
@@ -24,10 +24,7 @@
 {
     if (ybf)
     {
-        if (ybf->buffer_alloc)
-        {
             duck_free(ybf->buffer_alloc);
-        }
 
         ybf->buffer_alloc = 0;
     }