Merge "Making c++ compiler happier."
diff --git a/examples/vp9_spatial_scalable_encoder.c b/examples/vp9_spatial_scalable_encoder.c
index 5c80d34..64e62ef 100644
--- a/examples/vp9_spatial_scalable_encoder.c
+++ b/examples/vp9_spatial_scalable_encoder.c
@@ -67,13 +67,22 @@
     ARG_DEF(NULL, "pass", 1, "Pass to execute (1/2)");
 static const arg_def_t fpf_name_arg =
     ARG_DEF(NULL, "fpf", 1, "First pass statistics file name");
+static const arg_def_t min_q_arg =
+    ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
+static const arg_def_t max_q_arg =
+    ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
+static const arg_def_t min_bitrate_arg =
+    ARG_DEF(NULL, "min-bitrate", 1, "Minimum bitrate");
+static const arg_def_t max_bitrate_arg =
+    ARG_DEF(NULL, "max-bitrate", 1, "Maximum bitrate");
 
 static const arg_def_t *svc_args[] = {
   &encoding_mode_arg, &frames_arg,        &width_arg,       &height_arg,
   &timebase_arg,      &bitrate_arg,       &skip_frames_arg, &layers_arg,
   &kf_dist_arg,       &scale_factors_arg, &quantizers_arg,
-  &quantizers_keyframe_arg,               &passes_arg,       &pass_arg,
-  &fpf_name_arg,      NULL
+  &quantizers_keyframe_arg,               &passes_arg,      &pass_arg,
+  &fpf_name_arg,      &min_q_arg,         &max_q_arg,       &min_bitrate_arg,
+  &max_bitrate_arg,   NULL
 };
 
 static const SVC_ENCODING_MODE default_encoding_mode =
@@ -120,6 +129,8 @@
   int passes = 0;
   int pass = 0;
   const char *fpf_file_name = NULL;
+  unsigned int min_bitrate = 0;
+  unsigned int max_bitrate = 0;
 
   // initialize SvcContext with parameters that will be passed to vpx_svc_init
   svc_ctx->log_level = SVC_LOG_DEBUG;
@@ -186,6 +197,14 @@
       }
     } else if (arg_match(&arg, &fpf_name_arg, argi)) {
       fpf_file_name = arg.val;
+    } else if (arg_match(&arg, &min_q_arg, argi)) {
+      enc_cfg->rc_min_quantizer = arg_parse_uint(&arg);
+    } else if (arg_match(&arg, &max_q_arg, argi)) {
+      enc_cfg->rc_max_quantizer = arg_parse_uint(&arg);
+    } else if (arg_match(&arg, &min_bitrate_arg, argi)) {
+      min_bitrate = arg_parse_uint(&arg);
+    } else if (arg_match(&arg, &max_bitrate_arg, argi)) {
+      max_bitrate = arg_parse_uint(&arg);
     } else {
       ++argj;
     }
@@ -221,6 +240,17 @@
     app_input->pass = pass;
   }
 
+  if (enc_cfg->rc_target_bitrate > 0) {
+    if (min_bitrate > 0) {
+      enc_cfg->rc_2pass_vbr_minsection_pct =
+          min_bitrate * 100 / enc_cfg->rc_target_bitrate;
+    }
+    if (max_bitrate > 0) {
+      enc_cfg->rc_2pass_vbr_maxsection_pct =
+          max_bitrate * 100 / enc_cfg->rc_target_bitrate;
+    }
+  }
+
   // Check for unrecognized options
   for (argi = argv; *argi; ++argi)
     if (argi[0][0] == '-' && strlen(argi[0]) > 1)
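
The --min-bitrate/--max-bitrate options added above are not handed to the encoder as absolute rates; they are converted into the two-pass VBR section percentages relative to the target bitrate. Below is a minimal standalone sketch of that conversion, using a plain struct instead of the real vpx_codec_enc_cfg_t (the field names rc_target_bitrate, rc_2pass_vbr_minsection_pct and rc_2pass_vbr_maxsection_pct follow the hunk above; everything else, including the struct and function names, is illustrative only):

    #include <stdio.h>

    /* Stand-in for the three vpx_codec_enc_cfg_t fields used in the hunk. */
    struct rc_cfg {
      unsigned int rc_target_bitrate;           /* target rate, kbit/s */
      unsigned int rc_2pass_vbr_minsection_pct; /* min section rate, % of target */
      unsigned int rc_2pass_vbr_maxsection_pct; /* max section rate, % of target */
    };

    /* Same arithmetic as the example encoder: express the absolute min/max
     * bitrates as percentages of the target, skipping values left at 0. */
    static void set_section_pct(struct rc_cfg *cfg,
                                unsigned int min_bitrate,
                                unsigned int max_bitrate) {
      if (cfg->rc_target_bitrate > 0) {
        if (min_bitrate > 0)
          cfg->rc_2pass_vbr_minsection_pct =
              min_bitrate * 100 / cfg->rc_target_bitrate;
        if (max_bitrate > 0)
          cfg->rc_2pass_vbr_maxsection_pct =
              max_bitrate * 100 / cfg->rc_target_bitrate;
      }
    }

    int main(void) {
      struct rc_cfg cfg = { 800, 0, 0 };   /* 800 kbit/s target */
      set_section_pct(&cfg, 200, 1600);    /* --min-bitrate 200 --max-bitrate 1600 */
      printf("min %u%%, max %u%%\n",       /* prints: min 25%, max 200% */
             cfg.rc_2pass_vbr_minsection_pct,
             cfg.rc_2pass_vbr_maxsection_pct);
      return 0;
    }
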
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index ad11d1c..600dfbb 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -17,16 +17,15 @@
 #include "vp9/common/vp9_onyxc_int.h"
 #include "vp9/common/vp9_systemdependent.h"
 
-void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi) {
-  const int stride = cm->mode_info_stride;
+static void clear_mi_border(const VP9_COMMON *cm, MODE_INFO *mi) {
   int i;
 
-  // Clear down top border row
-  vpx_memset(mi, 0, sizeof(MODE_INFO) * stride);
+  // Top border row
+  vpx_memset(mi, 0, sizeof(*mi) * cm->mode_info_stride);
 
-  // Clear left border column
-  for (i = 1; i < cm->mi_rows + 1; i++)
-    vpx_memset(&mi[i * stride], 0, sizeof(MODE_INFO));
+  // Left border column
+  for (i = 1; i < cm->mi_rows + 1; ++i)
+    vpx_memset(&mi[i * cm->mode_info_stride], 0, sizeof(*mi));
 }
 
 static void set_mb_mi(VP9_COMMON *cm, int aligned_width, int aligned_height) {
@@ -52,7 +51,7 @@
              cm->mode_info_stride * (cm->mi_rows + 1) *
              sizeof(*cm->mi_grid_base));
 
-  vp9_update_mode_info_border(cm, cm->prev_mip);
+  clear_mi_border(cm, cm->prev_mip);
 }
 
 static int alloc_mi(VP9_COMMON *cm, int mi_size) {
@@ -211,10 +210,6 @@
   vp9_free_internal_frame_buffers(&cm->int_frame_buffers);
 }
 
-void vp9_initialize_common() {
-  vp9_init_neighbors();
-}
-
 void vp9_update_frame_size(VP9_COMMON *cm) {
   const int aligned_width = ALIGN_POWER_OF_TWO(cm->width, MI_SIZE_LOG2);
   const int aligned_height = ALIGN_POWER_OF_TWO(cm->height, MI_SIZE_LOG2);
diff --git a/vp9/common/vp9_alloccommon.h b/vp9/common/vp9_alloccommon.h
index fca6935..06636a9 100644
--- a/vp9/common/vp9_alloccommon.h
+++ b/vp9/common/vp9_alloccommon.h
@@ -12,26 +12,23 @@
 #ifndef VP9_COMMON_VP9_ALLOCCOMMON_H_
 #define VP9_COMMON_VP9_ALLOCCOMMON_H_
 
-#include "vp9/common/vp9_onyxc_int.h"
-
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-void vp9_initialize_common();
+struct VP9Common;
 
-void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi);
+void vp9_remove_common(struct VP9Common *cm);
 
-void vp9_remove_common(VP9_COMMON *cm);
+int vp9_resize_frame_buffers(struct VP9Common *cm, int width, int height);
 
-int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height);
-int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height);
-void vp9_free_frame_buffers(VP9_COMMON *cm);
+int vp9_alloc_frame_buffers(struct VP9Common *cm, int width, int height);
 
+void vp9_free_frame_buffers(struct VP9Common *cm);
 
-void vp9_update_frame_size(VP9_COMMON *cm);
+void vp9_update_frame_size(struct VP9Common *cm);
 
-void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm);
+void vp9_swap_mi_and_prev_mi(struct VP9Common *cm);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index af8afed..e48d417 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -228,6 +228,12 @@
   }
 }
 
+static uint8_t get_filter_level(const loop_filter_info_n *lfi_n,
+                                const MB_MODE_INFO *mbmi) {
+  return lfi_n->lvl[mbmi->segment_id][mbmi->ref_frame[0]]
+                   [mode_lf_lut[mbmi->mode]];
+}
+
 void vp9_loop_filter_init(VP9_COMMON *cm) {
   loop_filter_info_n *lfi = &cm->lf_info;
   struct loopfilter *lf = &cm->lf;
@@ -493,27 +499,25 @@
                         const MODE_INFO *mi, const int shift_y,
                         const int shift_uv,
                         LOOP_FILTER_MASK *lfm) {
-  const BLOCK_SIZE block_size = mi->mbmi.sb_type;
-  const TX_SIZE tx_size_y = mi->mbmi.tx_size;
-  const TX_SIZE tx_size_uv = get_uv_tx_size(&mi->mbmi);
-  const int skip = mi->mbmi.skip;
-  const int seg = mi->mbmi.segment_id;
-  const int ref = mi->mbmi.ref_frame[0];
-  const int filter_level = lfi_n->lvl[seg][ref][mode_lf_lut[mi->mbmi.mode]];
-  uint64_t *left_y = &lfm->left_y[tx_size_y];
-  uint64_t *above_y = &lfm->above_y[tx_size_y];
-  uint64_t *int_4x4_y = &lfm->int_4x4_y;
-  uint16_t *left_uv = &lfm->left_uv[tx_size_uv];
-  uint16_t *above_uv = &lfm->above_uv[tx_size_uv];
-  uint16_t *int_4x4_uv = &lfm->int_4x4_uv;
+  const MB_MODE_INFO *mbmi = &mi->mbmi;
+  const BLOCK_SIZE block_size = mbmi->sb_type;
+  const TX_SIZE tx_size_y = mbmi->tx_size;
+  const TX_SIZE tx_size_uv = get_uv_tx_size(mbmi);
+  const int filter_level = get_filter_level(lfi_n, mbmi);
+  uint64_t *const left_y = &lfm->left_y[tx_size_y];
+  uint64_t *const above_y = &lfm->above_y[tx_size_y];
+  uint64_t *const int_4x4_y = &lfm->int_4x4_y;
+  uint16_t *const left_uv = &lfm->left_uv[tx_size_uv];
+  uint16_t *const above_uv = &lfm->above_uv[tx_size_uv];
+  uint16_t *const int_4x4_uv = &lfm->int_4x4_uv;
   int i;
-  int w = num_8x8_blocks_wide_lookup[block_size];
-  int h = num_8x8_blocks_high_lookup[block_size];
 
   // If filter level is 0 we don't loop filter.
   if (!filter_level) {
     return;
   } else {
+    const int w = num_8x8_blocks_wide_lookup[block_size];
+    const int h = num_8x8_blocks_high_lookup[block_size];
     int index = shift_y;
     for (i = 0; i < h; i++) {
       vpx_memset(&lfm->lfl_y[index], filter_level, w);
@@ -540,7 +544,7 @@
 
   // If the block has no coefficients and is not intra we skip applying
   // the loop filter on block edges.
-  if (skip && ref > INTRA_FRAME)
+  if (mbmi->skip && is_inter_block(mbmi))
     return;
 
   // Here we are adding a mask for the transform size.  The transform
@@ -561,12 +565,11 @@
   // boundaries.  These differ from the 4x4 boundaries on the outside edge of
   // an 8x8 in that the internal ones can be skipped and don't depend on
   // the prediction block size.
-  if (tx_size_y == TX_4X4) {
+  if (tx_size_y == TX_4X4)
     *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y;
-  }
-  if (tx_size_uv == TX_4X4) {
+
+  if (tx_size_uv == TX_4X4)
     *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
-  }
 }
 
 // This function does the same thing as the one above with the exception that
@@ -575,22 +578,20 @@
 static void build_y_mask(const loop_filter_info_n *const lfi_n,
                          const MODE_INFO *mi, const int shift_y,
                          LOOP_FILTER_MASK *lfm) {
-  const BLOCK_SIZE block_size = mi->mbmi.sb_type;
-  const TX_SIZE tx_size_y = mi->mbmi.tx_size;
-  const int skip = mi->mbmi.skip;
-  const int seg = mi->mbmi.segment_id;
-  const int ref = mi->mbmi.ref_frame[0];
-  const int filter_level = lfi_n->lvl[seg][ref][mode_lf_lut[mi->mbmi.mode]];
-  uint64_t *left_y = &lfm->left_y[tx_size_y];
-  uint64_t *above_y = &lfm->above_y[tx_size_y];
-  uint64_t *int_4x4_y = &lfm->int_4x4_y;
+  const MB_MODE_INFO *mbmi = &mi->mbmi;
+  const BLOCK_SIZE block_size = mbmi->sb_type;
+  const TX_SIZE tx_size_y = mbmi->tx_size;
+  const int filter_level = get_filter_level(lfi_n, mbmi);
+  uint64_t *const left_y = &lfm->left_y[tx_size_y];
+  uint64_t *const above_y = &lfm->above_y[tx_size_y];
+  uint64_t *const int_4x4_y = &lfm->int_4x4_y;
   int i;
-  int w = num_8x8_blocks_wide_lookup[block_size];
-  int h = num_8x8_blocks_high_lookup[block_size];
 
   if (!filter_level) {
     return;
   } else {
+    const int w = num_8x8_blocks_wide_lookup[block_size];
+    const int h = num_8x8_blocks_high_lookup[block_size];
     int index = shift_y;
     for (i = 0; i < h; i++) {
       vpx_memset(&lfm->lfl_y[index], filter_level, w);
@@ -601,7 +602,7 @@
   *above_y |= above_prediction_mask[block_size] << shift_y;
   *left_y |= left_prediction_mask[block_size] << shift_y;
 
-  if (skip && ref > INTRA_FRAME)
+  if (mbmi->skip && is_inter_block(mbmi))
     return;
 
   *above_y |= (size_mask[block_size] &
@@ -610,9 +611,8 @@
   *left_y |= (size_mask[block_size] &
               left_64x64_txform_mask[tx_size_y]) << shift_y;
 
-  if (tx_size_y == TX_4X4) {
+  if (tx_size_y == TX_4X4)
     *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y;
-  }
 }
 
 // This function sets up the bit masks for the entire 64x64 region represented
@@ -868,13 +868,6 @@
   assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16]));
 }
 
-static uint8_t build_lfi(const loop_filter_info_n *lfi_n,
-                     const MB_MODE_INFO *mbmi) {
-  const int seg = mbmi->segment_id;
-  const int ref = mbmi->ref_frame[0];
-  return lfi_n->lvl[seg][ref][mode_lf_lut[mbmi->mode]];
-}
-
 static void filter_selectively_vert(uint8_t *s, int pitch,
                                     unsigned int mask_16x16,
                                     unsigned int mask_8x8,
@@ -953,7 +946,7 @@
 
       // Filter level can vary per MI
       if (!(lfl[(r << 3) + (c >> ss_x)] =
-          build_lfi(&cm->lf_info, &mi[0].mbmi)))
+            get_filter_level(&cm->lf_info, &mi[0].mbmi)))
         continue;
 
       // Build masks based on the transform size of each block
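
The new get_filter_level() helper above (which also replaces the removed build_lfi()) is a pure three-level table lookup: filter level indexed by segment id, then by the first reference frame, then by a mode-to-filter-class LUT entry. A self-contained sketch of that indexing pattern with toy dimensions and made-up values follows; the real table lives in loop_filter_info_n::lvl and mode_lf_lut, all names and sizes here are stand-ins:

    #include <stdint.h>
    #include <stdio.h>

    /* Toy dimensions standing in for MAX_SEGMENTS, the reference-frame count
     * and the number of mode classes the loop filter distinguishes. */
    #define SEGMENTS     8
    #define REF_FRAMES   4
    #define MODE_CLASSES 2

    /* Stand-ins for loop_filter_info_n::lvl and the per-block mode info. */
    typedef struct {
      uint8_t lvl[SEGMENTS][REF_FRAMES][MODE_CLASSES];
    } lf_info;

    typedef struct {
      int segment_id;
      int ref_frame0;  /* mbmi->ref_frame[0] in the real code */
      int mode_class;  /* mode_lf_lut[mbmi->mode] in the real code */
    } block_info;

    /* Same shape as get_filter_level(): a table lookup, no branching. */
    static uint8_t filter_level(const lf_info *lfi, const block_info *b) {
      return lfi->lvl[b->segment_id][b->ref_frame0][b->mode_class];
    }

    int main(void) {
      lf_info lfi = {{{{0}}}};
      block_info b = { 1, 2, 0 };
      lfi.lvl[1][2][0] = 32;  /* made-up level for this combination */
      printf("level = %d\n", filter_level(&lfi, &b));  /* prints: level = 32 */
      return 0;
    }
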
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index a049db1..3cc12cf 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -286,12 +286,12 @@
   return cm->frame_type == KEY_FRAME || cm->intra_only;
 }
 
-static INLINE void update_partition_context(
-    PARTITION_CONTEXT *above_seg_context,
-    PARTITION_CONTEXT left_seg_context[8],
-    int mi_row, int mi_col, BLOCK_SIZE subsize, BLOCK_SIZE bsize) {
-  PARTITION_CONTEXT *const above_ctx = above_seg_context + mi_col;
-  PARTITION_CONTEXT *const left_ctx = left_seg_context + (mi_row & MI_MASK);
+static INLINE void update_partition_context(MACROBLOCKD *xd,
+                                            int mi_row, int mi_col,
+                                            BLOCK_SIZE subsize,
+                                            BLOCK_SIZE bsize) {
+  PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col;
+  PARTITION_CONTEXT *const left_ctx = xd->left_seg_context + (mi_row & MI_MASK);
 
   // num_4x4_blocks_wide_lookup[bsize] / 2
   const int bs = num_8x8_blocks_wide_lookup[bsize];
@@ -303,12 +303,11 @@
   vpx_memset(left_ctx, partition_context_lookup[subsize].left, bs);
 }
 
-static INLINE int partition_plane_context(
-    const PARTITION_CONTEXT *above_seg_context,
-    const PARTITION_CONTEXT left_seg_context[8],
-    int mi_row, int mi_col, BLOCK_SIZE bsize) {
-  const PARTITION_CONTEXT *above_ctx = above_seg_context + mi_col;
-  const PARTITION_CONTEXT *left_ctx = left_seg_context + (mi_row & MI_MASK);
+static INLINE int partition_plane_context(const MACROBLOCKD *xd,
+                                          int mi_row, int mi_col,
+                                          BLOCK_SIZE bsize) {
+  const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col;
+  const PARTITION_CONTEXT *left_ctx = xd->left_seg_context + (mi_row & MI_MASK);
 
   const int bsl = mi_width_log2(bsize);
   const int bs = 1 << bsl;
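
The signature change above ripples through every call site in the decoder and encoder (see the vp9_decodeframe.c, vp9_bitstream.c and vp9_encodeframe.c hunks below): instead of threading xd->above_seg_context and xd->left_seg_context through each call, callers now pass the MACROBLOCKD itself and the helper picks out what it needs. A minimal sketch of the same refactoring pattern on a toy struct, only to show why the call sites shrink (the names below are illustrative, not libvpx APIs):

    #include <stdio.h>

    typedef signed char partition_ctx;

    /* Toy stand-in for MACROBLOCKD: just the two context arrays. */
    typedef struct {
      partition_ctx above_seg_context[16];
      partition_ctx left_seg_context[8];
    } toy_xd;

    /* Old style: the caller digs out both arrays and passes them separately. */
    static int ctx_old(const partition_ctx *above, const partition_ctx *left,
                       int mi_row, int mi_col) {
      return above[mi_col] + left[mi_row & 7];
    }

    /* New style: pass the owning struct; the helper selects the members. */
    static int ctx_new(const toy_xd *xd, int mi_row, int mi_col) {
      return xd->above_seg_context[mi_col] + xd->left_seg_context[mi_row & 7];
    }

    int main(void) {
      toy_xd xd = {{0}};
      xd.above_seg_context[3] = 2;
      xd.left_seg_context[1] = 1;
      /* Both forms compute the same value; only the call site differs. */
      printf("%d %d\n",
             ctx_old(xd.above_seg_context, xd.left_seg_context, 9, 3),
             ctx_new(&xd, 9, 3));  /* prints: 3 3 */
      return 0;
    }
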
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
index 915c1c1..44951b5 100644
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -347,6 +347,8 @@
   x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
   y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
 
+  vpx_memset(left_col, 129, 64);
+
   // left
   if (left_available) {
     if (xd->mb_to_bottom_edge < 0) {
@@ -366,8 +368,6 @@
       for (i = 0; i < bs; ++i)
         left_col[i] = ref[i * ref_stride - 1];
     }
-  } else {
-    vpx_memset(left_col, 129, bs);
   }
 
   // TODO(hkuang) do not extend 2*bs pixels for all modes.
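
The hunk above replaces the conditional "no left neighbour" fallback with an unconditional fill: left_col is first set to the default value 129 for all 64 entries, and the available reference pixels then overwrite whatever part actually exists. A tiny standalone sketch of that initialize-then-overwrite pattern (the buffer size and the value 129 mirror the hunk; the reference values and flow are illustrative):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void) {
      uint8_t left_col[64];
      const uint8_t ref[8] = { 10, 11, 12, 13, 14, 15, 16, 17 };
      const int bs = 8;            /* pixels actually available */
      const int left_available = 1;

      /* Default value first (129, as in the hunk), for the whole buffer. */
      memset(left_col, 129, sizeof(left_col));

      /* Then overwrite only the pixels we really have. */
      if (left_available)
        memcpy(left_col, ref, bs);

      printf("%d %d\n", left_col[0], left_col[bs]);  /* prints: 10 129 */
      return 0;
    }
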
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index e4cd9d4..b874ef3 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -636,7 +636,7 @@
 specialize qw/vp9_sad4x4x8 sse4/;
 
 add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad64x64x4d sse2/;
+specialize qw/vp9_sad64x64x4d sse2 avx2/;
 
 add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
 specialize qw/vp9_sad32x64x4d sse2/;
@@ -651,7 +651,7 @@
 specialize qw/vp9_sad16x32x4d sse2/;
 
 add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad32x32x4d sse2/;
+specialize qw/vp9_sad32x32x4d sse2 avx2/;
 
 add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
 specialize qw/vp9_sad16x16x4d sse2/;
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 10b886cf..bb5a5a2 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -407,9 +407,7 @@
 static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, int hbs,
                                      int mi_row, int mi_col, BLOCK_SIZE bsize,
                                      vp9_reader *r) {
-  const int ctx = partition_plane_context(xd->above_seg_context,
-                                          xd->left_seg_context,
-                                          mi_row, mi_col, bsize);
+  const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
   const vp9_prob *const probs = get_partition_probs(cm, ctx);
   const int has_rows = (mi_row + hbs) < cm->mi_rows;
   const int has_cols = (mi_col + hbs) < cm->mi_cols;
@@ -474,8 +472,7 @@
   // update partition context
   if (bsize >= BLOCK_8X8 &&
       (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
-    update_partition_context(xd->above_seg_context, xd->left_seg_context,
-                             mi_row, mi_col, subsize, bsize);
+    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
 }
 
 static void setup_token_decoder(const uint8_t *data,
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index fda0d2f..982b851 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -104,7 +104,7 @@
   static int init_done = 0;
 
   if (!init_done) {
-    vp9_initialize_common();
+    vp9_init_neighbors();
     vp9_init_quant_tables();
     init_done = 1;
   }
@@ -119,7 +119,7 @@
     pd[i].dqcoeff = pbi->dqcoeff[i];
 }
 
-VP9D_COMP *vp9_create_decompressor(VP9D_CONFIG *oxcf) {
+VP9D_COMP *vp9_create_decompressor(const VP9D_CONFIG *oxcf) {
   VP9D_COMP *const pbi = vpx_memalign(32, sizeof(VP9D_COMP));
   VP9_COMMON *const cm = pbi ? &pbi->common : NULL;
 
@@ -227,17 +227,15 @@
 }
 
 
-vpx_codec_err_t vp9_set_reference_dec(VP9D_COMP *pbi,
+vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm,
                                       VP9_REFFRAME ref_frame_flag,
                                       YV12_BUFFER_CONFIG *sd) {
-  VP9_COMMON *cm = &pbi->common;
   RefBuffer *ref_buf = NULL;
 
-  /* TODO(jkoleszar): The decoder doesn't have any real knowledge of what the
-   * encoder is using the frame buffers for. This is just a stub to keep the
-   * vpxenc --test-decode functionality working, and will be replaced in a
-   * later commit that adds VP9-specific controls for this functionality.
-   */
+  // TODO(jkoleszar): The decoder doesn't have any real knowledge of what the
+  // encoder is using the frame buffers for. This is just a stub to keep the
+  // vpxenc --test-decode functionality working, and will be replaced in a
+  // later commit that adds VP9-specific controls for this functionality.
   if (ref_frame_flag == VP9_LAST_FLAG) {
     ref_buf = &cm->frame_refs[0];
   } else if (ref_frame_flag == VP9_GOLD_FLAG) {
@@ -245,13 +243,13 @@
   } else if (ref_frame_flag == VP9_ALT_FLAG) {
     ref_buf = &cm->frame_refs[2];
   } else {
-    vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR,
+    vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                        "Invalid reference frame");
-    return pbi->common.error.error_code;
+    return cm->error.error_code;
   }
 
   if (!equal_dimensions(ref_buf->buf, sd)) {
-    vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR,
+    vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                        "Incorrect buffer dimensions");
   } else {
     int *ref_fb_ptr = &ref_buf->idx;
@@ -268,7 +266,7 @@
     vp8_yv12_copy_frame(sd, ref_buf->buf);
   }
 
-  return pbi->common.error.error_code;
+  return cm->error.error_code;
 }
 
 
@@ -310,29 +308,21 @@
 int vp9_receive_compressed_data(VP9D_COMP *pbi,
                                 size_t size, const uint8_t **psource,
                                 int64_t time_stamp) {
-  VP9_COMMON *cm = NULL;
+  VP9_COMMON *const cm = &pbi->common;
   const uint8_t *source = *psource;
   int retcode = 0;
 
-  /*if(pbi->ready_for_new_data == 0)
-      return -1;*/
-
-  if (!pbi)
-    return -1;
-
-  cm = &pbi->common;
   cm->error.error_code = VPX_CODEC_OK;
 
   if (size == 0) {
-    /* This is used to signal that we are missing frames.
-     * We do not know if the missing frame(s) was supposed to update
-     * any of the reference buffers, but we act conservative and
-     * mark only the last buffer as corrupted.
-     *
-     * TODO(jkoleszar): Error concealment is undefined and non-normative
-     * at this point, but if it becomes so, [0] may not always be the correct
-     * thing to do here.
-     */
+    // This is used to signal that we are missing frames.
+    // We do not know if the missing frame(s) was supposed to update
+    // any of the reference buffers, but we act conservative and
+    // mark only the last buffer as corrupted.
+    //
+    // TODO(jkoleszar): Error concealment is undefined and non-normative
+    // at this point, but if it becomes so, [0] may not always be the correct
+    // thing to do here.
     if (cm->frame_refs[0].idx != INT_MAX)
       cm->frame_refs[0].buf->corrupted = 1;
   }
@@ -346,14 +336,13 @@
   if (setjmp(cm->error.jmp)) {
     cm->error.setjmp = 0;
 
-    /* We do not know if the missing frame(s) was supposed to update
-     * any of the reference buffers, but we act conservative and
-     * mark only the last buffer as corrupted.
-     *
-     * TODO(jkoleszar): Error concealment is undefined and non-normative
-     * at this point, but if it becomes so, [0] may not always be the correct
-     * thing to do here.
-     */
+    // We do not know if the missing frame(s) was supposed to update
+    // any of the reference buffers, but we act conservative and
+    // mark only the last buffer as corrupted.
+    //
+    // TODO(jkoleszar): Error concealment is undefined and non-normative
+    // at this point, but if it becomes so, [0] may not always be the correct
+    // thing to do here.
     if (cm->frame_refs[0].idx != INT_MAX)
       cm->frame_refs[0].buf->corrupted = 1;
 
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index a9458c4..9318c55 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -31,10 +31,8 @@
   int width;
   int height;
   int version;
-  int postprocess;
   int max_threads;
   int inv_tile_order;
-  int input_partition;
 } VP9D_CONFIG;
 
 typedef struct VP9Decompressor {
@@ -82,7 +80,7 @@
                                        VP9_REFFRAME ref_frame_flag,
                                        YV12_BUFFER_CONFIG *sd);
 
-vpx_codec_err_t vp9_set_reference_dec(struct VP9Decompressor *pbi,
+vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm,
                                       VP9_REFFRAME ref_frame_flag,
                                       YV12_BUFFER_CONFIG *sd);
 
@@ -90,7 +88,7 @@
                           int index, YV12_BUFFER_CONFIG **fb);
 
 
-struct VP9Decompressor *vp9_create_decompressor(VP9D_CONFIG *oxcf);
+struct VP9Decompressor *vp9_create_decompressor(const VP9D_CONFIG *oxcf);
 
 void vp9_remove_decompressor(struct VP9Decompressor *pbi);
 
diff --git a/vp9/encoder/vp9_craq.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
similarity index 76%
rename from vp9/encoder/vp9_craq.c
rename to vp9/encoder/vp9_aq_cyclicrefresh.c
index 40437c7..231276b 100644
--- a/vp9/encoder/vp9_craq.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -11,7 +11,7 @@
 #include <limits.h>
 #include <math.h>
 
-#include "vp9/encoder/vp9_craq.h"
+#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
 
 #include "vp9/common/vp9_seg_common.h"
 
@@ -21,31 +21,27 @@
 
 
 // Check if we should turn off cyclic refresh based on bitrate condition.
-static int apply_cyclic_refresh_bitrate(VP9_COMP *const cpi) {
+static int apply_cyclic_refresh_bitrate(const VP9_COMMON *cm,
+                                        const RATE_CONTROL *rc) {
   // Turn off cyclic refresh if bits available per frame is not sufficiently
   // larger than bit cost of segmentation. Segment map bit cost should scale
   // with number of seg blocks, so compare available bits to number of blocks.
   // Average bits available per frame = av_per_frame_bandwidth
   // Number of (8x8) blocks in frame = mi_rows * mi_cols;
-  float factor  = 0.5;
-  int number_blocks = cpi->common.mi_rows  * cpi->common.mi_cols;
+  const float factor  = 0.5;
+  const int number_blocks = cm->mi_rows  * cm->mi_cols;
   // The condition below corresponds to turning off at target bitrates:
   // ~24kbps for CIF, 72kbps for VGA (at 30fps).
-  if (cpi->rc.av_per_frame_bandwidth < factor * number_blocks)
-    return 0;
-  else
-    return 1;
+  return rc->av_per_frame_bandwidth >= factor * number_blocks;
 }
 
 // Check if this coding block, of size bsize, should be considered for refresh
 // (lower-qp coding). Decision can be based on various factors, such as
 // size of the coding block (i.e., below min_block size rejected), coding
 // mode, and rate/distortion.
-static int candidate_refresh_aq(VP9_COMP *const cpi,
-                                MODE_INFO *const mi,
-                                int bsize,
-                                int use_rd) {
-  CYCLIC_REFRESH *const cr = &cpi->cyclic_refresh;
+static int candidate_refresh_aq(const CYCLIC_REFRESH *cr,
+                                const MB_MODE_INFO *mbmi,
+                                BLOCK_SIZE bsize, int use_rd) {
   if (use_rd) {
     // If projected rate is below the thresh_rate (well below target,
     // so undershoot expected), accept it for lower-qp coding.
@@ -56,18 +52,18 @@
     // 2) mode is non-zero mv and projected distortion is above thresh_dist
     // 3) mode is an intra-mode (we may want to allow some of this under
     // another thresh_dist)
-    else if ((bsize < cr->min_block_size) ||
-        (mi->mbmi.mv[0].as_int != 0 &&
-            cr->projected_dist_sb > cr->thresh_dist_sb) ||
-            !is_inter_block(&mi->mbmi))
+    else if (bsize < cr->min_block_size ||
+             (mbmi->mv[0].as_int != 0 &&
+              cr->projected_dist_sb > cr->thresh_dist_sb) ||
+             !is_inter_block(mbmi))
       return 0;
     else
       return 1;
   } else {
     // Rate/distortion not used for update.
-    if ((bsize < cr->min_block_size) ||
-      (mi->mbmi.mv[0].as_int != 0) ||
-      !is_inter_block(&mi->mbmi))
+    if (bsize < cr->min_block_size ||
+        mbmi->mv[0].as_int != 0 ||
+        !is_inter_block(mbmi))
       return 0;
     else
       return 1;
@@ -78,32 +74,32 @@
 // check if we should reset the segment_id, and update the cyclic_refresh map
 // and segmentation map.
 void vp9_update_segment_aq(VP9_COMP *const cpi,
-                           MODE_INFO *const mi,
+                           MB_MODE_INFO *const mbmi,
                            int mi_row,
                            int mi_col,
-                           int bsize,
+                           BLOCK_SIZE bsize,
                            int use_rd) {
+  const VP9_COMMON *const cm = &cpi->common;
   CYCLIC_REFRESH *const cr = &cpi->cyclic_refresh;
-  VP9_COMMON *const cm = &cpi->common;
   const int bw = num_8x8_blocks_wide_lookup[bsize];
   const int bh = num_8x8_blocks_high_lookup[bsize];
   const int xmis = MIN(cm->mi_cols - mi_col, bw);
   const int ymis = MIN(cm->mi_rows - mi_row, bh);
   const int block_index = mi_row * cm->mi_cols + mi_col;
+  const int refresh_this_block = candidate_refresh_aq(cr, mbmi, bsize, use_rd);
   // Default is to not update the refresh map.
   int new_map_value = cr->map[block_index];
   int x = 0; int y = 0;
-  int current_segment = mi->mbmi.segment_id;
-  int refresh_this_block = candidate_refresh_aq(cpi, mi, bsize, use_rd);
+
   // Check if we should reset the segment_id for this block.
-  if (current_segment && !refresh_this_block)
-    mi->mbmi.segment_id = 0;
+  if (mbmi->segment_id > 0 && !refresh_this_block)
+    mbmi->segment_id = 0;
 
   // Update the cyclic refresh map, to be used for setting segmentation map
   // for the next frame. If the block  will be refreshed this frame, mark it
   // as clean. The magnitude of the -ve influences how long before we consider
   // it for refresh again.
-  if (mi->mbmi.segment_id == 1) {
+  if (mbmi->segment_id == 1) {
     new_map_value = -cr->time_for_refresh;
   } else if (refresh_this_block) {
     // Else if it is accepted as candidate for refresh, and has not already
@@ -121,39 +117,40 @@
     for (x = 0; x < xmis; x++) {
       cr->map[block_index + y * cm->mi_cols + x] = new_map_value;
       cpi->segmentation_map[block_index + y * cm->mi_cols + x] =
-          mi->mbmi.segment_id;
+          mbmi->segment_id;
     }
   // Keep track of actual number (in units of 8x8) of blocks in segment 1 used
   // for encoding this frame.
-  if (mi->mbmi.segment_id)
+  if (mbmi->segment_id)
     cr->num_seg_blocks += xmis * ymis;
 }
 
 // Setup cyclic background refresh: set delta q and segmentation map.
 void vp9_setup_cyclic_refresh_aq(VP9_COMP *const cpi) {
   VP9_COMMON *const cm = &cpi->common;
+  const RATE_CONTROL *const rc = &cpi->rc;
   CYCLIC_REFRESH *const cr = &cpi->cyclic_refresh;
   struct segmentation *const seg = &cm->seg;
-  unsigned char *seg_map = cpi->segmentation_map;
-  int apply_cyclic_refresh  = apply_cyclic_refresh_bitrate(cpi);
+  unsigned char *const seg_map = cpi->segmentation_map;
+  const int apply_cyclic_refresh  = apply_cyclic_refresh_bitrate(cm, rc);
   // Don't apply refresh on key frame or enhancement layer frames.
   if (!apply_cyclic_refresh ||
-      (cpi->common.frame_type == KEY_FRAME) ||
+      (cm->frame_type == KEY_FRAME) ||
       (cpi->svc.temporal_layer_id > 0)) {
     // Set segmentation map to 0 and disable.
     vpx_memset(seg_map, 0, cm->mi_rows * cm->mi_cols);
     vp9_disable_segmentation(&cm->seg);
-    if (cpi->common.frame_type == KEY_FRAME)
+    if (cm->frame_type == KEY_FRAME)
       cr->mb_index = 0;
     return;
   } else {
+    const int mbs_in_frame = cm->mi_rows * cm->mi_cols;
     int qindex_delta = 0;
-    int mbs_in_frame = cm->mi_rows * cm->mi_cols;
-    int i, x, y, block_count, bl_index, bl_index2;
-    int sum_map, new_value, mi_row, mi_col, xmis, ymis, qindex2;
+    int i, x, y, block_count;
+    int mi_row, mi_col, qindex2;
 
     // Rate target ratio to set q delta.
-    float rate_ratio_qdelta = 2.0;
+    const float rate_ratio_qdelta = 2.0;
     vp9_clear_system_state();
     // Some of these parameters may be set via codec-control function later.
     cr->max_mbs_perframe = 10;
@@ -161,14 +158,14 @@
     cr->min_block_size = BLOCK_16X16;
     cr->time_for_refresh = 1;
     // Set rate threshold to some fraction of target (and scaled by 256).
-    cr->thresh_rate_sb = (cpi->rc.sb64_target_rate * 256) >> 2;
+    cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 2;
     // Distortion threshold, quadratic in Q, scale factor to be adjusted.
     cr->thresh_dist_sb = 8 * (int)(vp9_convert_qindex_to_q(cm->base_qindex) *
         vp9_convert_qindex_to_q(cm->base_qindex));
     if (cpi->sf.use_nonrd_pick_mode) {
       // May want to be more conservative with thresholds in non-rd mode for now
       // as rate/distortion are derived from model based on prediction residual.
-      cr->thresh_rate_sb = (cpi->rc.sb64_target_rate * 256) >> 3;
+      cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 3;
       cr->thresh_dist_sb = 4 * (int)(vp9_convert_qindex_to_q(cm->base_qindex) *
           vp9_convert_qindex_to_q(cm->base_qindex));
     }
@@ -200,9 +197,8 @@
                                               rate_ratio_qdelta);
     // TODO(marpan): Incorporate the actual-vs-target rate over/undershoot from
     // previous encoded frame.
-    if ((-qindex_delta) > cr->max_qdelta_perc * cm->base_qindex / 100) {
+    if (-qindex_delta > cr->max_qdelta_perc * cm->base_qindex / 100)
       qindex_delta = -cr->max_qdelta_perc * cm->base_qindex / 100;
-    }
 
     // Compute rd-mult for segment 1.
     qindex2 = clamp(cm->base_qindex + cm->y_dc_delta_q + qindex_delta, 0, MAXQ);
@@ -238,29 +234,21 @@
     // Enforce constant segment map over superblock.
     for (mi_row = 0; mi_row < cm->mi_rows; mi_row +=  MI_BLOCK_SIZE)
       for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
-        bl_index = mi_row * cm->mi_cols + mi_col;
-        xmis = num_8x8_blocks_wide_lookup[BLOCK_64X64];
-        ymis = num_8x8_blocks_high_lookup[BLOCK_64X64];
-        xmis = MIN(cm->mi_cols - mi_col, xmis);
-        ymis = MIN(cm->mi_rows - mi_row, ymis);
-        sum_map = 0;
+        const int bl_index = mi_row * cm->mi_cols + mi_col;
+        const int xmis = MIN(cm->mi_cols - mi_col,
+                             num_8x8_blocks_wide_lookup[BLOCK_64X64]);
+        const int ymis = MIN(cm->mi_rows - mi_row,
+                             num_8x8_blocks_high_lookup[BLOCK_64X64]);
+        int sum_map = 0;
         for (y = 0; y < ymis; y++)
-          for (x = 0; x < xmis; x++) {
-            bl_index2 = bl_index + y * cm->mi_cols + x;
-               sum_map += seg_map[bl_index2];
-          }
-        new_value = 0;
+          for (x = 0; x < xmis; x++)
+            sum_map += seg_map[bl_index + y * cm->mi_cols + x];
         // If segment is partial over superblock, reset.
         if (sum_map > 0 && sum_map < xmis * ymis) {
-          if (sum_map < xmis * ymis / 2)
-            new_value = 0;
-          else
-            new_value = 1;
+          const int new_value = (sum_map >= xmis * ymis / 2);
           for (y = 0; y < ymis; y++)
-            for (x = 0; x < xmis; x++) {
-              bl_index2 = bl_index + y * cm->mi_cols + x;
-              seg_map[bl_index2] = new_value;
-            }
+            for (x = 0; x < xmis; x++)
+              seg_map[bl_index + y * cm->mi_cols + x] = new_value;
         }
       }
   }
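
The rewritten apply_cyclic_refresh_bitrate() above keeps the cut-off the comment describes: refresh stays on only while the average bits per frame are at least 0.5 times the number of 8x8 blocks in the frame. A quick standalone check of the "~24 kbps for CIF, 72 kbps for VGA (at 30fps)" figures quoted in that comment; frame geometry and frame rate are the only inputs, and everything except the 0.5 factor and the 8x8 block count taken from the hunk is illustrative:

    #include <stdio.h>

    /* Bitrate (bits/s) below which cyclic refresh turns off, for a frame of
     * the given size at the given frame rate. */
    static double cutoff_bps(int width, int height, double fps) {
      const int mi_cols = width / 8;   /* 8x8 block columns */
      const int mi_rows = height / 8;  /* 8x8 block rows */
      const double factor = 0.5;       /* same factor as in the hunk */
      /* av_per_frame_bandwidth >= factor * (mi_rows * mi_cols) per frame,
       * converted to bits per second. */
      return factor * mi_rows * mi_cols * fps;
    }

    int main(void) {
      printf("CIF: %.0f bps\n", cutoff_bps(352, 288, 30.0));  /* 23760 (~24 kbps) */
      printf("VGA: %.0f bps\n", cutoff_bps(640, 480, 30.0));  /* 72000 (72 kbps) */
      return 0;
    }
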
diff --git a/vp9/encoder/vp9_craq.h b/vp9/encoder/vp9_aq_cyclicrefresh.h
similarity index 89%
rename from vp9/encoder/vp9_craq.h
rename to vp9/encoder/vp9_aq_cyclicrefresh.h
index fec7748..14dc2cd 100644
--- a/vp9/encoder/vp9_craq.h
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.h
@@ -9,8 +9,8 @@
  */
 
 
-#ifndef VP9_ENCODER_VP9_CRAQ_H_
-#define VP9_ENCODER_VP9_CRAQ_H_
+#ifndef VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_
+#define VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_
 
 #include "vp9/common/vp9_blockd.h"
 
@@ -51,10 +51,10 @@
 // check if we should reset the segment_id, and update the cyclic_refresh map
 // and segmentation map.
 void vp9_update_segment_aq(struct VP9_COMP *const cpi,
-                           MODE_INFO *const mi,
+                           MB_MODE_INFO *const mbmi,
                            int mi_row,
                            int mi_col,
-                           int bsize,
+                           BLOCK_SIZE bsize,
                            int use_rd);
 
 // Setup cyclic background refresh: set delta q and segmentation map.
@@ -64,4 +64,4 @@
 }  // extern "C"
 #endif
 
-#endif  // VP9_ENCODER_VP9_CRAQ_H_
+#endif  // VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_
diff --git a/vp9/encoder/vp9_vaq.c b/vp9/encoder/vp9_aq_variance.c
similarity index 98%
rename from vp9/encoder/vp9_vaq.c
rename to vp9/encoder/vp9_aq_variance.c
index c71c171..c25eb95 100644
--- a/vp9/encoder/vp9_vaq.c
+++ b/vp9/encoder/vp9_aq_variance.c
@@ -10,7 +10,7 @@
 
 #include <math.h>
 
-#include "vp9/encoder/vp9_vaq.h"
+#include "vp9/encoder/vp9_aq_variance.h"
 
 #include "vp9/common/vp9_seg_common.h"
 
diff --git a/vp9/encoder/vp9_vaq.h b/vp9/encoder/vp9_aq_variance.h
similarity index 86%
rename from vp9/encoder/vp9_vaq.h
rename to vp9/encoder/vp9_aq_variance.h
index c73114a..381fe50 100644
--- a/vp9/encoder/vp9_vaq.h
+++ b/vp9/encoder/vp9_aq_variance.h
@@ -9,8 +9,8 @@
  */
 
 
-#ifndef VP9_ENCODER_VP9_VAQ_H_
-#define VP9_ENCODER_VP9_VAQ_H_
+#ifndef VP9_ENCODER_VP9_AQ_VARIANCE_H_
+#define VP9_ENCODER_VP9_AQ_VARIANCE_H_
 
 #include "vp9/encoder/vp9_onyx_int.h"
 
@@ -31,4 +31,4 @@
 }  // extern "C"
 #endif
 
-#endif  // VP9_ENCODER_VP9_VAQ_H_
+#endif  // VP9_ENCODER_VP9_AQ_VARIANCE_H_
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index d2df46b..a1db097 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -395,9 +395,7 @@
 static void write_partition(VP9_COMMON *cm, MACROBLOCKD *xd,
                             int hbs, int mi_row, int mi_col,
                             PARTITION_TYPE p, BLOCK_SIZE bsize, vp9_writer *w) {
-  const int ctx = partition_plane_context(xd->above_seg_context,
-                                          xd->left_seg_context,
-                                          mi_row, mi_col, bsize);
+  const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
   const vp9_prob *const probs = get_partition_probs(cm, ctx);
   const int has_rows = (mi_row + hbs) < cm->mi_rows;
   const int has_cols = (mi_col + hbs) < cm->mi_cols;
@@ -468,8 +466,7 @@
   // update partition context
   if (bsize >= BLOCK_8X8 &&
       (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
-    update_partition_context(xd->above_seg_context, xd->left_seg_context,
-                             mi_row, mi_col, subsize, bsize);
+    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
 }
 
 static void write_modes(VP9_COMP *cpi,
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 888984c..7729d84 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -157,7 +157,6 @@
 
   // note that token_costs is the cost when eob node is skipped
   vp9_coeff_cost token_costs[TX_SIZES];
-  DECLARE_ALIGNED(16, uint8_t, token_cache[1024]);
 
   int optimize;
 
@@ -197,7 +196,8 @@
 // TODO(jingning): the variables used here are little complicated. need further
 // refactoring on organizing the temporary buffers, when recursive
 // partition down to 4x4 block size is enabled.
-static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, BLOCK_SIZE bsize) {
+static INLINE PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x,
+                                                   BLOCK_SIZE bsize) {
   switch (bsize) {
     case BLOCK_64X64:
       return &x->sb64_context;
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index a0196e6..1604ba9 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -30,6 +30,8 @@
 #include "vp9/common/vp9_systemdependent.h"
 #include "vp9/common/vp9_tile_common.h"
 
+#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
+#include "vp9/encoder/vp9_aq_variance.h"
 #include "vp9/encoder/vp9_encodeframe.h"
 #include "vp9/encoder/vp9_encodemb.h"
 #include "vp9/encoder/vp9_encodemv.h"
@@ -38,8 +40,6 @@
 #include "vp9/encoder/vp9_rdopt.h"
 #include "vp9/encoder/vp9_segmentation.h"
 #include "vp9/encoder/vp9_tokenize.h"
-#include "vp9/encoder/vp9_vaq.h"
-#include "vp9/encoder/vp9_craq.h"
 
 #define GF_ZEROMV_ZBIN_BOOST 0
 #define LF_ZEROMV_ZBIN_BOOST 0
@@ -903,7 +903,8 @@
       output_enabled) {
     // Check for reseting segment_id and update cyclic map.
     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && seg->enabled) {
-      vp9_update_segment_aq(cpi, xd->mi_8x8[0], mi_row, mi_col, bsize, 1);
+      vp9_update_segment_aq(cpi, &xd->mi_8x8[0]->mbmi,
+                            mi_row, mi_col, bsize, 1);
       vp9_init_plane_quantizers(cpi, x);
     }
     mi->mbmi.segment_id = xd->mi_8x8[0]->mbmi.segment_id;
@@ -1295,8 +1296,7 @@
     return;
 
   if (bsize >= BLOCK_8X8) {
-    ctx = partition_plane_context(xd->above_seg_context, xd->left_seg_context,
-                                 mi_row, mi_col, bsize);
+    ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
     subsize = *get_sb_partitioning(x, bsize);
   } else {
     ctx = 0;
@@ -1351,8 +1351,7 @@
   }
 
   if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
-    update_partition_context(xd->above_seg_context, xd->left_seg_context,
-                             mi_row, mi_col, subsize, bsize);
+    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
 }
 
 // Check to see if the given partition size is allowed for a specified number
@@ -1472,7 +1471,7 @@
 
   // Check for reseting segment_id and update cyclic map.
   if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && seg->enabled) {
-    vp9_update_segment_aq(cpi, xd->mi_8x8[0], mi_row, mi_col, bsize, 1);
+    vp9_update_segment_aq(cpi, &xd->mi_8x8[0]->mbmi, mi_row, mi_col, bsize, 1);
     vp9_init_plane_quantizers(cpi, x);
   }
 
@@ -1526,8 +1525,7 @@
     MACROBLOCKD *const xd = &cpi->mb.e_mbd;
     const int idx_str = xd->mode_info_stride * mi_row + mi_col;
     MODE_INFO ** mi_8x8 = cm->mi_grid_visible + idx_str;
-    ctx = partition_plane_context(xd->above_seg_context, xd->left_seg_context,
-                                 mi_row, mi_col, bsize);
+    ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
     subsize = mi_8x8[0]->mbmi.sb_type;
   } else {
     ctx = 0;
@@ -1586,8 +1584,7 @@
   }
 
   if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
-    update_partition_context(xd->above_seg_context, xd->left_seg_context,
-                             mi_row, mi_col, subsize, bsize);
+    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
 }
 
 static void rd_use_partition(VP9_COMP *cpi,
@@ -1601,10 +1598,7 @@
   MACROBLOCKD *const xd = &x->e_mbd;
   const int mis = cm->mode_info_stride;
   const int bsl = b_width_log2(bsize);
-  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
-  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
-  const int ms = num_4x4_blocks_wide / 2;
-  const int mh = num_4x4_blocks_high / 2;
+  const int mi_step = num_4x4_blocks_wide_lookup[bsize] / 2;
   const int bss = (1 << bsl) / 4;
   int i, pl;
   PARTITION_TYPE partition = PARTITION_NONE;
@@ -1623,10 +1617,14 @@
   BLOCK_SIZE sub_subsize = BLOCK_4X4;
   int splits_below = 0;
   BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type;
+  int do_partition_search = 1;
 
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
 
+  assert(num_4x4_blocks_wide_lookup[bsize] ==
+         num_4x4_blocks_high_lookup[bsize]);
+
   partition = partition_lookup[bsl][bs_type];
   subsize = get_subsize(bsize, partition);
 
@@ -1643,12 +1641,23 @@
   }
   save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
 
+  set_offsets(cpi, tile, mi_row, mi_col, bsize);
   if (bsize == BLOCK_16X16) {
-    set_offsets(cpi, tile, mi_row, mi_col, bsize);
     x->mb_energy = vp9_block_energy(cpi, x, bsize);
   }
 
-  if (cpi->sf.partition_search_type == SEARCH_PARTITION &&
+  if (!x->in_active_map) {
+    do_partition_search = 0;
+    if (mi_row + (mi_step >> 1) < cm->mi_rows &&
+        mi_col + (mi_step >> 1) < cm->mi_cols) {
+      *(get_sb_partitioning(x, bsize)) = bsize;
+      bs_type = mi_8x8[0]->mbmi.sb_type = bsize;
+      subsize = bsize;
+      partition = PARTITION_NONE;
+    }
+  }
+  if (do_partition_search &&
+      cpi->sf.partition_search_type == SEARCH_PARTITION &&
       cpi->sf.adjust_partitioning_from_last_frame) {
     // Check if any of the sub blocks are further split.
     if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
@@ -1666,15 +1675,13 @@
     // If partition is not none try none unless each of the 4 splits are split
     // even further..
     if (partition != PARTITION_NONE && !splits_below &&
-        mi_row + (ms >> 1) < cm->mi_rows &&
-        mi_col + (ms >> 1) < cm->mi_cols) {
+        mi_row + (mi_step >> 1) < cm->mi_rows &&
+        mi_col + (mi_step >> 1) < cm->mi_cols) {
       *(get_sb_partitioning(x, bsize)) = bsize;
       rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rate, &none_dist, bsize,
                        get_block_context(x, bsize), INT64_MAX);
 
-      pl = partition_plane_context(xd->above_seg_context,
-                                   xd->left_seg_context,
-                                   mi_row, mi_col, bsize);
+      pl = partition_plane_context(xd, mi_row, mi_col, bsize);
 
       if (none_rate < INT_MAX) {
         none_rate += x->partition_cost[pl][PARTITION_NONE];
@@ -1699,14 +1706,14 @@
                        &last_part_dist, subsize,
                        get_block_context(x, subsize), INT64_MAX);
       if (last_part_rate != INT_MAX &&
-          bsize >= BLOCK_8X8 && mi_row + (mh >> 1) < cm->mi_rows) {
+          bsize >= BLOCK_8X8 && mi_row + (mi_step >> 1) < cm->mi_rows) {
         int rt = 0;
         int64_t dt = 0;
         update_state(cpi, get_block_context(x, subsize), mi_row, mi_col,
                      subsize, 0);
         encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
         *get_sb_index(x, subsize) = 1;
-        rd_pick_sb_modes(cpi, tile, mi_row + (ms >> 1), mi_col, &rt, &dt,
+        rd_pick_sb_modes(cpi, tile, mi_row + (mi_step >> 1), mi_col, &rt, &dt,
                          subsize, get_block_context(x, subsize), INT64_MAX);
         if (rt == INT_MAX || dt == INT64_MAX) {
           last_part_rate = INT_MAX;
@@ -1724,14 +1731,14 @@
                        &last_part_dist, subsize,
                        get_block_context(x, subsize), INT64_MAX);
       if (last_part_rate != INT_MAX &&
-          bsize >= BLOCK_8X8 && mi_col + (ms >> 1) < cm->mi_cols) {
+          bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) {
         int rt = 0;
         int64_t dt = 0;
         update_state(cpi, get_block_context(x, subsize), mi_row, mi_col,
                      subsize, 0);
         encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
         *get_sb_index(x, subsize) = 1;
-        rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (ms >> 1), &rt, &dt,
+        rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (mi_step >> 1), &rt, &dt,
                          subsize, get_block_context(x, subsize), INT64_MAX);
         if (rt == INT_MAX || dt == INT64_MAX) {
           last_part_rate = INT_MAX;
@@ -1747,8 +1754,8 @@
       last_part_rate = 0;
       last_part_dist = 0;
       for (i = 0; i < 4; i++) {
-        int x_idx = (i & 1) * (ms >> 1);
-        int y_idx = (i >> 1) * (ms >> 1);
+        int x_idx = (i & 1) * (mi_step >> 1);
+        int y_idx = (i >> 1) * (mi_step >> 1);
         int jj = i >> 1, ii = i & 0x01;
         int rt;
         int64_t dt;
@@ -1774,18 +1781,20 @@
       assert(0);
   }
 
-  pl = partition_plane_context(xd->above_seg_context, xd->left_seg_context,
-                               mi_row, mi_col, bsize);
+  pl = partition_plane_context(xd, mi_row, mi_col, bsize);
   if (last_part_rate < INT_MAX) {
     last_part_rate += x->partition_cost[pl][partition];
     last_part_rd = RDCOST(x->rdmult, x->rddiv, last_part_rate, last_part_dist);
   }
 
-  if (cpi->sf.adjust_partitioning_from_last_frame
+  if (do_partition_search
+      && cpi->sf.adjust_partitioning_from_last_frame
       && cpi->sf.partition_search_type == SEARCH_PARTITION
       && partition != PARTITION_SPLIT && bsize > BLOCK_8X8
-      && (mi_row + ms < cm->mi_rows || mi_row + (ms >> 1) == cm->mi_rows)
-      && (mi_col + ms < cm->mi_cols || mi_col + (ms >> 1) == cm->mi_cols)) {
+      && (mi_row + mi_step < cm->mi_rows ||
+          mi_row + (mi_step >> 1) == cm->mi_rows)
+      && (mi_col + mi_step < cm->mi_cols ||
+          mi_col + (mi_step >> 1) == cm->mi_cols)) {
     BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT);
     chosen_rate = 0;
     chosen_dist = 0;
@@ -1793,8 +1802,8 @@
 
     // Split partition.
     for (i = 0; i < 4; i++) {
-      int x_idx = (i & 1) * (num_4x4_blocks_wide >> 2);
-      int y_idx = (i >> 1) * (num_4x4_blocks_wide >> 2);
+      int x_idx = (i & 1) * (mi_step >> 1);
+      int y_idx = (i >> 1) * (mi_step >> 1);
       int rt = 0;
       int64_t dt = 0;
       ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
@@ -1828,14 +1837,11 @@
         encode_sb(cpi, tile, tp,  mi_row + y_idx, mi_col + x_idx, 0,
                   split_subsize);
 
-      pl = partition_plane_context(xd->above_seg_context,
-                                   xd->left_seg_context,
-                                   mi_row + y_idx, mi_col + x_idx,
+      pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx,
                                    split_subsize);
       chosen_rate += x->partition_cost[pl][PARTITION_NONE];
     }
-    pl = partition_plane_context(xd->above_seg_context, xd->left_seg_context,
-                                 mi_row, mi_col, bsize);
+    pl = partition_plane_context(xd, mi_row, mi_col, bsize);
     if (chosen_rate < INT_MAX) {
       chosen_rate += x->partition_cost[pl][PARTITION_SPLIT];
       chosen_rd = RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist);
@@ -2029,7 +2035,7 @@
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
-  const int ms = num_8x8_blocks_wide_lookup[bsize] / 2;
+  const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2;
   ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
   PARTITION_CONTEXT sl[8], sa[8];
   TOKENEXTRA *tp_orig = *tp;
@@ -2042,8 +2048,8 @@
   int do_split = bsize >= BLOCK_8X8;
   int do_rect = 1;
   // Override skipping rectangular partition operations for edge blocks
-  const int force_horz_split = (mi_row + ms >= cm->mi_rows);
-  const int force_vert_split = (mi_col + ms >= cm->mi_cols);
+  const int force_horz_split = (mi_row + mi_step >= cm->mi_rows);
+  const int force_vert_split = (mi_col + mi_step >= cm->mi_cols);
   const int xss = x->e_mbd.plane[1].subsampling_x;
   const int yss = x->e_mbd.plane[1].subsampling_y;
 
@@ -2110,9 +2116,7 @@
                      ctx, best_rd);
     if (this_rate != INT_MAX) {
       if (bsize >= BLOCK_8X8) {
-        pl = partition_plane_context(xd->above_seg_context,
-                                     xd->left_seg_context,
-                                     mi_row, mi_col, bsize);
+        pl = partition_plane_context(xd, mi_row, mi_col, bsize);
         this_rate += x->partition_cost[pl][PARTITION_NONE];
       }
       sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
@@ -2157,8 +2161,8 @@
   if (do_split) {
     subsize = get_subsize(bsize, PARTITION_SPLIT);
     for (i = 0; i < 4 && sum_rd < best_rd; ++i) {
-      const int x_idx = (i & 1) * ms;
-      const int y_idx = (i >> 1) * ms;
+      const int x_idx = (i & 1) * mi_step;
+      const int y_idx = (i >> 1) * mi_step;
 
       if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
         continue;
@@ -2182,9 +2186,7 @@
       }
     }
     if (sum_rd < best_rd && i == 4) {
-      pl = partition_plane_context(xd->above_seg_context,
-                                   xd->left_seg_context,
-                                   mi_row, mi_col, bsize);
+      pl = partition_plane_context(xd, mi_row, mi_col, bsize);
       sum_rate += x->partition_cost[pl][PARTITION_SPLIT];
       sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
       if (sum_rd < best_rd) {
@@ -2216,7 +2218,7 @@
                      get_block_context(x, subsize), best_rd);
     sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
 
-    if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) {
+    if (sum_rd < best_rd && mi_row + mi_step < cm->mi_rows) {
       update_state(cpi, get_block_context(x, subsize), mi_row, mi_col,
                    subsize, 0);
       encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
@@ -2228,7 +2230,7 @@
           partition_none_allowed)
         get_block_context(x, subsize)->pred_interp_filter =
             ctx->mic.mbmi.interp_filter;
-      rd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col, &this_rate,
+      rd_pick_sb_modes(cpi, tile, mi_row + mi_step, mi_col, &this_rate,
                        &this_dist, subsize, get_block_context(x, subsize),
                        best_rd - sum_rd);
       if (this_rate == INT_MAX) {
@@ -2240,9 +2242,7 @@
       }
     }
     if (sum_rd < best_rd) {
-      pl = partition_plane_context(xd->above_seg_context,
-                                   xd->left_seg_context,
-                                   mi_row, mi_col, bsize);
+      pl = partition_plane_context(xd, mi_row, mi_col, bsize);
       sum_rate += x->partition_cost[pl][PARTITION_HORZ];
       sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
       if (sum_rd < best_rd) {
@@ -2269,7 +2269,7 @@
     rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
                      get_block_context(x, subsize), best_rd);
     sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
-    if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) {
+    if (sum_rd < best_rd && mi_col + mi_step < cm->mi_cols) {
       update_state(cpi, get_block_context(x, subsize), mi_row, mi_col,
                    subsize, 0);
       encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
@@ -2281,7 +2281,7 @@
           partition_none_allowed)
         get_block_context(x, subsize)->pred_interp_filter =
             ctx->mic.mbmi.interp_filter;
-      rd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms, &this_rate,
+      rd_pick_sb_modes(cpi, tile, mi_row, mi_col + mi_step, &this_rate,
                        &this_dist, subsize, get_block_context(x, subsize),
                        best_rd - sum_rd);
       if (this_rate == INT_MAX) {
@@ -2293,9 +2293,7 @@
       }
     }
     if (sum_rd < best_rd) {
-      pl = partition_plane_context(xd->above_seg_context,
-                                   xd->left_seg_context,
-                                   mi_row, mi_col, bsize);
+      pl = partition_plane_context(xd, mi_row, mi_col, bsize);
       sum_rate += x->partition_cost[pl][PARTITION_VERT];
       sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
       if (sum_rd < best_rd) {
@@ -2522,87 +2520,15 @@
   return 1;
 }
 
-static void set_txfm_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs,
-                          TX_SIZE tx_size) {
-  int x, y;
-
-  for (y = 0; y < ymbs; y++) {
-    for (x = 0; x < xmbs; x++)
-      mi_8x8[y * mis + x]->mbmi.tx_size = tx_size;
-  }
-}
-
-static void reset_skip_txfm_size_b(const VP9_COMMON *cm, int mis,
-                                   TX_SIZE max_tx_size, int bw, int bh,
-                                   int mi_row, int mi_col,
-                                   MODE_INFO **mi_8x8) {
-  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) {
-    return;
-  } else {
-    const MB_MODE_INFO *const mbmi = &mi_8x8[0]->mbmi;
-    if (mbmi->tx_size > max_tx_size) {
-      const int ymbs = MIN(bh, cm->mi_rows - mi_row);
-      const int xmbs = MIN(bw, cm->mi_cols - mi_col);
-
-      assert(vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) ||
-             get_skip_flag(mi_8x8, mis, ymbs, xmbs));
-      set_txfm_flag(mi_8x8, mis, ymbs, xmbs, max_tx_size);
-    }
-  }
-}
-
-static void reset_skip_txfm_size_sb(VP9_COMMON *cm, MODE_INFO **mi_8x8,
-                                    TX_SIZE max_tx_size, int mi_row, int mi_col,
-                                    BLOCK_SIZE bsize) {
-  const int mis = cm->mode_info_stride;
-  int bw, bh;
-  const int bs = num_8x8_blocks_wide_lookup[bsize], hbs = bs / 2;
-
-  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
-    return;
-
-  bw = num_8x8_blocks_wide_lookup[mi_8x8[0]->mbmi.sb_type];
-  bh = num_8x8_blocks_high_lookup[mi_8x8[0]->mbmi.sb_type];
-
-  if (bw == bs && bh == bs) {
-    reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, bs, mi_row, mi_col,
-                           mi_8x8);
-  } else if (bw == bs && bh < bs) {
-    reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, hbs, mi_row, mi_col,
-                           mi_8x8);
-    reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, hbs, mi_row + hbs,
-                           mi_col, mi_8x8 + hbs * mis);
-  } else if (bw < bs && bh == bs) {
-    reset_skip_txfm_size_b(cm, mis, max_tx_size, hbs, bs, mi_row, mi_col,
-                           mi_8x8);
-    reset_skip_txfm_size_b(cm, mis, max_tx_size, hbs, bs, mi_row,
-                           mi_col + hbs, mi_8x8 + hbs);
-  } else {
-    const BLOCK_SIZE subsize = subsize_lookup[PARTITION_SPLIT][bsize];
-    int n;
-
-    assert(bw < bs && bh < bs);
-
-    for (n = 0; n < 4; n++) {
-      const int mi_dc = hbs * (n & 1);
-      const int mi_dr = hbs * (n >> 1);
-
-      reset_skip_txfm_size_sb(cm, &mi_8x8[mi_dr * mis + mi_dc], max_tx_size,
-                              mi_row + mi_dr, mi_col + mi_dc, subsize);
-    }
-  }
-}
-
 static void reset_skip_txfm_size(VP9_COMMON *cm, TX_SIZE txfm_max) {
   int mi_row, mi_col;
   const int mis = cm->mode_info_stride;
-  MODE_INFO **mi_8x8, **mi_ptr = cm->mi_grid_visible;
+  MODE_INFO **mi_ptr = cm->mi_grid_visible;
 
-  for (mi_row = 0; mi_row < cm->mi_rows; mi_row += 8, mi_ptr += 8 * mis) {
-    mi_8x8 = mi_ptr;
-    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += 8, mi_8x8 += 8) {
-      reset_skip_txfm_size_sb(cm, mi_8x8, txfm_max, mi_row, mi_col,
-                              BLOCK_64X64);
+  for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row, mi_ptr += mis) {
+    for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
+      if (mi_ptr[mi_col]->mbmi.tx_size > txfm_max)
+        mi_ptr[mi_col]->mbmi.tx_size = txfm_max;
     }
   }
 }
@@ -2824,54 +2750,65 @@
     cpi->mb.source_variance = UINT_MAX;
 
     // Set the partition type of the 64X64 block
-    if (cpi->sf.partition_search_type == VAR_BASED_PARTITION)
-      choose_partitioning(cpi, tile, mi_row, mi_col);
-    else if (cpi->sf.partition_search_type == REFERENCE_PARTITION) {
-      if (cpi->sf.partition_check) {
-        MACROBLOCK *x = &cpi->mb;
-        int rate1 = 0, rate2 = 0, rate3 = 0;
-        int64_t dist1 = 0, dist2 = 0, dist3 = 0;
-        set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, BLOCK_8X8);
+    switch (cpi->sf.partition_search_type) {
+      case VAR_BASED_PARTITION:
+        choose_partitioning(cpi, tile, mi_row, mi_col);
         nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                            0, &rate1, &dist1);
-        set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, BLOCK_16X16);
+                            1, &dummy_rate, &dummy_dist);
+        break;
+      case VAR_BASED_FIXED_PARTITION:
+      case FIXED_PARTITION:
+        set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
         nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                            0, &rate2, &dist2);
-        set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, BLOCK_32X32);
-        nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                            0, &rate3, &dist3);
+                            1, &dummy_rate, &dummy_dist);
+        break;
+      case REFERENCE_PARTITION:
+        if (cpi->sf.partition_check) {
+          MACROBLOCK *x = &cpi->mb;
+          int rate1 = 0, rate2 = 0, rate3 = 0;
+          int64_t dist1 = 0, dist2 = 0, dist3 = 0;
+          set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, BLOCK_8X8);
+          nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
+                              BLOCK_64X64, 0, &rate1, &dist1);
+          set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
+                                 BLOCK_16X16);
+          nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
+                              BLOCK_64X64, 0, &rate2, &dist2);
+          set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
+                                 BLOCK_32X32);
+          nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
+                              BLOCK_64X64, 0, &rate3, &dist3);
 
-        if (RDCOST(x->rdmult, x->rddiv, rate1, dist1) <
-            RDCOST(x->rdmult, x->rddiv, rate2, dist2)) {
           if (RDCOST(x->rdmult, x->rddiv, rate1, dist1) <
-              RDCOST(x->rdmult, x->rddiv, rate3, dist3))
-            set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
-                                   BLOCK_8X8);
-          else
-            set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
-                                   BLOCK_32X32);
+              RDCOST(x->rdmult, x->rddiv, rate2, dist2)) {
+            if (RDCOST(x->rdmult, x->rddiv, rate1, dist1) <
+                RDCOST(x->rdmult, x->rddiv, rate3, dist3))
+              set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
+                                     BLOCK_8X8);
+            else
+              set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
+                                     BLOCK_32X32);
+          } else {
+            if (RDCOST(x->rdmult, x->rddiv, rate2, dist2) <
+                RDCOST(x->rdmult, x->rddiv, rate3, dist3))
+              set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
+                                     BLOCK_16X16);
+            else
+              set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
+                                     BLOCK_32X32);
+          }
         } else {
-          if (RDCOST(x->rdmult, x->rddiv, rate2, dist2) <
-              RDCOST(x->rdmult, x->rddiv, rate3, dist3))
-            set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
-                                   BLOCK_16X16);
+          if (!sb_has_motion(cm, prev_mi_8x8))
+            copy_partitioning(cm, mi_8x8, prev_mi_8x8);
           else
-            set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
-                                   BLOCK_32X32);
+            set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
         }
-
-      } else {
-        if (!sb_has_motion(cm, prev_mi_8x8))
-          copy_partitioning(cm, mi_8x8, prev_mi_8x8);
-        else
-          set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
-      }
+        nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
+                            1, &dummy_rate, &dummy_dist);
+        break;
+      default:
+        assert(0);
     }
-    else
-      set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
-
-    nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, 1,
-                        &dummy_rate, &dummy_dist);
   }
 }
 // end RTC play code
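Note on the encodeframe.c change above: the REFERENCE_PARTITION path with partition_check enabled trial-encodes the 64x64 block with fixed 8x8, 16x16 and 32x32 partitionings and keeps the one with the lowest rate-distortion cost. A minimal sketch of that selection (hypothetical helper; rd8/rd16/rd32 stand for RDCOST(x->rdmult, x->rddiv, rateN, distN)):

    static BLOCK_SIZE pick_fixed_partition_sketch(int64_t rd8, int64_t rd16,
                                                  int64_t rd32) {
      if (rd8 < rd16)
        return rd8 < rd32 ? BLOCK_8X8 : BLOCK_32X32;    // 8x8 beats 16x16
      else
        return rd16 < rd32 ? BLOCK_16X16 : BLOCK_32X32;  // 16x16 beats 8x8
    }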
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index c4c219b..61ca996 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -23,6 +23,7 @@
 #include "vp9/common/vp9_reconinter.h"  // vp9_setup_dst_planes()
 #include "vp9/common/vp9_systemdependent.h"
 
+#include "vp9/encoder/vp9_aq_variance.h"
 #include "vp9/encoder/vp9_block.h"
 #include "vp9/encoder/vp9_encodeframe.h"
 #include "vp9/encoder/vp9_encodemb.h"
@@ -34,7 +35,6 @@
 #include "vp9/encoder/vp9_quantize.h"
 #include "vp9/encoder/vp9_ratectrl.h"
 #include "vp9/encoder/vp9_rdopt.h"
-#include "vp9/encoder/vp9_vaq.h"
 #include "vp9/encoder/vp9_variance.h"
 
 #define OUTPUT_FPF 0
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 5d50d22..e0299f0 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -23,6 +23,11 @@
 
 // #define NEW_DIAMOND_SEARCH
 
+static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
+                                             const MV *mv) {
+  return &buf->buf[mv->row * buf->stride + mv->col];
+}
+
 void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) {
   int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
   int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
@@ -399,7 +404,7 @@
   // calculate central point error
   // TODO(yunqingwang): central point error was already calculated in full-
   // pixel search, and can be passed in this function.
-  comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
+  vp9_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
   besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
   *distortion = besterr;
   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
@@ -1326,10 +1331,8 @@
                           const MV *center_mv, MV *best_mv) {
   int r, c;
   const MACROBLOCKD *const xd = &x->e_mbd;
-  const uint8_t *const what = x->plane[0].src.buf;
-  const int what_stride = x->plane[0].src.stride;
-  const uint8_t *const in_what = xd->plane[0].pre[0].buf;
-  const int in_what_stride = xd->plane[0].pre[0].stride;
+  const struct buf_2d *const what = &x->plane[0].src;
+  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
   const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
   const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
@@ -1337,25 +1340,22 @@
   const int *mvjsadcost = x->nmvjointsadcost;
   int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
-  const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride +
-                                         ref_mv->col];
-  int best_sad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride,
-                             0x7fffffff) +
+  int best_sad = fn_ptr->sdf(what->buf, what->stride,
+      get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) +
       mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit);
   *best_mv = *ref_mv;
 
   for (r = row_min; r < row_max; ++r) {
     for (c = col_min; c < col_max; ++c) {
-      const MV this_mv = {r, c};
-      const uint8_t *check_here = &in_what[r * in_what_stride + c];
-      const int sad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
-                                  best_sad) +
-          mvsad_err_cost(&this_mv, &fcenter_mv,
-                         mvjsadcost, mvsadcost, sad_per_bit);
+      const MV mv = {r, c};
+      const int sad = fn_ptr->sdf(what->buf, what->stride,
+          get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) +
+          mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost,
+                         sad_per_bit);
 
       if (sad < best_sad) {
         best_sad = sad;
-        *best_mv = this_mv;
+        *best_mv = mv;
       }
     }
   }
@@ -1579,41 +1579,34 @@
                               const vp9_variance_fn_ptr_t *fn_ptr,
                               int *mvjcost, int *mvcost[2],
                               const MV *center_mv) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
   const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
-  int i, j;
-
-  const int what_stride = x->plane[0].src.stride;
-  const uint8_t *const what = x->plane[0].src.buf;
-  const int in_what_stride = xd->plane[0].pre[0].stride;
-  const uint8_t *const in_what = xd->plane[0].pre[0].buf;
-  const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride +
-                                             ref_mv->col];
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const struct buf_2d *const what = &x->plane[0].src;
+  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
   const int *mvjsadcost = x->nmvjointsadcost;
   int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
 
-  unsigned int bestsad = fn_ptr->sdf(what, what_stride, best_address,
-                                     in_what_stride, 0x7fffffff) +
+  unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
+                                     get_buf_from_mv(in_what, ref_mv),
+                                     in_what->stride, 0x7fffffff) +
       mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
+  int i, j;
 
   for (i = 0; i < search_range; i++) {
     int best_site = -1;
 
     for (j = 0; j < 4; j++) {
-      const MV this_mv = {ref_mv->row + neighbors[j].row,
-                          ref_mv->col + neighbors[j].col};
-      if (is_mv_in(x, &this_mv)) {
-        const uint8_t *check_here = &in_what[this_mv.row * in_what_stride +
-                                                this_mv.col];
-        unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
-                                           in_what_stride, bestsad);
-        if (thissad < bestsad) {
-          thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
-                                    mvjsadcost, mvsadcost, error_per_bit);
-
-          if (thissad < bestsad) {
-            bestsad = thissad;
+      const MV mv = {ref_mv->row + neighbors[j].row,
+                     ref_mv->col + neighbors[j].col};
+      if (is_mv_in(x, &mv)) {
+        unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
+            get_buf_from_mv(in_what, &mv), in_what->stride, best_sad);
+        if (sad < best_sad) {
+          sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost,
+                                error_per_bit);
+          if (sad < best_sad) {
+            best_sad = sad;
             best_site = j;
           }
         }
@@ -1627,7 +1620,7 @@
       ref_mv->col += neighbors[best_site].col;
     }
   }
-  return bestsad;
+  return best_sad;
 }
 
 int vp9_refining_search_sadx4(const MACROBLOCK *x,
@@ -1735,46 +1728,36 @@
                              int *mvjcost, int *mvcost[2],
                              const MV *center_mv,
                              const uint8_t *second_pred, int w, int h) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
   const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
                            {-1, -1}, {1, -1}, {-1, 1}, {1, 1}};
-  int i, j;
-
-  const uint8_t *what = x->plane[0].src.buf;
-  const int what_stride = x->plane[0].src.stride;
-  const uint8_t *in_what = xd->plane[0].pre[0].buf;
-  const int in_what_stride = xd->plane[0].pre[0].stride;
-  const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride +
-                                             ref_mv->col];
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const struct buf_2d *const what = &x->plane[0].src;
+  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
-
   const int *mvjsadcost = x->nmvjointsadcost;
   int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-
-  /* Get compound pred by averaging two pred blocks. */
-  unsigned int bestsad = fn_ptr->sdaf(what, what_stride,
-                                      best_address, in_what_stride,
-                                      second_pred, 0x7fffffff) +
+  unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride,
+      get_buf_from_mv(in_what, ref_mv), in_what->stride,
+      second_pred, 0x7fffffff) +
       mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
+  int i, j;
 
   for (i = 0; i < search_range; ++i) {
     int best_site = -1;
 
-    for (j = 0; j < 8; j++) {
-      const MV this_mv = {ref_mv->row + neighbors[j].row,
-                          ref_mv->col + neighbors[j].col};
+    for (j = 0; j < 8; ++j) {
+      const MV mv = {ref_mv->row + neighbors[j].row,
+                     ref_mv->col + neighbors[j].col};
 
-      if (is_mv_in(x, &this_mv)) {
-        const uint8_t *check_here = &in_what[this_mv.row * in_what_stride +
-                                                this_mv.col];
-        unsigned int thissad = fn_ptr->sdaf(what, what_stride,
-                                            check_here, in_what_stride,
-                                            second_pred, bestsad);
-        if (thissad < bestsad) {
-          thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+      if (is_mv_in(x, &mv)) {
+        unsigned int sad = fn_ptr->sdaf(what->buf, what->stride,
+            get_buf_from_mv(in_what, &mv), in_what->stride,
+            second_pred, best_sad);
+        if (sad < best_sad) {
+          sad += mvsad_err_cost(&mv, &fcenter_mv,
                                     mvjsadcost, mvsadcost, error_per_bit);
-          if (thissad < bestsad) {
-            bestsad = thissad;
+          if (sad < best_sad) {
+            best_sad = sad;
             best_site = j;
           }
         }
@@ -1788,5 +1771,5 @@
       ref_mv->col += neighbors[best_site].col;
     }
   }
-  return bestsad;
+  return best_sad;
 }
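The new get_buf_from_mv() helper replaces the repeated &in_what[mv.row * stride + mv.col] indexing: it maps a full-pel motion vector to the start of the candidate block inside a buf_2d. Illustration with hypothetical values:

    const struct buf_2d *in_what = &xd->plane[0].pre[0];  /* assume stride == 80 */
    const MV mv = {2, -3};
    /* get_buf_from_mv(in_what, &mv) == in_what->buf + 2 * 80 - 3
                                     == in_what->buf + 157 */
    const uint8_t *check_here = get_buf_from_mv(in_what, &mv);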
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 02ba216..29717f7 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -27,8 +27,9 @@
 #include "vp9/common/vp9_systemdependent.h"
 #include "vp9/common/vp9_tile_common.h"
 
+#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
+#include "vp9/encoder/vp9_aq_variance.h"
 #include "vp9/encoder/vp9_bitstream.h"
-#include "vp9/encoder/vp9_craq.h"
 #include "vp9/encoder/vp9_encodeframe.h"
 #include "vp9/encoder/vp9_encodemv.h"
 #include "vp9/encoder/vp9_firstpass.h"
@@ -39,7 +40,6 @@
 #include "vp9/encoder/vp9_rdopt.h"
 #include "vp9/encoder/vp9_segmentation.h"
 #include "vp9/encoder/vp9_temporal_filter.h"
-#include "vp9/encoder/vp9_vaq.h"
 #include "vp9/encoder/vp9_resize.h"
 #include "vp9/encoder/vp9_svc_layercontext.h"
 
@@ -148,13 +148,13 @@
   static int init_done = 0;
 
   if (!init_done) {
-    vp9_initialize_common();
+    vp9_init_neighbors();
+    vp9_init_quant_tables();
+
     vp9_coef_tree_initialize();
     vp9_tokenize_initialize();
-    vp9_init_quant_tables();
     vp9_init_me_luts();
     vp9_rc_init_minq_luts();
-    // init_base_skip_probs();
     vp9_entropy_mv_init();
     vp9_entropy_mode_init();
     init_done = 1;
@@ -840,7 +840,7 @@
   if (speed >= 4) {
     sf->optimize_coefficients = 0;
     sf->disable_split_mask = DISABLE_ALL_SPLIT;
-    sf->use_fast_lpf_pick = 2;
+    sf->lpf_pick = LPF_PICK_FROM_Q;
     sf->encode_breakout_thresh = 700;
   }
   if (speed >= 5) {
@@ -939,7 +939,7 @@
   sf->use_rd_breakout = 0;
   sf->skip_encode_sb = 0;
   sf->use_uv_intra_rd_estimate = 0;
-  sf->use_fast_lpf_pick = 0;
+  sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE;
   sf->use_fast_coef_updates = 0;
   sf->use_fast_coef_costing = 0;
   sf->mode_skip_start = MAX_MODES;  // Mode index at which mode skip mask set
@@ -1202,9 +1202,39 @@
   cm->log2_tile_rows = cpi->oxcf.tile_rows;
 }
 
+static void init_rate_control(const VP9_CONFIG *oxcf, int pass,
+                              RATE_CONTROL *rc) {
+  if (pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) {
+    rc->avg_frame_qindex[0] = oxcf->worst_allowed_q;
+    rc->avg_frame_qindex[1] = oxcf->worst_allowed_q;
+    rc->avg_frame_qindex[2] = oxcf->worst_allowed_q;
+  } else {
+    rc->avg_frame_qindex[0] = (oxcf->worst_allowed_q +
+                                   oxcf->best_allowed_q) / 2;
+    rc->avg_frame_qindex[1] = (oxcf->worst_allowed_q +
+                                   oxcf->best_allowed_q) / 2;
+    rc->avg_frame_qindex[2] = (oxcf->worst_allowed_q +
+                                   oxcf->best_allowed_q) / 2;
+  }
+
+  rc->last_q[0] = oxcf->best_allowed_q;
+  rc->last_q[1] = oxcf->best_allowed_q;
+  rc->last_q[2] = oxcf->best_allowed_q;
+
+  rc->buffer_level =    oxcf->starting_buffer_level;
+  rc->bits_off_target = oxcf->starting_buffer_level;
+
+  rc->rolling_target_bits      = rc->av_per_frame_bandwidth;
+  rc->rolling_actual_bits      = rc->av_per_frame_bandwidth;
+  rc->long_rolling_target_bits = rc->av_per_frame_bandwidth;
+  rc->long_rolling_actual_bits = rc->av_per_frame_bandwidth;
+
+  rc->total_actual_bits = 0;
+  rc->total_target_vs_actual = 0;
+}
+
 static void init_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) {
   VP9_COMMON *const cm = &cpi->common;
-  RATE_CONTROL *const rc = &cpi->rc;
   int i;
 
   cpi->oxcf = *oxcf;
@@ -1230,35 +1260,6 @@
   // change includes all joint functionality
   vp9_change_config(cpi, oxcf);
 
-  // Initialize active best and worst q and average q values.
-  if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
-    rc->avg_frame_qindex[0] = cpi->oxcf.worst_allowed_q;
-    rc->avg_frame_qindex[1] = cpi->oxcf.worst_allowed_q;
-    rc->avg_frame_qindex[2] = cpi->oxcf.worst_allowed_q;
-  } else {
-    rc->avg_frame_qindex[0] = (cpi->oxcf.worst_allowed_q +
-                                  cpi->oxcf.best_allowed_q) / 2;
-    rc->avg_frame_qindex[1] = (cpi->oxcf.worst_allowed_q +
-                                  cpi->oxcf.best_allowed_q) / 2;
-    rc->avg_frame_qindex[2] = (cpi->oxcf.worst_allowed_q +
-                                  cpi->oxcf.best_allowed_q) / 2;
-  }
-  rc->last_q[0] = cpi->oxcf.best_allowed_q;
-  rc->last_q[1] = cpi->oxcf.best_allowed_q;
-  rc->last_q[2] = cpi->oxcf.best_allowed_q;
-
-  // Initialise the starting buffer levels
-  rc->buffer_level    = cpi->oxcf.starting_buffer_level;
-  rc->bits_off_target = cpi->oxcf.starting_buffer_level;
-
-  rc->rolling_target_bits      = rc->av_per_frame_bandwidth;
-  rc->rolling_actual_bits      = rc->av_per_frame_bandwidth;
-  rc->long_rolling_target_bits = rc->av_per_frame_bandwidth;
-  rc->long_rolling_actual_bits = rc->av_per_frame_bandwidth;
-
-  rc->total_actual_bits = 0;
-  rc->total_target_vs_actual = 0;
-
   cpi->static_mb_pct = 0;
 
   cpi->lst_fb_idx = 0;
@@ -1272,15 +1273,11 @@
     cpi->fixed_divide[i] = 0x80000 / i;
 }
 
-void vp9_change_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) {
+void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) {
   VP9_COMMON *const cm = &cpi->common;
 
-  if (!cpi || !oxcf)
-    return;
-
-  if (cm->version != oxcf->version) {
+  if (cm->version != oxcf->version)
     cm->version = oxcf->version;
-  }
 
   cpi->oxcf = *oxcf;
 
@@ -1631,6 +1628,7 @@
   cpi->use_svc = 0;
 
   init_config(cpi, oxcf);
+  init_rate_control(&cpi->oxcf, cpi->pass, &cpi->rc);
   init_pick_mode_context(cpi);
 
   cm->current_video_frame = 0;
@@ -2518,7 +2516,7 @@
 
     vpx_usec_timer_start(&timer);
 
-    vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.use_fast_lpf_pick);
+    vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick);
 
     vpx_usec_timer_mark(&timer);
     cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer);
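init_rate_control() takes over the rate-control seeding that init_config() used to do, and vp9_create_compressor() calls it right after init_config(). The behavior is unchanged: one-pass CBR (USAGE_STREAM_FROM_SERVER) starts avg_frame_qindex at worst_allowed_q, everything else at the midpoint of the allowed range. A quick check with hypothetical limits:

    /* worst_allowed_q = 63, best_allowed_q = 4
       CBR, pass 0 : avg_frame_qindex[i] = 63
       otherwise   : avg_frame_qindex[i] = (63 + 4) / 2 = 33  (integer division) */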
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 2aa67ae..022a63f 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -23,7 +23,7 @@
 #include "vp9/common/vp9_entropymode.h"
 #include "vp9/common/vp9_onyxc_int.h"
 
-#include "vp9/encoder/vp9_craq.h"
+#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
 #include "vp9/encoder/vp9_encodemb.h"
 #include "vp9/encoder/vp9_firstpass.h"
 #include "vp9/encoder/vp9_lookahead.h"
@@ -210,6 +210,15 @@
   VAR_BASED_PARTITION
 } PARTITION_SEARCH_TYPE;
 
+typedef enum {
+  // Try the full image with different values.
+  LPF_PICK_FROM_FULL_IMAGE,
+  // Try a small portion of the image with different values.
+  LPF_PICK_FROM_SUBIMAGE,
+  // Estimate the level based on quantizer and frame type
+  LPF_PICK_FROM_Q,
+} LPF_PICK_METHOD;
+
 typedef struct {
   // Frame level coding parameter update
   int frame_parameter_update;
@@ -380,11 +389,8 @@
   // final encode.
   int use_uv_intra_rd_estimate;
 
-  // This feature controls how the loop filter level is determined:
-  // 0: Try the full image with different values.
-  // 1: Try a small portion of the image with different values.
-  // 2: Estimate the level based on quantizer and frame type
-  int use_fast_lpf_pick;
+  // This feature controls how the loop filter level is determined.
+  LPF_PICK_METHOD lpf_pick;
 
   // This feature limits the number of coefficients updates we actually do
   // by only looking at counts from 1/2 the bands.
@@ -774,7 +780,6 @@
   unsigned int activity_avg;
   unsigned int *mb_activity_map;
   int *mb_norm_activity_map;
-  int output_partition;
 
   // Force next frame to intra when kf_auto says so.
   int force_next_frame_intra;
@@ -820,7 +825,7 @@
 struct VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf);
 void vp9_remove_compressor(VP9_COMP *cpi);
 
-void vp9_change_config(VP9_COMP *cpi, VP9_CONFIG *oxcf);
+void vp9_change_config(VP9_COMP *cpi, const VP9_CONFIG *oxcf);
 
   // Receive a frame's worth of data. The caller can assume that a copy of this
   // frame is made and not just a copy of the pointer.
@@ -869,8 +874,8 @@
 
 int vp9_get_quantizer(struct VP9_COMP *cpi);
 
-static int get_ref_frame_idx(const VP9_COMP *cpi,
-                             MV_REFERENCE_FRAME ref_frame) {
+static INLINE int get_ref_frame_idx(const VP9_COMP *cpi,
+                                    MV_REFERENCE_FRAME ref_frame) {
   if (ref_frame == LAST_FRAME) {
     return cpi->lst_fb_idx;
   } else if (ref_frame == GOLDEN_FRAME) {
@@ -880,15 +885,25 @@
   }
 }
 
-static YV12_BUFFER_CONFIG *get_ref_frame_buffer(VP9_COMP *cpi,
-                                                MV_REFERENCE_FRAME ref_frame) {
-  VP9_COMMON *const cm = &cpi->common;
-  return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi,
-                                                             ref_frame)]].buf;
+static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
+    VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {
+  VP9_COMMON *const cm = &cpi->common;
+  return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]]
+      .buf;
 }
 
 void vp9_set_speed_features(VP9_COMP *cpi);
 
+static INLINE int get_token_alloc(int mb_rows, int mb_cols) {
+  // TODO(JBB): make this work for alpha channel and double check we can't
+  // exceed this token count if we have a 32x32 transform crossing a boundary
+  // at a multiple of 16.
+  // mb_rows, cols are in units of 16 pixels. We assume 3 planes all at full
+  // resolution. We assume up to 1 token per pixel, and then allow
+  // a headroom of 4.
+  return mb_rows * mb_cols * (16 * 16 * 3 + 4);
+}
+
 int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source,
                     const YV12_BUFFER_CONFIG *reference);
 
@@ -903,16 +918,13 @@
 
 void vp9_update_reference_frames(VP9_COMP *cpi);
 
-static int get_token_alloc(int mb_rows, int mb_cols) {
-  return mb_rows * mb_cols * (48 * 16 + 4);
-}
-
 extern const int q_trans[];
 
 int64_t vp9_rescale(int64_t val, int64_t num, int denom);
 
-static void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd,
-                         MV_REFERENCE_FRAME ref0, MV_REFERENCE_FRAME ref1) {
+static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd,
+                                MV_REFERENCE_FRAME ref0,
+                                MV_REFERENCE_FRAME ref1) {
   xd->block_refs[0] = &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME
                                                          : 0];
   xd->block_refs[1] = &cm->frame_refs[ref1 >= LAST_FRAME ? ref1 - LAST_FRAME
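get_token_alloc() now budgets one token per pixel for all three planes at full resolution, i.e. 16 * 16 * 3 per macroblock plus a headroom of 4; this is the same 772 per macroblock as the old 48 * 16 + 4, just written out per plane. A worked example for a hypothetical 1920x1080 stream (mb_cols = 120, mb_rows = 68):

    /* tokens = 120 * 68 * (16 * 16 * 3 + 4)
              = 8160 * 772
              = 6299520 TOKENEXTRA entries */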
diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c
index 7695f16..92ad1e7 100644
--- a/vp9/encoder/vp9_picklpf.c
+++ b/vp9/encoder/vp9_picklpf.c
@@ -132,14 +132,14 @@
 }
 
 void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
-                           int method) {
+                           LPF_PICK_METHOD method) {
   VP9_COMMON *const cm = &cpi->common;
   struct loopfilter *const lf = &cm->lf;
 
   lf->sharpness_level = cm->frame_type == KEY_FRAME ? 0
                                                     : cpi->oxcf.sharpness;
 
-  if (method == 2) {
+  if (method == LPF_PICK_FROM_Q) {
     const int min_filter_level = 0;
     const int max_filter_level = get_max_filter_level(cpi);
     const int q = vp9_ac_quant(cm->base_qindex, 0);
@@ -150,6 +150,6 @@
       filt_guess -= 4;
     lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level);
   } else {
-    search_filter_level(sd, cpi, method == 1);
+    search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE);
   }
 }
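vp9_pick_filter_level() now takes the LPF_PICK_METHOD enum instead of a magic int, with the old values mapping to 0 -> LPF_PICK_FROM_FULL_IMAGE, 1 -> LPF_PICK_FROM_SUBIMAGE and 2 -> LPF_PICK_FROM_Q. The call site only changes in type; in this patch the speed features select LPF_PICK_FROM_Q at speed >= 4:

    /* as wired up in vp9_onyx_if.c above */
    vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick);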
diff --git a/vp9/encoder/vp9_picklpf.h b/vp9/encoder/vp9_picklpf.h
index 203ef87..7d08ddb 100644
--- a/vp9/encoder/vp9_picklpf.h
+++ b/vp9/encoder/vp9_picklpf.h
@@ -16,11 +16,13 @@
 extern "C" {
 #endif
 
+#include "vp9/encoder/vp9_onyx_int.h"
+
 struct yv12_buffer_config;
 struct VP9_COMP;
 
 void vp9_pick_filter_level(const struct yv12_buffer_config *sd,
-                           struct VP9_COMP *cpi, int method);
+                           struct VP9_COMP *cpi, LPF_PICK_METHOD method);
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index edc48bb..12743b2 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -55,10 +55,9 @@
 // formulaic approach to facilitate easier adjustment of the Q tables.
 // The formulae were derived from computing a 3rd order polynomial best
 // fit to the original data (after plotting real maxq vs minq (not q index))
-static int calculate_minq_index(double maxq,
-                                double x3, double x2, double x1, double c) {
+static int get_minq_index(double maxq, double x3, double x2, double x1) {
   int i;
-  const double minqtarget = MIN(((x3 * maxq + x2) * maxq + x1) * maxq + c,
+  const double minqtarget = MIN(((x3 * maxq + x2) * maxq + x1) * maxq,
                                 maxq);
 
   // Special case handling to deal with the step from q2.0
@@ -66,57 +65,26 @@
   if (minqtarget <= 2.0)
     return 0;
 
-  for (i = 0; i < QINDEX_RANGE; i++) {
+  for (i = 0; i < QINDEX_RANGE; i++)
     if (minqtarget <= vp9_convert_qindex_to_q(i))
       return i;
-  }
 
   return QINDEX_RANGE - 1;
 }
 
-void vp9_rc_init_minq_luts(void) {
+void vp9_rc_init_minq_luts() {
   int i;
 
   for (i = 0; i < QINDEX_RANGE; i++) {
     const double maxq = vp9_convert_qindex_to_q(i);
 
-
-    kf_low_motion_minq[i] = calculate_minq_index(maxq,
-                                                 0.000001,
-                                                 -0.0004,
-                                                 0.15,
-                                                 0.0);
-    kf_high_motion_minq[i] = calculate_minq_index(maxq,
-                                                  0.000002,
-                                                  -0.0012,
-                                                  0.50,
-                                                  0.0);
-
-    gf_low_motion_minq[i] = calculate_minq_index(maxq,
-                                                 0.0000015,
-                                                 -0.0009,
-                                                 0.32,
-                                                 0.0);
-    gf_high_motion_minq[i] = calculate_minq_index(maxq,
-                                                  0.0000021,
-                                                  -0.00125,
-                                                  0.50,
-                                                  0.0);
-    afq_low_motion_minq[i] = calculate_minq_index(maxq,
-                                                  0.0000015,
-                                                  -0.0009,
-                                                  0.33,
-                                                  0.0);
-    afq_high_motion_minq[i] = calculate_minq_index(maxq,
-                                                   0.0000021,
-                                                   -0.00125,
-                                                   0.55,
-                                                   0.0);
-    inter_minq[i] = calculate_minq_index(maxq,
-                                         0.00000271,
-                                         -0.00113,
-                                         0.75,
-                                         0.0);
+    kf_low_motion_minq[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.15);
+    kf_high_motion_minq[i] = get_minq_index(maxq, 0.000002, -0.0012, 0.50);
+    gf_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.32);
+    gf_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50);
+    afq_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.33);
+    afq_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55);
+    inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.75);
   }
 }
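get_minq_index() drops the constant term that every caller passed as 0.0; the target is the cubic x3*maxq^3 + x2*maxq^2 + x1*maxq, capped at maxq, and the table entry is the smallest qindex whose real q reaches that target. A numeric check with the inter_minq coefficients and a hypothetical maxq of 40.0:

    /* ((0.00000271 * 40 - 0.00113) * 40 + 0.75) * 40 = 28.37 (approx.)
       inter_minq[i] = smallest i with vp9_convert_qindex_to_q(i) >= 28.37 */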
 
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index ed6266f..2754395 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -92,8 +92,7 @@
 
 double vp9_convert_qindex_to_q(int qindex);
 
-// initialize luts for minq
-void vp9_rc_init_minq_luts(void);
+void vp9_rc_init_minq_luts();
 
 // Generally at the high level, the following flow is expected
 // to be enforced for rate control:
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 2fd25ef..93f9999 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -566,7 +566,7 @@
   const int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
   unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
                    x->token_costs[tx_size][type][is_inter_block(mbmi)];
-  uint8_t *p_tok = x->token_cache;
+  uint8_t token_cache[32 * 32];
   int pt = combine_entropy_contexts(*A, *L);
   int c, cost;
   // Check for consistency of tx_size with mode info
@@ -584,7 +584,7 @@
     int v = qcoeff[0];
     int prev_t = vp9_dct_value_tokens_ptr[v].token;
     cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
-    p_tok[0] = vp9_pt_energy_class[prev_t];
+    token_cache[0] = vp9_pt_energy_class[prev_t];
     ++token_costs;
 
     // ac tokens
@@ -597,9 +597,9 @@
       if (use_fast_coef_costing) {
         cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
       } else {
-        pt = get_coef_context(nb, p_tok, c);
+        pt = get_coef_context(nb, token_cache, c);
         cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
-        p_tok[rc] = vp9_pt_energy_class[t];
+        token_cache[rc] = vp9_pt_energy_class[t];
       }
       prev_t = t;
       if (!--band_left) {
@@ -613,7 +613,7 @@
       if (use_fast_coef_costing) {
         cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
       } else {
-        pt = get_coef_context(nb, p_tok, c);
+        pt = get_coef_context(nb, token_cache, c);
         cost += (*token_costs)[0][pt][EOB_TOKEN];
       }
     }
diff --git a/vp9/encoder/vp9_sad.c b/vp9/encoder/vp9_sad.c
index 58c5df4..9d8da0d 100644
--- a/vp9/encoder/vp9_sad.c
+++ b/vp9/encoder/vp9_sad.c
@@ -44,7 +44,7 @@
                                       const uint8_t *second_pred, \
                                       unsigned int max_sad) { \
   uint8_t comp_pred[m * n]; \
-  comp_avg_pred(comp_pred, second_pred, m, n, ref_ptr, ref_stride); \
+  vp9_comp_avg_pred(comp_pred, second_pred, m, n, ref_ptr, ref_stride); \
   return sad(src_ptr, src_stride, comp_pred, m, m, n); \
 }
 
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index eba7bc6..4ccc2bd 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -15,13 +15,14 @@
 
 void vp9_init_layer_context(VP9_COMP *const cpi) {
   const VP9_CONFIG *const oxcf = &cpi->oxcf;
-  int temporal_layer = 0;
+  int layer;
+
   cpi->svc.spatial_layer_id = 0;
   cpi->svc.temporal_layer_id = 0;
-  for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers;
-      ++temporal_layer) {
-    LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer];
+  for (layer = 0; layer < cpi->svc.number_temporal_layers; ++layer) {
+    LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer];
     RATE_CONTROL *const lrc = &lc->rc;
+
     lrc->avg_frame_qindex[INTER_FRAME] = q_trans[oxcf->worst_allowed_q];
     lrc->last_q[INTER_FRAME] = q_trans[oxcf->worst_allowed_q];
     lrc->ni_av_qi = q_trans[oxcf->worst_allowed_q];
@@ -35,11 +36,9 @@
     lrc->decimation_factor = 0;
     lrc->rate_correction_factor = 1.0;
     lrc->key_frame_rate_correction_factor = 1.0;
-    lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] *
-        1000;
-    lrc->buffer_level =
-        vp9_rescale((int)(oxcf->starting_buffer_level),
-                    lc->target_bandwidth, 1000);
+    lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000;
+    lrc->buffer_level = vp9_rescale((int)(oxcf->starting_buffer_level),
+                                    lc->target_bandwidth, 1000);
     lrc->bits_off_target = lrc->buffer_level;
   }
 }
@@ -49,14 +48,14 @@
                                             const int target_bandwidth) {
   const VP9_CONFIG *const oxcf = &cpi->oxcf;
   const RATE_CONTROL *const rc = &cpi->rc;
-  int temporal_layer = 0;
+  int layer;
   float bitrate_alloc = 1.0;
-  for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers;
-      ++temporal_layer) {
-    LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer];
+
+  for (layer = 0; layer < cpi->svc.number_temporal_layers; ++layer) {
+    LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer];
     RATE_CONTROL *const lrc = &lc->rc;
-    lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] * 1000;
-    bitrate_alloc = (float)lc->target_bandwidth / (float)target_bandwidth;
+    lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000;
+    bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth;
     // Update buffer-related quantities.
     lc->starting_buffer_level =
         (int64_t)(oxcf->starting_buffer_level * bitrate_alloc);
@@ -67,7 +66,7 @@
     lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size);
     lrc->buffer_level = MIN(lrc->buffer_level, lc->maximum_buffer_size);
     // Update framerate-related quantities.
-    lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer];
+    lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[layer];
     lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
     lrc->max_frame_bandwidth = rc->max_frame_bandwidth;
     // Update qp-related quantities.
@@ -76,22 +75,27 @@
   }
 }
 
+static LAYER_CONTEXT *get_temporal_layer_context(SVC *svc) {
+  return &svc->layer_context[svc->temporal_layer_id];
+}
+
 void vp9_update_layer_framerate(VP9_COMP *const cpi) {
-  int temporal_layer = cpi->svc.temporal_layer_id;
+  const int layer = cpi->svc.temporal_layer_id;
   const VP9_CONFIG *const oxcf = &cpi->oxcf;
-  LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer];
+  LAYER_CONTEXT *const lc = get_temporal_layer_context(&cpi->svc);
   RATE_CONTROL *const lrc = &lc->rc;
-  lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer];
+
+  lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[layer];
   lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
   lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth;
   // Update the average layer frame size (non-cumulative per-frame-bw).
-  if (temporal_layer == 0) {
+  if (layer == 0) {
     lc->avg_frame_size = lrc->av_per_frame_bandwidth;
   } else {
-    double prev_layer_framerate = oxcf->framerate /
-        oxcf->ts_rate_decimator[temporal_layer - 1];
-    int prev_layer_target_bandwidth =
-        oxcf->ts_target_bitrate[temporal_layer - 1] * 1000;
+    const double prev_layer_framerate =
+        oxcf->framerate / oxcf->ts_rate_decimator[layer - 1];
+    const int prev_layer_target_bandwidth =
+        oxcf->ts_target_bitrate[layer - 1] * 1000;
     lc->avg_frame_size =
         (int)((lc->target_bandwidth - prev_layer_target_bandwidth) /
               (lc->framerate - prev_layer_framerate));
@@ -99,10 +103,10 @@
 }
 
 void vp9_restore_layer_context(VP9_COMP *const cpi) {
-  int temporal_layer = cpi->svc.temporal_layer_id;
-  LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer];
-  int frame_since_key = cpi->rc.frames_since_key;
-  int frame_to_key = cpi->rc.frames_to_key;
+  LAYER_CONTEXT *const lc = get_temporal_layer_context(&cpi->svc);
+  const int old_frame_since_key = cpi->rc.frames_since_key;
+  const int old_frame_to_key = cpi->rc.frames_to_key;
+
   cpi->rc = lc->rc;
   cpi->oxcf.target_bandwidth = lc->target_bandwidth;
   cpi->oxcf.starting_buffer_level = lc->starting_buffer_level;
@@ -111,17 +115,18 @@
   cpi->output_framerate = lc->framerate;
   // Reset the frames_since_key and frames_to_key counters to their values
   // before the layer restore. Keep these defined for the stream (not layer).
-  cpi->rc.frames_since_key = frame_since_key;
-  cpi->rc.frames_to_key = frame_to_key;
+  cpi->rc.frames_since_key = old_frame_since_key;
+  cpi->rc.frames_to_key = old_frame_to_key;
 }
 
 void vp9_save_layer_context(VP9_COMP *const cpi) {
-  int temporal_layer = cpi->svc.temporal_layer_id;
-  LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer];
+  const VP9_CONFIG *const oxcf = &cpi->oxcf;
+  LAYER_CONTEXT *const lc = get_temporal_layer_context(&cpi->svc);
+
   lc->rc = cpi->rc;
-  lc->target_bandwidth = (int)cpi->oxcf.target_bandwidth;
-  lc->starting_buffer_level = cpi->oxcf.starting_buffer_level;
-  lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level;
-  lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size;
+  lc->target_bandwidth = (int)oxcf->target_bandwidth;
+  lc->starting_buffer_level = oxcf->starting_buffer_level;
+  lc->optimal_buffer_level = oxcf->optimal_buffer_level;
+  lc->maximum_buffer_size = oxcf->maximum_buffer_size;
   lc->framerate = cpi->output_framerate;
 }
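In vp9_update_layer_framerate() an enhancement temporal layer's avg_frame_size is the extra bits the layer adds divided by the extra frames per second it adds. With hypothetical settings of two temporal layers, 15/30 fps and 200/400 kbps cumulative targets:

    /* layer 1: target_bandwidth = 400000, previous layer = 200000 (bits/s)
                framerate = 30, previous layer framerate = 15
       avg_frame_size = (400000 - 200000) / (30 - 15) = 13333 bits per layer-1 frame */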
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index bb5f1c2..b6211e5 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -108,7 +108,7 @@
   vp9_tokens_from_tree(vp9_coef_encodings, vp9_coef_tree);
 }
 
-static void fill_value_tokens() {
+void vp9_tokenize_initialize() {
   TOKENVALUE *const t = dct_value_tokens + DCT_MAX_VALUE;
   const vp9_extra_bit *const e = vp9_extra_bits;
 
@@ -162,7 +162,6 @@
   VP9_COMP *cpi;
   MACROBLOCKD *xd;
   TOKENEXTRA **tp;
-  uint8_t *token_cache;
 };
 
 static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize,
@@ -213,7 +212,7 @@
   VP9_COMP *cpi = args->cpi;
   MACROBLOCKD *xd = args->xd;
   TOKENEXTRA **tp = args->tp;
-  uint8_t *token_cache = args->token_cache;
+  uint8_t token_cache[32 * 32];
   struct macroblock_plane *p = &cpi->mb.plane[plane];
   struct macroblockd_plane *pd = &xd->plane[plane];
   MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
@@ -315,7 +314,7 @@
   const int ctx = vp9_get_skip_context(xd);
   const int skip_inc = !vp9_segfeature_active(&cm->seg, mbmi->segment_id,
                                               SEG_LVL_SKIP);
-  struct tokenize_b_args arg = {cpi, xd, t, cpi->mb.token_cache};
+  struct tokenize_b_args arg = {cpi, xd, t};
   if (mbmi->skip) {
     if (!dry_run)
       cm->counts.skip[ctx][1] += skip_inc;
@@ -333,7 +332,3 @@
     *t = t_backup;
   }
 }
-
-void vp9_tokenize_initialize() {
-  fill_value_tokens();
-}
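Both here and in vp9_rdopt.c above, the shared x->token_cache pointer is replaced by a stack-local array sized for the worst case:

    /* 32 * 32 = 1024 entries: one energy-class byte per coefficient position of
       the largest (32x32) transform, so a local cache suffices and the
       token_cache member of tokenize_b_args is no longer needed. */
    uint8_t token_cache[32 * 32];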
diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c
index 8bc3850..996f730 100644
--- a/vp9/encoder/vp9_variance.c
+++ b/vp9/encoder/vp9_variance.c
@@ -216,7 +216,7 @@
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 33, 64, hfilter);
   var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);
-  comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64);
+  vp9_comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64);
   return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
 }
 
@@ -273,7 +273,7 @@
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 65, 32, hfilter);
   var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);
-  comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32);
+  vp9_comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32);
   return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
 }
 
@@ -330,7 +330,7 @@
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 17, 32, hfilter);
   var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);
-  comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32);
+  vp9_comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32);
   return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
 }
 
@@ -387,7 +387,7 @@
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 33, 16, hfilter);
   var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);
-  comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16);
+  vp9_comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16);
   return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
 }
 
@@ -614,7 +614,7 @@
 
   // Now filter Vertically
   var_filter_block2d_bil_second_pass(fdata3, temp2, 4,  4,  4,  4, vfilter);
-  comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4);
+  vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4);
   return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
 }
 
@@ -658,7 +658,7 @@
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 9, 8, hfilter);
   var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
-  comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8);
+  vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8);
   return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
 }
 
@@ -703,7 +703,7 @@
                                     1, 17, 16, hfilter);
   var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);
 
-  comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16);
+  vp9_comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16);
   return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
 }
 
@@ -747,7 +747,7 @@
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 65, 64, hfilter);
   var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);
-  comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64);
+  vp9_comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64);
   return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
 }
 
@@ -791,7 +791,7 @@
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 33, 32, hfilter);
   var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);
-  comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32);
+  vp9_comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32);
   return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
 }
 
@@ -955,7 +955,7 @@
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 9, 16, hfilter);
   var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter);
-  comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16);
+  vp9_comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16);
   return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
 }
 
@@ -999,7 +999,7 @@
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 17, 8, hfilter);
   var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);
-  comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8);
+  vp9_comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8);
   return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
 }
 
@@ -1043,7 +1043,7 @@
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 5, 8, hfilter);
   var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
-  comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8);
+  vp9_comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8);
   return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
 }
 
@@ -1089,6 +1089,23 @@
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 9, 4, hfilter);
   var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
-  comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4);
+  vp9_comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4);
   return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
 }
+
+
+void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
+                       int height, const uint8_t *ref, int ref_stride) {
+  int i, j;
+
+  for (i = 0; i < height; i++) {
+    for (j = 0; j < width; j++) {
+      int tmp;
+      tmp = pred[j] + ref[j];
+      comp_pred[j] = (tmp + 1) >> 1;
+    }
+    comp_pred += width;
+    pred += width;
+    ref += ref_stride;
+  }
+}
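comp_avg_pred() moves out of the header and becomes the exported vp9_comp_avg_pred(); it forms the compound prediction as a rounding average, comp_pred[j] = (pred[j] + ref[j] + 1) >> 1. With hypothetical sample values:

    /* pred[j] = 10, ref[j] = 13  ->  (10 + 13 + 1) >> 1 = 12
       pred[j] = 10, ref[j] = 12  ->  (10 + 12 + 1) >> 1 = 11 */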
diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h
index 3bc2091..62e20dc 100644
--- a/vp9/encoder/vp9_variance.h
+++ b/vp9/encoder/vp9_variance.h
@@ -100,21 +100,9 @@
   vp9_sad_multi_d_fn_t       sdx4df;
 } vp9_variance_fn_ptr_t;
 
-static void comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
-                          int height, const uint8_t *ref, int ref_stride) {
-  int i, j;
+void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
+                       int height, const uint8_t *ref, int ref_stride);
 
-  for (i = 0; i < height; i++) {
-    for (j = 0; j < width; j++) {
-      int tmp;
-      tmp = pred[j] + ref[j];
-      comp_pred[j] = (tmp + 1) >> 1;
-    }
-    comp_pred += width;
-    pred += width;
-    ref += ref_stride;
-  }
-}
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp9/encoder/vp9_write_bit_buffer.c b/vp9/encoder/vp9_write_bit_buffer.c
new file mode 100644
index 0000000..962d0ca
--- /dev/null
+++ b/vp9/encoder/vp9_write_bit_buffer.c
@@ -0,0 +1,34 @@
+/*
+ *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/encoder/vp9_write_bit_buffer.h"
+
+size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb) {
+  return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0);
+}
+
+void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit) {
+  const int off = (int)wb->bit_offset;
+  const int p = off / CHAR_BIT;
+  const int q = CHAR_BIT - 1 - off % CHAR_BIT;
+  if (q == CHAR_BIT - 1) {
+    wb->bit_buffer[p] = bit << q;
+  } else {
+    wb->bit_buffer[p] &= ~(1 << q);
+    wb->bit_buffer[p] |= bit << q;
+  }
+  wb->bit_offset = off + 1;
+}
+
+void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, int data, int bits) {
+  int bit;
+  for (bit = bits - 1; bit >= 0; bit--)
+    vp9_wb_write_bit(wb, (data >> bit) & 1);
+}
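vp9_wb_write_bit() packs bits MSB-first within each byte. Tracing a hypothetical vp9_wb_write_literal(wb, 0x5, 3) on a fresh buffer:

    /* bit_offset 0: write 1 -> bit_buffer[0] = 0x80
       bit_offset 1: write 0 -> bit_buffer[0] = 0x80
       bit_offset 2: write 1 -> bit_buffer[0] = 0xA0
       vp9_rb_bytes_written(wb) == 3 / CHAR_BIT + 1 == 1 */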
diff --git a/vp9/encoder/vp9_write_bit_buffer.h b/vp9/encoder/vp9_write_bit_buffer.h
index 1795e05..073608d 100644
--- a/vp9/encoder/vp9_write_bit_buffer.h
+++ b/vp9/encoder/vp9_write_bit_buffer.h
@@ -24,29 +24,11 @@
   size_t bit_offset;
 };
 
-static size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb) {
-  return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0);
-}
+size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb);
 
-static void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit) {
-  const int off = (int)wb->bit_offset;
-  const int p = off / CHAR_BIT;
-  const int q = CHAR_BIT - 1 - off % CHAR_BIT;
-  if (q == CHAR_BIT -1) {
-    wb->bit_buffer[p] = bit << q;
-  } else {
-    wb->bit_buffer[p] &= ~(1 << q);
-    wb->bit_buffer[p] |= bit << q;
-  }
-  wb->bit_offset = off + 1;
-}
+void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit);
 
-static void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb,
-                              int data, int bits) {
-  int bit;
-  for (bit = bits - 1; bit >= 0; bit--)
-    vp9_wb_write_bit(wb, (data >> bit) & 1);
-}
+void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, int data, int bits);
 
 
 #ifdef __cplusplus
diff --git a/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c b/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c
new file mode 100644
index 0000000..f31b176
--- /dev/null
+++ b/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c
@@ -0,0 +1,167 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include <immintrin.h>  // AVX2
+#include "vpx/vpx_integer.h"
+
+void vp9_sad32x32x4d_avx2(uint8_t *src,
+                          int src_stride,
+                          uint8_t *ref[4],
+                          int ref_stride,
+                          unsigned int res[4]) {
+  __m256i src_reg, ref0_reg, ref1_reg, ref2_reg, ref3_reg;
+  __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3;
+  __m256i sum_mlow, sum_mhigh;
+  int i;
+  uint8_t *ref0, *ref1, *ref2, *ref3;
+
+  ref0 = ref[0];
+  ref1 = ref[1];
+  ref2 = ref[2];
+  ref3 = ref[3];
+  sum_ref0 = _mm256_set1_epi16(0);
+  sum_ref1 = _mm256_set1_epi16(0);
+  sum_ref2 = _mm256_set1_epi16(0);
+  sum_ref3 = _mm256_set1_epi16(0);
+  for (i = 0; i < 32 ; i++) {
+    // load src and all refs
+    src_reg = _mm256_load_si256((__m256i *)(src));
+    ref0_reg = _mm256_loadu_si256((__m256i *) (ref0));
+    ref1_reg = _mm256_loadu_si256((__m256i *) (ref1));
+    ref2_reg = _mm256_loadu_si256((__m256i *) (ref2));
+    ref3_reg = _mm256_loadu_si256((__m256i *) (ref3));
+    // sum of the absolute differences between each ref-i and src
+    ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg);
+    ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg);
+    ref2_reg = _mm256_sad_epu8(ref2_reg, src_reg);
+    ref3_reg = _mm256_sad_epu8(ref3_reg, src_reg);
+    // sum every ref-i
+    sum_ref0 = _mm256_add_epi32(sum_ref0, ref0_reg);
+    sum_ref1 = _mm256_add_epi32(sum_ref1, ref1_reg);
+    sum_ref2 = _mm256_add_epi32(sum_ref2, ref2_reg);
+    sum_ref3 = _mm256_add_epi32(sum_ref3, ref3_reg);
+
+    src += src_stride;
+    ref0 += ref_stride;
+    ref1 += ref_stride;
+    ref2 += ref_stride;
+    ref3 += ref_stride;
+  }
+  {
+    __m128i sum;
+    // in sum_ref-i the result is saved in the first 4 bytes
+    // the other 4 bytes are zeroed.
+    // sum_ref1 and sum_ref3 are shifted left by 4 bytes
+    sum_ref1 = _mm256_slli_si256(sum_ref1, 4);
+    sum_ref3 = _mm256_slli_si256(sum_ref3, 4);
+
+    // merge sum_ref0 and sum_ref1 also sum_ref2 and sum_ref3
+    sum_ref0 = _mm256_or_si256(sum_ref0, sum_ref1);
+    sum_ref2 = _mm256_or_si256(sum_ref2, sum_ref3);
+
+    // merge every 64 bit from each sum_ref-i
+    sum_mlow = _mm256_unpacklo_epi64(sum_ref0, sum_ref2);
+    sum_mhigh = _mm256_unpackhi_epi64(sum_ref0, sum_ref2);
+
+    // add the low 64 bit to the high 64 bit
+    sum_mlow = _mm256_add_epi32(sum_mlow, sum_mhigh);
+
+    // add the low 128 bit to the high 128 bit
+    sum = _mm_add_epi32(_mm256_castsi256_si128(sum_mlow),
+                        _mm256_extractf128_si256(sum_mlow, 1));
+
+    _mm_storeu_si128((__m128i *)(res), sum);
+  }
+}
+
+void vp9_sad64x64x4d_avx2(uint8_t *src,
+                          int src_stride,
+                          uint8_t *ref[4],
+                          int ref_stride,
+                          unsigned int res[4]) {
+  __m256i src_reg, srcnext_reg, ref0_reg, ref0next_reg;
+  __m256i ref1_reg, ref1next_reg, ref2_reg, ref2next_reg;
+  __m256i ref3_reg, ref3next_reg;
+  __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3;
+  __m256i sum_mlow, sum_mhigh;
+  int i;
+  uint8_t *ref0, *ref1, *ref2, *ref3;
+
+  ref0 = ref[0];
+  ref1 = ref[1];
+  ref2 = ref[2];
+  ref3 = ref[3];
+  sum_ref0 = _mm256_set1_epi16(0);
+  sum_ref1 = _mm256_set1_epi16(0);
+  sum_ref2 = _mm256_set1_epi16(0);
+  sum_ref3 = _mm256_set1_epi16(0);
+  for (i = 0; i < 64 ; i++) {
+    // load 64 bytes from src and all refs
+    src_reg = _mm256_load_si256((__m256i *)(src));
+    srcnext_reg = _mm256_load_si256((__m256i *)(src + 32));
+    ref0_reg = _mm256_loadu_si256((__m256i *) (ref0));
+    ref0next_reg = _mm256_loadu_si256((__m256i *) (ref0 + 32));
+    ref1_reg = _mm256_loadu_si256((__m256i *) (ref1));
+    ref1next_reg = _mm256_loadu_si256((__m256i *) (ref1 + 32));
+    ref2_reg = _mm256_loadu_si256((__m256i *) (ref2));
+    ref2next_reg = _mm256_loadu_si256((__m256i *) (ref2 + 32));
+    ref3_reg = _mm256_loadu_si256((__m256i *) (ref3));
+    ref3next_reg = _mm256_loadu_si256((__m256i *) (ref3 + 32));
+    // sum of the absolute differences between each ref-i and src
+    ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg);
+    ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg);
+    ref2_reg = _mm256_sad_epu8(ref2_reg, src_reg);
+    ref3_reg = _mm256_sad_epu8(ref3_reg, src_reg);
+    ref0next_reg = _mm256_sad_epu8(ref0next_reg, srcnext_reg);
+    ref1next_reg = _mm256_sad_epu8(ref1next_reg, srcnext_reg);
+    ref2next_reg = _mm256_sad_epu8(ref2next_reg, srcnext_reg);
+    ref3next_reg = _mm256_sad_epu8(ref3next_reg, srcnext_reg);
+
+    // sum every ref-i
+    sum_ref0 = _mm256_add_epi32(sum_ref0, ref0_reg);
+    sum_ref1 = _mm256_add_epi32(sum_ref1, ref1_reg);
+    sum_ref2 = _mm256_add_epi32(sum_ref2, ref2_reg);
+    sum_ref3 = _mm256_add_epi32(sum_ref3, ref3_reg);
+    sum_ref0 = _mm256_add_epi32(sum_ref0, ref0next_reg);
+    sum_ref1 = _mm256_add_epi32(sum_ref1, ref1next_reg);
+    sum_ref2 = _mm256_add_epi32(sum_ref2, ref2next_reg);
+    sum_ref3 = _mm256_add_epi32(sum_ref3, ref3next_reg);
+    src += src_stride;
+    ref0 += ref_stride;
+    ref1 += ref_stride;
+    ref2 += ref_stride;
+    ref3 += ref_stride;
+  }
+  {
+    __m128i sum;
+
+    // in sum_ref-i the result is saved in the first 4 bytes
+    // the other 4 bytes are zeroed.
+    // sum_ref1 and sum_ref3 are shifted left by 4 bytes
+    sum_ref1 = _mm256_slli_si256(sum_ref1, 4);
+    sum_ref3 = _mm256_slli_si256(sum_ref3, 4);
+
+    // merge sum_ref0 and sum_ref1 also sum_ref2 and sum_ref3
+    sum_ref0 = _mm256_or_si256(sum_ref0, sum_ref1);
+    sum_ref2 = _mm256_or_si256(sum_ref2, sum_ref3);
+
+    // merge every 64 bit from each sum_ref-i
+    sum_mlow = _mm256_unpacklo_epi64(sum_ref0, sum_ref2);
+    sum_mhigh = _mm256_unpackhi_epi64(sum_ref0, sum_ref2);
+
+    // add the high 64-bit sums to the low 64-bit sums
+    sum_mlow = _mm256_add_epi32(sum_mlow, sum_mhigh);
+
+    // add the upper 128-bit lane to the lower 128-bit lane
+    sum = _mm_add_epi32(_mm256_castsi256_si128(sum_mlow),
+                        _mm256_extractf128_si256(sum_mlow, 1));
+
+    _mm_storeu_si128((__m128i *)(res), sum);
+  }
+}
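
For reference, the AVX2 kernel above is a vectorized form of the plain
per-reference SAD loop. The sketch below is illustrative only (the helper
name sad64x64x4d_ref is hypothetical and not part of libvpx); it spells out
the result the intrinsics are expected to reproduce:

    #include <stdint.h>
    #include <stdlib.h>

    static void sad64x64x4d_ref(const uint8_t *src, int src_stride,
                                const uint8_t *const ref[4], int ref_stride,
                                unsigned int res[4]) {
      int r, row, col;
      for (r = 0; r < 4; ++r) {
        unsigned int sad = 0;
        const uint8_t *s = src;
        const uint8_t *p = ref[r];
        for (row = 0; row < 64; ++row) {
          for (col = 0; col < 64; ++col)
            sad += abs(s[col] - p[col]);  /* per-pixel absolute difference */
          s += src_stride;
          p += ref_stride;
        }
        res[r] = sad;                     /* one total per reference */
      }
    }
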
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 650d9ba..30c2c49 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -803,48 +803,25 @@
         if (cpi->droppable)
           pkt.data.frame.flags |= VPX_FRAME_IS_DROPPABLE;
 
-        /*if (cpi->output_partition)
-        {
-            int i;
-            const int num_partitions = 1;
-
-            pkt.data.frame.flags |= VPX_FRAME_IS_FRAGMENT;
-
-            for (i = 0; i < num_partitions; ++i)
-            {
-                pkt.data.frame.buf = cx_data;
-                pkt.data.frame.sz = cpi->partition_sz[i];
-                pkt.data.frame.partition_id = i;
-                // don't set the fragment bit for the last partition
-                if (i == (num_partitions - 1))
-                    pkt.data.frame.flags &= ~VPX_FRAME_IS_FRAGMENT;
-                vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
-                cx_data += cpi->partition_sz[i];
-                cx_data_sz -= cpi->partition_sz[i];
-            }
+        if (ctx->pending_cx_data) {
+          ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
+          ctx->pending_frame_magnitude |= size;
+          ctx->pending_cx_data_sz += size;
+          size += write_superframe_index(ctx);
+          pkt.data.frame.buf = ctx->pending_cx_data;
+          pkt.data.frame.sz  = ctx->pending_cx_data_sz;
+          ctx->pending_cx_data = NULL;
+          ctx->pending_cx_data_sz = 0;
+          ctx->pending_frame_count = 0;
+          ctx->pending_frame_magnitude = 0;
+        } else {
+          pkt.data.frame.buf = cx_data;
+          pkt.data.frame.sz  = size;
         }
-        else*/
-        {
-          if (ctx->pending_cx_data) {
-            ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
-            ctx->pending_frame_magnitude |= size;
-            ctx->pending_cx_data_sz += size;
-            size += write_superframe_index(ctx);
-            pkt.data.frame.buf = ctx->pending_cx_data;
-            pkt.data.frame.sz  = ctx->pending_cx_data_sz;
-            ctx->pending_cx_data = NULL;
-            ctx->pending_cx_data_sz = 0;
-            ctx->pending_frame_count = 0;
-            ctx->pending_frame_magnitude = 0;
-          } else {
-            pkt.data.frame.buf = cx_data;
-            pkt.data.frame.sz  = size;
-          }
-          pkt.data.frame.partition_id = -1;
-          vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
-          cx_data += size;
-          cx_data_sz -= size;
-        }
+        pkt.data.frame.partition_id = -1;
+        vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
+        cx_data += size;
+        cx_data_sz -= size;
       }
     }
   }
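
The retained path above buffers the compressed data of invisible frames in
ctx->pending_cx_data and only emits a packet once a visible frame arrives,
appending a superframe index via write_superframe_index(). As a rough sketch
only (not the library's implementation; the helper name below is
illustrative), that trailer can be laid out from the buffered sizes like
this:

    #include <stddef.h>
    #include <stdint.h>

    /* marker byte: 0b110 in the top bits, then bytes-per-size minus 1, then
     * frame count minus 1; the sizes follow little-endian, and the marker is
     * repeated so a parser can find the index from the end of the buffer. */
    static size_t append_superframe_index(uint8_t *dst, const uint32_t *sizes,
                                          int frame_count, uint32_t magnitude) {
      uint8_t *x = dst;
      uint8_t marker;
      int mag = 0, i;

      while (magnitude >>= 8)            /* smallest width holding every size */
        ++mag;
      marker = (uint8_t)(0xc0 | (mag << 3) | (frame_count - 1));
      *x++ = marker;

      for (i = 0; i < frame_count; ++i) {
        uint32_t sz = sizes[i];
        int b;
        for (b = 0; b <= mag; ++b) {     /* mag + 1 bytes per frame size */
          *x++ = (uint8_t)(sz & 0xff);
          sz >>= 8;
        }
      }

      *x++ = marker;
      return (size_t)(x - dst);
    }
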
@@ -1172,8 +1149,7 @@
 CODEC_INTERFACE(vpx_codec_vp9_cx) = {
   "WebM Project VP9 Encoder" VERSION_STRING,
   VPX_CODEC_INTERNAL_ABI_VERSION,
-  VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR |
-  VPX_CODEC_CAP_OUTPUT_PARTITION,
+  VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR,
   /* vpx_codec_caps_t          caps; */
   vp9e_init,          /* vpx_codec_init_fn_t       init; */
   vp9e_destroy,       /* vpx_codec_destroy_fn_t    destroy; */
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 72701d9..2f42a41 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -280,7 +280,6 @@
       oxcf.width = ctx->si.w;
       oxcf.height = ctx->si.h;
       oxcf.version = 9;
-      oxcf.postprocess = 0;
       oxcf.max_threads = ctx->cfg.threads;
       oxcf.inv_tile_order = ctx->invert_tile_order;
       optr = vp9_create_decompressor(&oxcf);
@@ -569,14 +568,14 @@
 
 static vpx_codec_err_t set_reference(vpx_codec_alg_priv_t *ctx, int ctr_id,
                                      va_list args) {
-  vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *);
+  vpx_ref_frame_t *const data = va_arg(args, vpx_ref_frame_t *);
 
   if (data) {
-    vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data;
+    vpx_ref_frame_t *const frame = (vpx_ref_frame_t *)data;
     YV12_BUFFER_CONFIG sd;
 
     image2yuvconfig(&frame->img, &sd);
-    return vp9_set_reference_dec(ctx->pbi,
+    return vp9_set_reference_dec(&ctx->pbi->common,
                                  (VP9_REFFRAME)frame->frame_type, &sd);
   } else {
     return VPX_CODEC_INVALID_PARAM;
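
For context, a caller-side sketch (assumed usage, not code from this change)
of how set_reference() above is reached: the application issues a control
call with a vpx_ref_frame_t describing the frame to install.

    #include "vpx/vpx_decoder.h"
    #include "vpx/vp8dx.h"

    /* Sketch only: install img as the decoder's LAST reference frame. */
    static vpx_codec_err_t install_last_frame(vpx_codec_ctx_t *decoder,
                                              const vpx_image_t *img) {
      vpx_ref_frame_t ref;
      ref.frame_type = VP8_LAST_FRAME;  /* cast to VP9_REFFRAME inside */
      ref.img = *img;
      return vpx_codec_control(decoder, VP8_SET_REFERENCE, &ref);
    }
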
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index b14e7e5..025e126 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -30,6 +30,7 @@
 VP9_CX_SRCS-yes += encoder/vp9_block.h
 VP9_CX_SRCS-yes += encoder/vp9_writer.h
 VP9_CX_SRCS-yes += encoder/vp9_writer.c
+VP9_CX_SRCS-yes += encoder/vp9_write_bit_buffer.c
 VP9_CX_SRCS-yes += encoder/vp9_write_bit_buffer.h
 VP9_CX_SRCS-yes += encoder/vp9_bitstream.h
 VP9_CX_SRCS-yes += encoder/vp9_encodemb.h
@@ -68,10 +69,10 @@
 VP9_CX_SRCS-yes += encoder/vp9_tokenize.c
 VP9_CX_SRCS-yes += encoder/vp9_treewriter.c
 VP9_CX_SRCS-yes += encoder/vp9_variance.c
-VP9_CX_SRCS-yes += encoder/vp9_vaq.c
-VP9_CX_SRCS-yes += encoder/vp9_vaq.h
-VP9_CX_SRCS-yes += encoder/vp9_craq.c
-VP9_CX_SRCS-yes += encoder/vp9_craq.h
+VP9_CX_SRCS-yes += encoder/vp9_aq_variance.c
+VP9_CX_SRCS-yes += encoder/vp9_aq_variance.h
+VP9_CX_SRCS-yes += encoder/vp9_aq_cyclicrefresh.c
+VP9_CX_SRCS-yes += encoder/vp9_aq_cyclicrefresh.h
 ifeq ($(CONFIG_VP9_POSTPROC),yes)
 VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.h
 VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.c
@@ -89,6 +90,7 @@
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm
 VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm
+VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_sad4d_intrin_avx2.c
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance_impl_sse2.asm
 VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c
index d48a761..d4f4e9f 100644
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c
@@ -583,8 +583,12 @@
   enc_cfg->rc_dropframe_thresh = 0;
   enc_cfg->rc_end_usage = VPX_CBR;
   enc_cfg->rc_resize_allowed = 0;
-  enc_cfg->rc_min_quantizer = 33;
-  enc_cfg->rc_max_quantizer = 33;
+
+  if (enc_cfg->g_pass == VPX_RC_ONE_PASS) {
+    enc_cfg->rc_min_quantizer = 33;
+    enc_cfg->rc_max_quantizer = 33;
+  }
+
   enc_cfg->rc_undershoot_pct = 100;
   enc_cfg->rc_overshoot_pct = 15;
   enc_cfg->rc_buf_initial_sz = 500;
@@ -784,12 +788,17 @@
   }
   layer_index = layer + VPX_SS_MAX_LAYERS - si->layers;
 
-  if (vpx_svc_is_keyframe(svc_ctx)) {
-    svc_params.min_quantizer = si->quantizer_keyframe[layer_index];
-    svc_params.max_quantizer = si->quantizer_keyframe[layer_index];
+  if (codec_ctx->config.enc->g_pass == VPX_RC_ONE_PASS) {
+    if (vpx_svc_is_keyframe(svc_ctx)) {
+      svc_params.min_quantizer = si->quantizer_keyframe[layer_index];
+      svc_params.max_quantizer = si->quantizer_keyframe[layer_index];
+    } else {
+      svc_params.min_quantizer = si->quantizer[layer_index];
+      svc_params.max_quantizer = si->quantizer[layer_index];
+    }
   } else {
-    svc_params.min_quantizer = si->quantizer[layer_index];
-    svc_params.max_quantizer = si->quantizer[layer_index];
+    svc_params.min_quantizer = codec_ctx->config.enc->rc_min_quantizer;
+    svc_params.max_quantizer = codec_ctx->config.enc->rc_max_quantizer;
   }
 
   svc_params.distance_from_i_frame = si->frame_within_gop;