Merge "Motion referenced partition search for non-RD coding flow"
diff --git a/build/make/configure.sh b/build/make/configure.sh
index 2fbcfe5..514c442 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -274,6 +274,7 @@
 
 clean_temp_files() {
     rm -f ${TMP_C} ${TMP_CC} ${TMP_H} ${TMP_O} ${TMP_X} ${TMP_ASM}
+    enabled gcov && rm -f ${TMP_C%.c}.gcno ${TMP_CC%.cc}.gcno
 }
 
 #
diff --git a/test/external_frame_buffer_test.cc b/test/external_frame_buffer_test.cc
index 2e7adc1..6e8ab24 100644
--- a/test/external_frame_buffer_test.cc
+++ b/test/external_frame_buffer_test.cc
@@ -210,7 +210,7 @@
       ASSERT_TRUE(fb_list_.CreateBufferList(num_buffers_));
       ASSERT_EQ(VPX_CODEC_OK,
                 decoder->SetFrameBufferFunctions(
-                    GetVp9FrameBuffer, ReleaseVP9FrameBuffer, this));
+                    GetVP9FrameBuffer, ReleaseVP9FrameBuffer, this));
     }
   }
 
@@ -242,7 +242,7 @@
 
   // Callback to get a free external frame buffer. Return value < 0 is an
   // error.
-  static int GetVp9FrameBuffer(void *user_priv, size_t min_size,
+  static int GetVP9FrameBuffer(void *user_priv, size_t min_size,
                                vpx_codec_frame_buffer_t *fb) {
     ExternalFrameBufferMD5Test *const md5Test =
         reinterpret_cast<ExternalFrameBufferMD5Test*>(user_priv);
diff --git a/test/pp_filter_test.cc b/test/pp_filter_test.cc
index ff7bb08..86c2b0e 100644
--- a/test/pp_filter_test.cc
+++ b/test/pp_filter_test.cc
@@ -25,7 +25,7 @@
 
 namespace {
 
-class Vp8PostProcessingFilterTest
+class VP8PostProcessingFilterTest
     : public ::testing::TestWithParam<post_proc_func_t> {
  public:
   virtual void TearDown() {
@@ -36,7 +36,7 @@
 // Test routine for the VP8 post-processing function
 // vp8_post_proc_down_and_across_mb_row_c.
 
-TEST_P(Vp8PostProcessingFilterTest, FilterOutputCheck) {
+TEST_P(VP8PostProcessingFilterTest, FilterOutputCheck) {
   // Size of the underlying data block that will be filtered.
   const int block_width  = 16;
   const int block_height = 16;
@@ -91,7 +91,7 @@
   for (int i = 0; i < block_height; ++i) {
     for (int j = 0; j < block_width; ++j) {
       EXPECT_EQ(expected_data[i], pixel_ptr[j])
-          << "Vp8PostProcessingFilterTest failed with invalid filter output";
+          << "VP8PostProcessingFilterTest failed with invalid filter output";
     }
     pixel_ptr += output_stride;
   }
@@ -101,11 +101,11 @@
   vpx_free(flimits);
 };
 
-INSTANTIATE_TEST_CASE_P(C, Vp8PostProcessingFilterTest,
+INSTANTIATE_TEST_CASE_P(C, VP8PostProcessingFilterTest,
     ::testing::Values(vp8_post_proc_down_and_across_mb_row_c));
 
 #if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, Vp8PostProcessingFilterTest,
+INSTANTIATE_TEST_CASE_P(SSE2, VP8PostProcessingFilterTest,
     ::testing::Values(vp8_post_proc_down_and_across_mb_row_sse2));
 #endif
 
diff --git a/test/set_roi.cc b/test/set_roi.cc
index e28f511..5b054f4 100644
--- a/test/set_roi.cc
+++ b/test/set_roi.cc
@@ -26,7 +26,7 @@
 
 namespace {
 
-TEST(Vp8RoiMapTest, ParameterCheck) {
+TEST(VP8RoiMapTest, ParameterCheck) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
   int delta_q[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 };
   int delta_lf[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 };
diff --git a/test/test_vectors.cc b/test/test_vectors.cc
index aba8a3c..c7a0c71 100644
--- a/test/test_vectors.cc
+++ b/test/test_vectors.cc
@@ -13,7 +13,7 @@
 namespace libvpx_test {
 
 #if CONFIG_VP8_DECODER
-const char *kVP8TestVectors[kNumVp8TestVectors] = {
+const char *kVP8TestVectors[kNumVP8TestVectors] = {
   "vp80-00-comprehensive-001.ivf",
   "vp80-00-comprehensive-002.ivf", "vp80-00-comprehensive-003.ivf",
   "vp80-00-comprehensive-004.ivf", "vp80-00-comprehensive-005.ivf",
@@ -49,7 +49,7 @@
 };
 #endif  // CONFIG_VP8_DECODER
 #if CONFIG_VP9_DECODER
-const char *kVP9TestVectors[kNumVp9TestVectors] = {
+const char *kVP9TestVectors[kNumVP9TestVectors] = {
   "vp90-2-00-quantizer-00.webm", "vp90-2-00-quantizer-01.webm",
   "vp90-2-00-quantizer-02.webm", "vp90-2-00-quantizer-03.webm",
   "vp90-2-00-quantizer-04.webm", "vp90-2-00-quantizer-05.webm",
diff --git a/test/test_vectors.h b/test/test_vectors.h
index d5ecc96..de961de 100644
--- a/test/test_vectors.h
+++ b/test/test_vectors.h
@@ -16,14 +16,14 @@
 namespace libvpx_test {
 
 #if CONFIG_VP8_DECODER
-const int kNumVp8TestVectors = 62;
-extern const char *kVP8TestVectors[kNumVp8TestVectors];
+const int kNumVP8TestVectors = 62;
+extern const char *kVP8TestVectors[kNumVP8TestVectors];
 #endif
 
 #if CONFIG_VP9_DECODER
-const int kNumVp9TestVectors = 223;
+const int kNumVP9TestVectors = 223;
 
-extern const char *kVP9TestVectors[kNumVp9TestVectors];
+extern const char *kVP9TestVectors[kNumVP9TestVectors];
 #endif  // CONFIG_VP9_DECODER
 
 }  // namespace libvpx_test
diff --git a/test/vp8_fdct4x4_test.cc b/test/vp8_fdct4x4_test.cc
index e3c292e..bdbf74e 100644
--- a/test/vp8_fdct4x4_test.cc
+++ b/test/vp8_fdct4x4_test.cc
@@ -68,7 +68,7 @@
 
 using libvpx_test::ACMRandom;
 
-TEST(Vp8FdctTest, SignBiasCheck) {
+TEST(VP8FdctTest, SignBiasCheck) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
   int16_t test_input_block[16];
   int16_t test_output_block[16];
@@ -127,7 +127,7 @@
     << "Error: 4x4 FDCT has a sign bias > 10% for input range [-15, 15]";
 };
 
-TEST(Vp8FdctTest, RoundTripErrorCheck) {
+TEST(VP8FdctTest, RoundTripErrorCheck) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
   int max_error = 0;
   double total_error = 0;
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index 600dfbb..0f26f6e 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -108,6 +108,9 @@
   vpx_free(cm->last_frame_seg_map);
   cm->last_frame_seg_map = NULL;
 
+  vpx_free(cm->above_context);
+  cm->above_context = NULL;
+
   vpx_free(cm->above_seg_context);
   cm->above_seg_context = NULL;
 }
@@ -136,6 +139,14 @@
   if (!cm->last_frame_seg_map)
     goto fail;
 
+  vpx_free(cm->above_context);
+  cm->above_context =
+      (ENTROPY_CONTEXT *)vpx_calloc(2 * mi_cols_aligned_to_sb(cm->mi_cols) *
+                                        MAX_MB_PLANE,
+                                    sizeof(*cm->above_context));
+  if (!cm->above_context)
+    goto fail;
+
   vpx_free(cm->above_seg_context);
   cm->above_seg_context =
      (PARTITION_CONTEXT *)vpx_calloc(mi_cols_aligned_to_sb(cm->mi_cols),
@@ -151,12 +162,11 @@
 }
 
 int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) {
-  int i;
-
   const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2);
   const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2);
   const int ss_x = cm->subsampling_x;
   const int ss_y = cm->subsampling_y;
+  int i;
 
   vp9_free_frame_buffers(cm);
 
@@ -191,6 +201,12 @@
   if (!cm->last_frame_seg_map)
     goto fail;
 
+  cm->above_context =
+      (ENTROPY_CONTEXT *)vpx_calloc(2 * mi_cols_aligned_to_sb(cm->mi_cols) *
+                                        MAX_MB_PLANE,
+                                    sizeof(*cm->above_context));
+  if (!cm->above_context)
+    goto fail;
 
   cm->above_seg_context =
       (PARTITION_CONTEXT *)vpx_calloc(mi_cols_aligned_to_sb(cm->mi_cols),
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index ca5a0c2..b3f2ec5 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -208,7 +208,6 @@
 
   // A NULL indicates that the 8x8 is not part of the image
   MODE_INFO **mi_8x8;
-  MODE_INFO **prev_mi_8x8;
 
   int up_available;
   int left_available;
diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c
index 197b7c0..5bb0482 100644
--- a/vp9/common/vp9_entropymv.c
+++ b/vp9/common/vp9_entropymv.c
@@ -8,14 +8,13 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-
 #include "vp9/common/vp9_onyxc_int.h"
 #include "vp9/common/vp9_entropymv.h"
 
 #define MV_COUNT_SAT 20
 #define MV_MAX_UPDATE_FACTOR 128
 
-/* Integer pel reference mv threshold for use of high-precision 1/8 mv */
+// Integer pel reference mv threshold for use of high-precision 1/8 mv
 #define COMPANDED_MVREF_THRESH 8
 
 const vp9_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = {
@@ -49,32 +48,30 @@
 
 static const nmv_context default_nmv_context = {
   {32, 64, 96},
-  { // NOLINT
-    { /* vert component */ // NOLINT
-      128,                                                  /* sign */
-      {224, 144, 192, 168, 192, 176, 192, 198, 198, 245},   /* class */
-      {216},                                                /* class0 */
-      {136, 140, 148, 160, 176, 192, 224, 234, 234, 240},   /* bits */
-      {{128, 128, 64}, {96, 112, 64}},                      /* class0_fp */
-      {64, 96, 64},                                         /* fp */
-      160,                                                  /* class0_hp bit */
-      128,                                                  /* hp */
+  {
+    { // Vertical component
+      128,                                                  // sign
+      {224, 144, 192, 168, 192, 176, 192, 198, 198, 245},   // class
+      {216},                                                // class0
+      {136, 140, 148, 160, 176, 192, 224, 234, 234, 240},   // bits
+      {{128, 128, 64}, {96, 112, 64}},                      // class0_fp
+      {64, 96, 64},                                         // fp
+      160,                                                  // class0_hp bit
+      128,                                                  // hp
     },
-    { /* hor component */ // NOLINT
-      128,                                                  /* sign */
-      {216, 128, 176, 160, 176, 176, 192, 198, 198, 208},   /* class */
-      {208},                                                /* class0 */
-      {136, 140, 148, 160, 176, 192, 224, 234, 234, 240},   /* bits */
-      {{128, 128, 64}, {96, 112, 64}},                      /* class0_fp */
-      {64, 96, 64},                                         /* fp */
-      160,                                                  /* class0_hp bit */
-      128,                                                  /* hp */
+    { // Horizontal component
+      128,                                                  // sign
+      {216, 128, 176, 160, 176, 176, 192, 198, 198, 208},   // class
+      {208},                                                // class0
+      {136, 140, 148, 160, 176, 192, 224, 234, 234, 240},   // bits
+      {{128, 128, 64}, {96, 112, 64}},                      // class0_fp
+      {64, 96, 64},                                         // fp
+      160,                                                  // class0_hp bit
+      128,                                                  // hp
     }
   },
 };
 
-#define mv_class_base(c) ((c) ? (CLASS0_SIZE << (c + 2)) : 0)
-
 static const uint8_t log_in_base_2[] = {
   0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
   4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
@@ -121,9 +118,13 @@
   9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10
 };
 
+static INLINE int mv_class_base(MV_CLASS_TYPE c) {
+  return c ? CLASS0_SIZE << (c + 2) : 0;
+}
+
 MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset) {
-  const MV_CLASS_TYPE c = (z >= CLASS0_SIZE * 4096) ? MV_CLASS_10 :
-                              (MV_CLASS_TYPE)log_in_base_2[z >> 3];
+  const MV_CLASS_TYPE c = (z >= CLASS0_SIZE * 4096) ?
+      MV_CLASS_10 : (MV_CLASS_TYPE)log_in_base_2[z >> 3];
   if (offset)
     *offset = z - mv_class_base(c);
   return c;
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c
index 9f2c2df..c043e6c 100644
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -193,11 +193,14 @@
                              int block, int mi_row, int mi_col) {
   const int *ref_sign_bias = cm->ref_frame_sign_bias;
   int i, refmv_count = 0;
-  const MODE_INFO *prev_mi = cm->coding_use_prev_mi && cm->prev_mi ?
-                                 xd->prev_mi_8x8[0] : NULL;
+  const MODE_INFO *prev_mi = cm->coding_use_prev_mi && cm->prev_mi
+        ? cm->prev_mi_grid_visible[mi_row * xd->mode_info_stride + mi_col]
+        : NULL;
+  const MB_MODE_INFO *const prev_mbmi = prev_mi ? &prev_mi->mbmi : NULL;
+
+
   const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
-  const MB_MODE_INFO *const prev_mbmi = cm->coding_use_prev_mi && prev_mi ?
-      &prev_mi->mbmi : NULL;
+
   int different_ref_found = 0;
   int context_counter = 0;
 
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 3cc12cf..f564afb 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -204,6 +204,7 @@
   InternalFrameBufferList int_frame_buffers;
 
   PARTITION_CONTEXT *above_seg_context;
+  ENTROPY_CONTEXT *above_context;
 } VP9_COMMON;
 
 static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) {
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 44d1e28..a2bd654 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -187,27 +187,6 @@
     xd->plane[i].dequant = cm->uv_dequant[q_index];
 }
 
-// Allocate storage for each tile column.
-// TODO(jzern): when max_threads <= 1 the same storage could be used for each
-// tile.
-static void alloc_tile_storage(VP9D_COMP *pbi, int tile_rows, int tile_cols) {
-  VP9_COMMON *const cm = &pbi->common;
-  const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
-  int i;
-
-  // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm
-  // block where mi unit size is 8x8.
-  CHECK_MEM_ERROR(cm, pbi->above_context[0],
-                  vpx_realloc(pbi->above_context[0],
-                              sizeof(*pbi->above_context[0]) * MAX_MB_PLANE *
-                              2 * aligned_mi_cols));
-  for (i = 1; i < MAX_MB_PLANE; ++i) {
-    pbi->above_context[i] = pbi->above_context[0] +
-                            i * sizeof(*pbi->above_context[0]) *
-                            2 * aligned_mi_cols;
-  }
-}
-
 static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
                                     TX_SIZE tx_size, uint8_t *dst, int stride,
                                     int eob) {
@@ -323,7 +302,6 @@
   int x, y;
 
   xd->mi_8x8 = cm->mi_grid_visible + offset;
-  xd->prev_mi_8x8 = cm->prev_mi_grid_visible + offset;
   xd->mi_8x8[0] = &cm->mi[offset];
   xd->mi_8x8[0]->mbmi.sb_type = bsize;
   for (y = 0; y < y_mis; ++y)
@@ -640,9 +618,7 @@
     read_frame_size(rb, &cm->display_width, &cm->display_height);
 }
 
-static void apply_frame_size(VP9D_COMP *pbi, int width, int height) {
-  VP9_COMMON *cm = &pbi->common;
-
+static void apply_frame_size(VP9_COMMON *cm, int width, int height) {
   if (cm->width != width || cm->height != height) {
     // Change in frame size.
     // TODO(agrange) Don't test width/height, check overall size.
@@ -669,18 +645,15 @@
   }
 }
 
-static void setup_frame_size(VP9D_COMP *pbi,
-                             struct vp9_read_bit_buffer *rb) {
+static void setup_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
   int width, height;
   read_frame_size(rb, &width, &height);
-  apply_frame_size(pbi, width, height);
-  setup_display_size(&pbi->common, rb);
+  apply_frame_size(cm, width, height);
+  setup_display_size(cm, rb);
 }
 
-static void setup_frame_size_with_refs(VP9D_COMP *pbi,
+static void setup_frame_size_with_refs(VP9_COMMON *cm,
                                        struct vp9_read_bit_buffer *rb) {
-  VP9_COMMON *const cm = &pbi->common;
-
   int width, height;
   int found = 0, i;
   for (i = 0; i < REFS_PER_FRAME; ++i) {
@@ -700,19 +673,19 @@
     vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
                        "Referenced frame with invalid size");
 
-  apply_frame_size(pbi, width, height);
+  apply_frame_size(cm, width, height);
   setup_display_size(cm, rb);
 }
 
-static void setup_tile_context(VP9D_COMP *const pbi, MACROBLOCKD *const xd,
+static void setup_tile_context(VP9_COMMON *cm, MACROBLOCKD *const xd,
                                int tile_row, int tile_col) {
   int i;
 
   for (i = 0; i < MAX_MB_PLANE; ++i)
-    xd->above_context[i] = pbi->above_context[i];
+    xd->above_context[i] = cm->above_context +
+        i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols);
 
-  // see note in alloc_tile_storage().
-  xd->above_seg_context = pbi->common.above_seg_context;
+  xd->above_seg_context = cm->above_seg_context;
 }
 
 static void decode_tile(VP9D_COMP *pbi, const TileInfo *const tile,
@@ -838,8 +811,8 @@
 
   // Note: this memset assumes above_context[0], [1] and [2]
   // are allocated as part of the same buffer.
-  vpx_memset(pbi->above_context[0], 0,
-             sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * 2 * aligned_cols);
+  vpx_memset(cm->above_context, 0,
+             sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols);
 
   vpx_memset(cm->above_seg_context, 0,
              sizeof(*cm->above_seg_context) * aligned_cols);
@@ -869,7 +842,7 @@
 
       vp9_tile_init(&tile, cm, tile_row, col);
       setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &r);
-      setup_tile_context(pbi, xd, tile_row, col);
+      setup_tile_context(cm, xd, tile_row, col);
       decode_tile(pbi, &tile, &r);
 
       if (last_tile)
@@ -966,9 +939,8 @@
 
   // Note: this memset assumes above_context[0], [1] and [2]
   // are allocated as part of the same buffer.
-  vpx_memset(pbi->above_context[0], 0,
-             sizeof(*pbi->above_context[0]) * MAX_MB_PLANE *
-             2 * aligned_mi_cols);
+  vpx_memset(cm->above_context, 0,
+             sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols);
   vpx_memset(cm->above_seg_context, 0,
              sizeof(*cm->above_seg_context) * aligned_mi_cols);
 
@@ -1018,7 +990,7 @@
 
       setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
                           &tile_data->bit_reader);
-      setup_tile_context(pbi, &tile_data->xd, 0, buf->col);
+      setup_tile_context(cm, &tile_data->xd, 0, buf->col);
       setup_tile_macroblockd(tile_data);
 
       worker->had_error = 0;
@@ -1135,7 +1107,7 @@
       cm->frame_refs[i].buf = get_frame_new_buffer(cm);
     }
 
-    setup_frame_size(pbi, rb);
+    setup_frame_size(cm, rb);
   } else {
     cm->intra_only = cm->show_frame ? 0 : vp9_rb_read_bit(rb);
 
@@ -1146,7 +1118,7 @@
       check_sync_code(cm, rb);
 
       pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
-      setup_frame_size(pbi, rb);
+      setup_frame_size(cm, rb);
     } else {
       pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
 
@@ -1158,7 +1130,7 @@
         cm->ref_frame_sign_bias[LAST_FRAME + i] = vp9_rb_read_bit(rb);
       }
 
-      setup_frame_size_with_refs(pbi, rb);
+      setup_frame_size_with_refs(cm, rb);
 
       cm->allow_high_precision_mv = vp9_rb_read_bit(rb);
       cm->interp_filter = read_interp_filter(rb);
@@ -1347,8 +1319,6 @@
     }
   }
 
-  alloc_tile_storage(pbi, tile_rows, tile_cols);
-
   xd->mode_info_stride = cm->mode_info_stride;
   if (cm->coding_use_prev_mi)
     set_prev_mi(cm);
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 982b851..c7c096e 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -164,12 +164,10 @@
 }
 
 void vp9_remove_decompressor(VP9D_COMP *pbi) {
+  VP9_COMMON *const cm = &pbi->common;
   int i;
 
-  if (!pbi)
-    return;
-
-  vp9_remove_common(&pbi->common);
+  vp9_remove_common(cm);
   vp9_worker_end(&pbi->lf_worker);
   vpx_free(pbi->lf_worker.data1);
   for (i = 0; i < pbi->num_tile_workers; ++i) {
@@ -181,16 +179,11 @@
   vpx_free(pbi->tile_workers);
 
   if (pbi->num_tile_workers) {
-    VP9_COMMON *const cm = &pbi->common;
     const int sb_rows =
         mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
-    VP9LfSync *const lf_sync = &pbi->lf_row_sync;
-
-    vp9_loop_filter_dealloc(lf_sync, sb_rows);
+    vp9_loop_filter_dealloc(&pbi->lf_row_sync, sb_rows);
   }
 
-  vpx_free(pbi->above_context[0]);
-  vpx_free(pbi->common.above_seg_context);
   vpx_free(pbi);
 }
 
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index 9318c55..2b16970 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -61,8 +61,6 @@
   int num_tile_workers;
 
   VP9LfSync lf_row_sync;
-
-  ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
 } VP9D_COMP;
 
 void vp9_initialize_dec();
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index 231276b..7442d01 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -19,6 +19,51 @@
 #include "vp9/encoder/vp9_rdopt.h"
 #include "vp9/encoder/vp9_segmentation.h"
 
+struct CYCLIC_REFRESH {
+  // Target percentage of blocks per frame that are cyclicly refreshed.
+  int max_mbs_perframe;
+  // Maximum q-delta as percentage of base q.
+  int max_qdelta_perc;
+  // Block size below which we don't apply cyclic refresh.
+  BLOCK_SIZE min_block_size;
+  // Macroblock starting index (unit of 8x8) for cycling through the frame.
+  int mb_index;
+  // Controls how long a block will need to wait to be refreshed again.
+  int time_for_refresh;
+  // Actual number of blocks that were applied delta-q (segment 1).
+  int num_seg_blocks;
+  // Actual encoding bits for segment 1.
+  int actual_seg_bits;
+  // RD mult. parameters for segment 1.
+  int rdmult;
+  // Cyclic refresh map.
+  signed char *map;
+  // Projected rate and distortion for the current superblock.
+  int64_t projected_rate_sb;
+  int64_t projected_dist_sb;
+  // Thresholds applied to projected rate/distortion of the superblock.
+  int64_t thresh_rate_sb;
+  int64_t thresh_dist_sb;
+};
+
+CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
+  CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr));
+  if (cr == NULL)
+    return NULL;
+
+  cr->map = vpx_calloc(mi_rows * mi_cols, sizeof(*cr->map));
+  if (cr->map == NULL) {
+    vpx_free(cr);
+    return NULL;
+  }
+
+  return cr;
+}
+
+void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) {
+  vpx_free(cr->map);
+  vpx_free(cr);
+}
 
 // Check if we should turn off cyclic refresh based on bitrate condition.
 static int apply_cyclic_refresh_bitrate(const VP9_COMMON *cm,
@@ -73,14 +118,12 @@
 // Prior to coding a given prediction block, of size bsize at (mi_row, mi_col),
 // check if we should reset the segment_id, and update the cyclic_refresh map
 // and segmentation map.
-void vp9_update_segment_aq(VP9_COMP *const cpi,
-                           MB_MODE_INFO *const mbmi,
-                           int mi_row,
-                           int mi_col,
-                           BLOCK_SIZE bsize,
-                           int use_rd) {
+void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
+                                       MB_MODE_INFO *const mbmi,
+                                       int mi_row, int mi_col,
+                                       BLOCK_SIZE bsize, int use_rd) {
   const VP9_COMMON *const cm = &cpi->common;
-  CYCLIC_REFRESH *const cr = &cpi->cyclic_refresh;
+  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
   const int bw = num_8x8_blocks_wide_lookup[bsize];
   const int bh = num_8x8_blocks_high_lookup[bsize];
   const int xmis = MIN(cm->mi_cols - mi_col, bw);
@@ -126,10 +169,10 @@
 }
 
 // Setup cyclic background refresh: set delta q and segmentation map.
-void vp9_setup_cyclic_refresh_aq(VP9_COMP *const cpi) {
+void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
   VP9_COMMON *const cm = &cpi->common;
   const RATE_CONTROL *const rc = &cpi->rc;
-  CYCLIC_REFRESH *const cr = &cpi->cyclic_refresh;
+  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
   struct segmentation *const seg = &cm->seg;
   unsigned char *const seg_map = cpi->segmentation_map;
   const int apply_cyclic_refresh  = apply_cyclic_refresh_bitrate(cm, rc);
@@ -253,3 +296,13 @@
       }
   }
 }
+
+void vp9_cyclic_refresh_set_rate_and_dist_sb(CYCLIC_REFRESH *cr,
+                                             int64_t rate_sb, int64_t dist_sb) {
+  cr->projected_rate_sb = rate_sb;
+  cr->projected_dist_sb = dist_sb;
+}
+
+int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr) {
+  return cr->rdmult;
+}
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.h b/vp9/encoder/vp9_aq_cyclicrefresh.h
index 14dc2cd..f556d65 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.h
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.h
@@ -18,47 +18,30 @@
 extern "C" {
 #endif
 
-typedef struct {
-  // Target percentage of blocks per frame that are cyclicly refreshed.
-  int max_mbs_perframe;
-  // Maximum q-delta as percentage of base q.
-  int max_qdelta_perc;
-  // Block size below which we don't apply cyclic refresh.
-  BLOCK_SIZE min_block_size;
-  // Macroblock starting index (unit of 8x8) for cycling through the frame.
-  int mb_index;
-  // Controls how long a block will need to wait to be refreshed again.
-  int time_for_refresh;
-  // Actual number of blocks that were applied delta-q (segment 1).
-  int num_seg_blocks;
-  // Actual encoding bits for segment 1.
-  int actual_seg_bits;
-  // RD mult. parameters for segment 1.
-  int rdmult;
-  // Cyclic refresh map.
-  signed char *map;
-  // Projected rate and distortion for the current superblock.
-  int64_t projected_rate_sb;
-  int64_t projected_dist_sb;
-  // Thresholds applied to projected rate/distortion of the superblock.
-  int64_t thresh_rate_sb;
-  int64_t thresh_dist_sb;
-} CYCLIC_REFRESH;
-
 struct VP9_COMP;
 
+struct CYCLIC_REFRESH;
+typedef struct CYCLIC_REFRESH CYCLIC_REFRESH;
+
+CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols);
+
+void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr);
+
 // Prior to coding a given prediction block, of size bsize at (mi_row, mi_col),
 // check if we should reset the segment_id, and update the cyclic_refresh map
 // and segmentation map.
-void vp9_update_segment_aq(struct VP9_COMP *const cpi,
-                           MB_MODE_INFO *const mbmi,
-                           int mi_row,
-                           int mi_col,
-                           BLOCK_SIZE bsize,
-                           int use_rd);
+void vp9_cyclic_refresh_update_segment(struct VP9_COMP *const cpi,
+                                       MB_MODE_INFO *const mbmi,
+                                       int mi_row, int mi_col,
+                                       BLOCK_SIZE bsize, int use_rd);
 
 // Setup cyclic background refresh: set delta q and segmentation map.
-void vp9_setup_cyclic_refresh_aq(struct VP9_COMP *const cpi);
+void vp9_cyclic_refresh_setup(struct VP9_COMP *const cpi);
+
+void vp9_cyclic_refresh_set_rate_and_dist_sb(CYCLIC_REFRESH *cr,
+                                             int64_t rate_sb, int64_t dist_sb);
+
+int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 2336e05..c563f54 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -164,7 +164,6 @@
                                         int mi_col) {
   const int idx_str = xd->mode_info_stride * mi_row + mi_col;
   xd->mi_8x8 = cm->mi_grid_visible + idx_str;
-  xd->prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
   xd->mi_8x8[0] = cm->mi + idx_str;
 }
 
@@ -207,7 +206,7 @@
   const int idx_map = mb_row * cm->mb_cols + mb_col;
   const struct segmentation *const seg = &cm->seg;
 
-  set_skip_context(xd, cpi->above_context, cpi->left_context, mi_row, mi_col);
+  set_skip_context(xd, xd->above_context, xd->left_context, mi_row, mi_col);
 
   // Activity map pointer
   x->mb_activity_ptr = &cpi->mb_activity_map[idx_map];
@@ -896,22 +895,17 @@
 
   assert(mi->mbmi.sb_type == bsize);
 
-  // For in frame adaptive Q copy over the chosen segment id into the
-  // mode innfo context for the chosen mode / partition.
-  if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ ||
-      cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) &&
-      output_enabled) {
-    // Check for reseting segment_id and update cyclic map.
-    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && seg->enabled) {
-      vp9_update_segment_aq(cpi, &xd->mi_8x8[0]->mbmi,
-                            mi_row, mi_col, bsize, 1);
-      vp9_init_plane_quantizers(cpi, x);
-    }
-    mi->mbmi.segment_id = xd->mi_8x8[0]->mbmi.segment_id;
-  }
-
   *mi_addr = *mi;
 
+  // For in frame adaptive Q, check for reseting the segment_id and updating
+  // the cyclic refresh map.
+  if ((cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) && seg->enabled &&
+      output_enabled) {
+    vp9_cyclic_refresh_update_segment(cpi, &xd->mi_8x8[0]->mbmi,
+                                      mi_row, mi_col, bsize, 1);
+    vp9_init_plane_quantizers(cpi, x);
+  }
+
   max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1;
   for (i = 0; i < max_plane; ++i) {
     p[i].coeff = ctx->coeff_pbuf[i][1];
@@ -1107,7 +1101,7 @@
         : cm->last_frame_seg_map;
     // If segment 1, use rdmult for that segment.
     if (vp9_get_segment_id(cm, map, bsize, mi_row, mi_col))
-      x->rdmult = cpi->cyclic_refresh.rdmult;
+      x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
   }
 
   // Find best coding mode & reconstruct the MB so it is available
@@ -1207,12 +1201,12 @@
   int mi_height = num_8x8_blocks_high_lookup[bsize];
   for (p = 0; p < MAX_MB_PLANE; p++) {
     vpx_memcpy(
-        cpi->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x),
+        xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x),
         a + num_4x4_blocks_wide * p,
         (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
         xd->plane[p].subsampling_x);
     vpx_memcpy(
-        cpi->left_context[p]
+        xd->left_context[p]
             + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
         l + num_4x4_blocks_high * p,
         (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
@@ -1240,12 +1234,12 @@
   for (p = 0; p < MAX_MB_PLANE; ++p) {
     vpx_memcpy(
         a + num_4x4_blocks_wide * p,
-        cpi->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x),
+        xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x),
         (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
         xd->plane[p].subsampling_x);
     vpx_memcpy(
         l + num_4x4_blocks_high * p,
-        cpi->left_context[p]
+        xd->left_context[p]
             + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
         (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
         xd->plane[p].subsampling_y);
@@ -1473,7 +1467,8 @@
 
   // Check for reseting segment_id and update cyclic map.
   if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && seg->enabled) {
-    vp9_update_segment_aq(cpi, &xd->mi_8x8[0]->mbmi, mi_row, mi_col, bsize, 1);
+    vp9_cyclic_refresh_update_segment(cpi, &xd->mi_8x8[0]->mbmi,
+                                      mi_row, mi_col, bsize, 1);
     vp9_init_plane_quantizers(cpi, x);
   }
 
@@ -1884,10 +1879,10 @@
       select_in_frame_q_segment(cpi, mi_row, mi_col,
                                 output_enabled, chosen_rate);
     }
-    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
-      cpi->cyclic_refresh.projected_rate_sb = chosen_rate;
-      cpi->cyclic_refresh.projected_dist_sb = chosen_dist;
-    }
+
+    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+      vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
+                                              chosen_rate, chosen_dist);
 
     encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize);
   }
@@ -1952,21 +1947,20 @@
 // Look at neighboring blocks and set a min and max partition size based on
 // what they chose.
 static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
-                                    int row, int col,
+                                    int mi_row, int mi_col,
                                     BLOCK_SIZE *min_block_size,
                                     BLOCK_SIZE *max_block_size) {
-  VP9_COMMON * const cm = &cpi->common;
+  VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
-  MODE_INFO ** mi_8x8 = xd->mi_8x8;
-  MODE_INFO ** prev_mi_8x8 = xd->prev_mi_8x8;
+  MODE_INFO **mi_8x8 = xd->mi_8x8;
   const int left_in_image = xd->left_available && mi_8x8[-1];
   const int above_in_image = xd->up_available &&
                              mi_8x8[-xd->mode_info_stride];
-  MODE_INFO ** above_sb64_mi_8x8;
-  MODE_INFO ** left_sb64_mi_8x8;
+  MODE_INFO **above_sb64_mi_8x8;
+  MODE_INFO **left_sb64_mi_8x8;
 
-  int row8x8_remaining = tile->mi_row_end - row;
-  int col8x8_remaining = tile->mi_col_end - col;
+  int row8x8_remaining = tile->mi_row_end - mi_row;
+  int col8x8_remaining = tile->mi_col_end - mi_col;
   int bh, bw;
   BLOCK_SIZE min_size = BLOCK_4X4;
   BLOCK_SIZE max_size = BLOCK_64X64;
@@ -1980,8 +1974,9 @@
     // passed in values for min and max as a starting point.
     // Find the min and max partition used in previous frame at this location
     if (cm->frame_type != KEY_FRAME) {
-      get_sb_partition_size_range(cpi, prev_mi_8x8,
-                                  &min_size, &max_size);
+      MODE_INFO **const prev_mi =
+          &cm->prev_mi_grid_visible[mi_row * xd->mode_info_stride + mi_col];
+      get_sb_partition_size_range(cpi, prev_mi, &min_size, &max_size);
     }
     // Find the min and max partition sizes used in the left SB64
     if (left_in_image) {
@@ -2325,10 +2320,10 @@
     if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) {
       select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled, best_rate);
     }
-    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
-      cpi->cyclic_refresh.projected_rate_sb = best_rate;
-      cpi->cyclic_refresh.projected_dist_sb = best_dist;
-    }
+
+    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+      vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
+                                              best_rate, best_dist);
 
     encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize);
   }
@@ -2348,7 +2343,7 @@
   int mi_col;
 
   // Initialize the left context for the new SB row
-  vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context));
+  vpx_memset(&xd->left_context, 0, sizeof(xd->left_context));
   vpx_memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
 
   // Code each SB in the row
@@ -2475,8 +2470,8 @@
 
   // Note: this memset assumes above_context[0], [1] and [2]
   // are allocated as part of the same buffer.
-  vpx_memset(cpi->above_context[0], 0,
-             sizeof(*cpi->above_context[0]) *
+  vpx_memset(xd->above_context[0], 0,
+             sizeof(*xd->above_context[0]) *
              2 * aligned_mi_cols * MAX_MB_PLANE);
   vpx_memset(xd->above_seg_context, 0,
              sizeof(*xd->above_seg_context) * aligned_mi_cols);
@@ -2932,10 +2927,10 @@
     if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) {
       select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled, best_rate);
     }
-    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
-      cpi->cyclic_refresh.projected_rate_sb = best_rate;
-      cpi->cyclic_refresh.projected_dist_sb = best_dist;
-    }
+
+    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+      vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
+                                              best_rate, best_dist);
 
     encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize);
   }
@@ -3052,10 +3047,9 @@
   }
 
   if (bsize == BLOCK_64X64 && output_enabled) {
-    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
-      cpi->cyclic_refresh.projected_rate_sb = *totrate;
-      cpi->cyclic_refresh.projected_dist_sb = *totdist;
-    }
+    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+      vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
+                                              *totrate, *totdist);
     encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, bsize);
   }
 }
@@ -3067,7 +3061,7 @@
   int mi_col;
 
   // Initialize the left context for the new SB row
-  vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context));
+  vpx_memset(&xd->left_context, 0, sizeof(xd->left_context));
   vpx_memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
 
   // Code each SB in the row
@@ -3465,11 +3459,9 @@
   const int mi_height = num_8x8_blocks_high_lookup[bsize];
 
   x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8 &&
-                   (cpi->oxcf.aq_mode != COMPLEXITY_AQ &&
-                    cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ) &&
-                   !cpi->sf.use_nonrd_pick_mode &&
-                   !cpi->sf.use_uv_intra_rd_estimate &&
-                   !cpi->sf.skip_encode_sb;
+                   cpi->oxcf.aq_mode != COMPLEXITY_AQ &&
+                   cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ &&
+                   cpi->sf.allow_skip_recode;
 
   x->skip_optimize = ctx->is_coded;
   ctx->is_coded = 1;
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index acaeadc..f7a6eee 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -144,6 +144,22 @@
   }
 }
 
+static void setup_key_frame(VP9_COMP *cpi) {
+  vp9_setup_past_independence(&cpi->common);
+
+  // All buffers are implicitly updated on key frames.
+  cpi->refresh_golden_frame = 1;
+  cpi->refresh_alt_ref_frame = 1;
+}
+
+static void setup_inter_frame(VP9_COMMON *cm) {
+  if (cm->error_resilient_mode || cm->intra_only)
+    vp9_setup_past_independence(cm);
+
+  assert(cm->frame_context_idx < FRAME_CONTEXTS);
+  cm->fc = cm->frame_contexts[cm->frame_context_idx];
+}
+
 void vp9_initialize_enc() {
   static int init_done = 0;
 
@@ -173,11 +189,13 @@
   cpi->coding_context.last_frame_seg_map_copy = NULL;
 
   vpx_free(cpi->complexity_map);
-  cpi->complexity_map = 0;
-  vpx_free(cpi->cyclic_refresh.map);
-  cpi->cyclic_refresh.map = 0;
+  cpi->complexity_map = NULL;
+
+  vp9_cyclic_refresh_free(cpi->cyclic_refresh);
+  cpi->cyclic_refresh = NULL;
+
   vpx_free(cpi->active_map);
-  cpi->active_map = 0;
+  cpi->active_map = NULL;
 
   vp9_free_frame_buffers(cm);
 
@@ -194,9 +212,6 @@
   cpi->mb_activity_map = 0;
   vpx_free(cpi->mb_norm_activity_map);
   cpi->mb_norm_activity_map = 0;
-
-  vpx_free(cpi->above_context[0]);
-  cpi->above_context[0] = NULL;
 }
 
 // Computes a q delta (in "q index" terms) to get from a starting q value
@@ -574,6 +589,7 @@
   int i;
   sf->adaptive_rd_thresh = 1;
   sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW);
+  sf->allow_skip_recode = 1;
   if (speed == 1) {
     sf->use_square_partition_only = !frame_is_intra_only(cm);
     sf->less_rectangular_check  = 1;
@@ -585,7 +601,6 @@
         DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
     else
       sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
-
     sf->use_rd_breakout = 1;
     sf->adaptive_motion_search = 1;
     sf->adaptive_pred_interp_filter = 1;
@@ -617,7 +632,6 @@
     sf->adaptive_pred_interp_filter = 2;
     sf->reference_masking = 1;
     sf->auto_mv_step_size = 1;
-
     sf->disable_filter_search_var_thresh = 50;
     sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
 
@@ -663,9 +677,9 @@
     sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL;
     sf->adjust_partitioning_from_last_frame = 1;
     sf->last_partitioning_redo_frequency = 3;
-
     sf->use_uv_intra_rd_estimate = 1;
     sf->skip_encode_sb = 1;
+    sf->allow_skip_recode = 0;
     sf->use_lp32x32fdct = 1;
     sf->subpel_iters_per_step = 1;
     sf->use_fast_coef_updates = 2;
@@ -703,6 +717,7 @@
 
     sf->use_uv_intra_rd_estimate = 1;
     sf->skip_encode_sb = 1;
+    sf->allow_skip_recode = 0;
     sf->use_lp32x32fdct = 1;
     sf->subpel_iters_per_step = 1;
     sf->use_fast_coef_updates = 2;
@@ -741,6 +756,7 @@
     sf->use_fast_coef_costing = 1;
     sf->adaptive_rd_thresh = 4;
     sf->mode_skip_start = 6;
+    sf->allow_skip_recode = 1;
   }
 }
 
@@ -749,7 +765,6 @@
                                  int speed) {
   sf->static_segmentation = 0;
   sf->adaptive_rd_thresh = 1;
-  sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW);
   sf->encode_breakout_thresh = 1;
   sf->use_fast_coef_costing = 1;
 
@@ -770,7 +785,6 @@
     sf->adaptive_pred_interp_filter = 1;
     sf->auto_mv_step_size = 1;
     sf->adaptive_rd_thresh = 2;
-    sf->recode_loop = ALLOW_RECODE_KFARFGF;
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
     sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
     sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
@@ -807,7 +821,6 @@
     sf->last_partitioning_redo_frequency = 3;
 
     sf->adaptive_rd_thresh = 2;
-    sf->recode_loop = ALLOW_RECODE_KFARFGF;
     sf->use_lp32x32fdct = 1;
     sf->mode_skip_start = 11;
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
@@ -836,6 +849,7 @@
     sf->adaptive_rd_thresh = 4;
     sf->mode_skip_start = 6;
     sf->encode_breakout_thresh = 400;
+    sf->allow_skip_recode = 0;
   }
   if (speed >= 4) {
     sf->optimize_coefficients = 0;
@@ -866,6 +880,7 @@
     sf->disable_inter_mode_mask[BLOCK_64X32] = ~(1 << INTER_OFFSET(NEARESTMV));
     sf->disable_inter_mode_mask[BLOCK_64X64] = ~(1 << INTER_OFFSET(NEARESTMV));
     sf->max_intra_bsize = BLOCK_32X32;
+    sf->allow_skip_recode = 1;
   }
   if (speed >= 6) {
     sf->max_partition_size = BLOCK_32X32;
@@ -875,6 +890,7 @@
     sf->partition_search_type = REFERENCE_PARTITION;
     sf->use_nonrd_pick_mode = 1;
     sf->search_method = FAST_DIAMOND;
+    sf->allow_skip_recode = 0;
   }
   if (speed >= 7) {
     sf->partition_search_type = VAR_BASED_FIXED_PARTITION;
@@ -941,6 +957,7 @@
   sf->use_rd_breakout = 0;
   sf->skip_encode_sb = 0;
   sf->use_uv_intra_rd_estimate = 0;
+  sf->allow_skip_recode = 0;
   sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE;
   sf->use_fast_coef_updates = 0;
   sf->use_fast_coef_costing = 0;
@@ -1062,19 +1079,12 @@
   CHECK_MEM_ERROR(cm, cpi->mb_norm_activity_map,
                   vpx_calloc(sizeof(unsigned int),
                              cm->mb_rows * cm->mb_cols));
-
-  // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm
-  // block where mi unit size is 8x8.
-  vpx_free(cpi->above_context[0]);
-  CHECK_MEM_ERROR(cm, cpi->above_context[0],
-                  vpx_calloc(2 * mi_cols_aligned_to_sb(cm->mi_cols) *
-                             MAX_MB_PLANE,
-                             sizeof(*cpi->above_context[0])));
 }
 
 
 static void update_frame_size(VP9_COMP *cpi) {
-  VP9_COMMON *cm = &cpi->common;
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
 
   vp9_update_frame_size(cm);
 
@@ -1105,13 +1115,13 @@
 
   {
     int i;
-    for (i = 1; i < MAX_MB_PLANE; ++i) {
-      cpi->above_context[i] = cpi->above_context[0] +
-                              i * sizeof(*cpi->above_context[0]) * 2 *
-                              mi_cols_aligned_to_sb(cm->mi_cols);
-      cpi->mb.e_mbd.above_seg_context = cpi->common.above_seg_context;
-    }
+
+    for (i = 0; i < MAX_MB_PLANE; ++i)
+      xd->above_context[i] = cm->above_context +
+        i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols);
   }
+
+  xd->above_seg_context = cpi->common.above_seg_context;
 }
 
 // Table that converts 0-63 Q range values passed in outside to the Qindex
@@ -1653,8 +1663,8 @@
                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
 
   // Create a map used for cyclic background refresh.
-  CHECK_MEM_ERROR(cm, cpi->cyclic_refresh.map,
-                  vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
+  CHECK_MEM_ERROR(cm, cpi->cyclic_refresh,
+                  vp9_cyclic_refresh_alloc(cm->mi_rows, cm->mi_cols));
 
   // And a place holder structure is the coding context
   // for use if we want to save and restore it
@@ -2657,12 +2667,12 @@
   // other inter-frames the encoder currently uses only two contexts;
   // context 1 for ALTREF frames and context 0 for the others.
   if (cm->frame_type == KEY_FRAME) {
-    vp9_setup_key_frame(cpi);
+    setup_key_frame(cpi);
   } else {
-    if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) {
+    if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc)
       cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame;
-    }
-    vp9_setup_inter_frame(cpi);
+
+    setup_inter_frame(cm);
   }
   // Variance adaptive and in frame q adjustment experiments are mutually
   // exclusive.
@@ -2671,7 +2681,7 @@
   } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
     setup_in_frame_q_adj(cpi);
   } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
-    vp9_setup_cyclic_refresh_aq(cpi);
+    vp9_cyclic_refresh_setup(cpi);
   }
   // transform / motion compensation build reconstruction frame
   vp9_encode_frame(cpi);
@@ -2715,12 +2725,12 @@
       // other inter-frames the encoder currently uses only two contexts;
       // context 1 for ALTREF frames and context 0 for the others.
       if (cm->frame_type == KEY_FRAME) {
-        vp9_setup_key_frame(cpi);
+        setup_key_frame(cpi);
       } else {
-        if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) {
+        if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc)
           cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame;
-        }
-        vp9_setup_inter_frame(cpi);
+
+        setup_inter_frame(cm);
       }
     }
 
@@ -3005,7 +3015,7 @@
 
   // Set various flags etc to special state if it is a key frame.
   if (frame_is_intra_only(cm)) {
-    vp9_setup_key_frame(cpi);
+    setup_key_frame(cpi);
     // Reset the loop filter deltas and segmentation map.
     vp9_reset_segment_features(&cm->seg);
 
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 022a63f..b62fa71 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -284,6 +284,9 @@
   // blocks and the q is less than a threshold.
   int skip_encode_sb;
   int skip_encode_frame;
+  // Speed feature to allow or disallow skipping of recode at block
+  // level within a frame.
+  int allow_skip_recode;
 
   // This variable allows us to reuse the last frames partition choices
   // (64x64 v 32x32 etc) for this frame. It can be set to only use the last
@@ -722,7 +725,7 @@
   unsigned char *active_map;
   unsigned int active_map_enabled;
 
-  CYCLIC_REFRESH cyclic_refresh;
+  CYCLIC_REFRESH *cyclic_refresh;
 
   fractional_mv_step_fp *find_fractional_mv_step;
   fractional_mv_step_comp_fp *find_fractional_mv_step_comp;
@@ -781,9 +784,6 @@
   unsigned int *mb_activity_map;
   int *mb_norm_activity_map;
 
-  // Force next frame to intra when kf_auto says so.
-  int force_next_frame_intra;
-
   int droppable;
 
   int dummy_packing;    /* flag to indicate if packing is dummy */
@@ -814,10 +814,6 @@
   // Debug / test stats
   int64_t mode_test_hits[BLOCK_SIZES];
 #endif
-
-  // Y,U,V,(A)
-  ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
-  ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16];
 } VP9_COMP;
 
 void vp9_initialize_enc();
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 12743b2..dd8a641 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -151,25 +151,6 @@
   cm->fc = cc->fc;
 }
 
-void vp9_setup_key_frame(VP9_COMP *cpi) {
-  VP9_COMMON *cm = &cpi->common;
-
-  vp9_setup_past_independence(cm);
-
-  /* All buffers are implicitly updated on key frames. */
-  cpi->refresh_golden_frame = 1;
-  cpi->refresh_alt_ref_frame = 1;
-}
-
-void vp9_setup_inter_frame(VP9_COMP *cpi) {
-  VP9_COMMON *cm = &cpi->common;
-  if (cm->error_resilient_mode || cm->intra_only)
-    vp9_setup_past_independence(cm);
-
-  assert(cm->frame_context_idx < FRAME_CONTEXTS);
-  cm->fc = cm->frame_contexts[cm->frame_context_idx];
-}
-
 static int estimate_bits_at_q(int frame_kind, int q, int mbs,
                               double correction_factor) {
   const int bpm = (int)(vp9_rc_bits_per_mb(frame_kind, q, correction_factor));
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index 2754395..87421af 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -87,9 +87,6 @@
 void vp9_save_coding_context(struct VP9_COMP *cpi);
 void vp9_restore_coding_context(struct VP9_COMP *cpi);
 
-void vp9_setup_key_frame(struct VP9_COMP *cpi);
-void vp9_setup_inter_frame(struct VP9_COMP *cpi);
-
 double vp9_convert_qindex_to_q(int qindex);
 
 void vp9_rc_init_minq_luts();
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 93f9999..4c38909 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1450,9 +1450,9 @@
   *mode_uv = x->e_mbd.mi_8x8[0]->mbmi.uv_mode;
 }
 
-static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode,
+static int cost_mv_ref(const VP9_COMP *cpi, MB_PREDICTION_MODE mode,
                        int mode_context) {
-  MACROBLOCK *const x = &cpi->mb;
+  const MACROBLOCK *const x = &cpi->mb;
   const int segment_id = x->e_mbd.mi_8x8[0]->mbmi.segment_id;
 
   // Don't account for mode here if segment skip is enabled.
@@ -1669,6 +1669,45 @@
   return (mv->row & 0x0F) || (mv->col & 0x0F);
 }
 
+// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way encode zero motion.
+// TODO(aconverse): Find out if this is still productive then clean up or remove
+static int check_best_zero_mv(
+    const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
+    int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
+    int disable_inter_mode_mask, int this_mode, int ref_frame,
+    int second_ref_frame) {
+  if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) &&
+      (this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
+      frame_mv[this_mode][ref_frame].as_int == 0 &&
+      (second_ref_frame == NONE ||
+       frame_mv[this_mode][second_ref_frame].as_int == 0)) {
+    int rfc = mode_context[ref_frame];
+    int c1 = cost_mv_ref(cpi, NEARMV, rfc);
+    int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
+    int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
+
+    if (this_mode == NEARMV) {
+      if (c1 > c3) return 0;
+    } else if (this_mode == NEARESTMV) {
+      if (c2 > c3) return 0;
+    } else {
+      assert(this_mode == ZEROMV);
+      if (second_ref_frame == NONE) {
+        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0) ||
+            (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0))
+          return 0;
+      } else {
+        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0 &&
+             frame_mv[NEARESTMV][second_ref_frame].as_int == 0) ||
+            (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0 &&
+             frame_mv[NEARMV][second_ref_frame].as_int == 0))
+          return 0;
+      }
+    }
+  }
+  return 1;
+}
+
 static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
                                     const TileInfo *const tile,
                                     BEST_SEG_INFO *bsi_buf, int filter_idx,
@@ -1737,43 +1776,11 @@
         if (disable_inter_mode_mask & (1 << mode_idx))
           continue;
 
-        // if we're near/nearest and mv == 0,0, compare to zeromv
-        if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) &&
-            (this_mode == NEARMV || this_mode == NEARESTMV ||
-             this_mode == ZEROMV) &&
-            frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 &&
-            (!has_second_rf ||
-             frame_mv[this_mode][mbmi->ref_frame[1]].as_int == 0)) {
-          int rfc = mbmi->mode_context[mbmi->ref_frame[0]];
-          int c1 = cost_mv_ref(cpi, NEARMV, rfc);
-          int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
-          int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
-
-          if (this_mode == NEARMV) {
-            if (c1 > c3)
-              continue;
-          } else if (this_mode == NEARESTMV) {
-            if (c2 > c3)
-              continue;
-          } else {
-            assert(this_mode == ZEROMV);
-            if (!has_second_rf) {
-              if ((c3 >= c2 &&
-                   frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) ||
-                  (c3 >= c1 &&
-                   frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0))
-                continue;
-            } else {
-              if ((c3 >= c2 &&
-                   frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 &&
-                   frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) ||
-                  (c3 >= c1 &&
-                   frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
-                   frame_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
-                continue;
-            }
-          }
-        }
+        if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
+                                disable_inter_mode_mask,
+                                this_mode, mbmi->ref_frame[0],
+                                mbmi->ref_frame[1]))
+          continue;
 
         vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
         vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
@@ -3371,46 +3378,12 @@
         }
       }
     } else {
-      // TODO(aconverse): Find out if this is still productive then clean up or
-      // remove
-      // if we're near/nearest and mv == 0,0, compare to zeromv
       if (x->in_active_map &&
-          !(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) &&
-          (this_mode == NEARMV || this_mode == NEARESTMV ||
-          this_mode == ZEROMV) &&
-          frame_mv[this_mode][ref_frame].as_int == 0 &&
-          !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) &&
-          (!comp_pred || frame_mv[this_mode][second_ref_frame].as_int == 0)) {
-        int rfc = mbmi->mode_context[ref_frame];
-        int c1 = cost_mv_ref(cpi, NEARMV, rfc);
-        int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
-        int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
-
-        if (this_mode == NEARMV) {
-          if (c1 > c3)
-            continue;
-        } else if (this_mode == NEARESTMV) {
-          if (c2 > c3)
-            continue;
-        } else {
-          assert(this_mode == ZEROMV);
-          if (!comp_pred) {
-            if ((c3 >= c2 &&
-                 frame_mv[NEARESTMV][ref_frame].as_int == 0) ||
-                (c3 >= c1 &&
-                 frame_mv[NEARMV][ref_frame].as_int == 0))
-              continue;
-          } else {
-            if ((c3 >= c2 &&
-                 frame_mv[NEARESTMV][ref_frame].as_int == 0 &&
-                 frame_mv[NEARESTMV][second_ref_frame].as_int == 0) ||
-                (c3 >= c1 &&
-                 frame_mv[NEARMV][ref_frame].as_int == 0 &&
-                 frame_mv[NEARMV][second_ref_frame].as_int == 0))
-              continue;
-          }
-        }
-      }
+          !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
+        if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
+                                disable_inter_mode_mask, this_mode, ref_frame,
+                                second_ref_frame))
+          continue;
     }
 
     mbmi->mode = this_mode;
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 2f42a41..271589c 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -132,7 +132,8 @@
 static vpx_codec_err_t vp9_destroy(vpx_codec_alg_priv_t *ctx) {
   int i;
 
-  vp9_remove_decompressor(ctx->pbi);
+  if (ctx->pbi)
+    vp9_remove_decompressor(ctx->pbi);
 
   for (i = NELEMENTS(ctx->mmaps) - 1; i >= 0; i--) {
     if (ctx->mmaps[i].dtor)
@@ -324,6 +325,8 @@
   }
 
   if (!res && ctx->pbi) {
+    VP9D_COMP *const pbi = ctx->pbi;
+    VP9_COMMON *const cm = &pbi->common;
     YV12_BUFFER_CONFIG sd;
     int64_t time_stamp = 0, time_end_stamp = 0;
     vp9_ppflags_t flags = {0};
@@ -348,15 +351,11 @@
 #endif
     }
 
-    if (vp9_receive_compressed_data(ctx->pbi, data_sz, data, deadline)) {
-      VP9D_COMP *pbi = (VP9D_COMP*)ctx->pbi;
-      res = update_error_state(ctx, &pbi->common.error);
-    }
+    if (vp9_receive_compressed_data(pbi, data_sz, data, deadline))
+      res = update_error_state(ctx, &cm->error);
 
-    if (!res && 0 == vp9_get_raw_frame(ctx->pbi, &sd, &time_stamp,
+    if (!res && 0 == vp9_get_raw_frame(pbi, &sd, &time_stamp,
                                        &time_end_stamp, &flags)) {
-      VP9D_COMP *const pbi = (VP9D_COMP*)ctx->pbi;
-      VP9_COMMON *const cm = &pbi->common;
       yuvconfig2image(&ctx->img, &sd, user_priv);
 
       ctx->img.fb_priv = cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv;
@@ -653,12 +652,13 @@
 
 static vpx_codec_err_t get_last_ref_updates(vpx_codec_alg_priv_t *ctx,
                                             int ctrl_id, va_list args) {
-  int *update_info = va_arg(args, int *);
-  VP9D_COMP *pbi = (VP9D_COMP*)ctx->pbi;
+  int *const update_info = va_arg(args, int *);
 
   if (update_info) {
-    *update_info = pbi->refresh_frame_flags;
-
+    if (ctx->pbi)
+      *update_info = ctx->pbi->refresh_frame_flags;
+    else
+      return VPX_CODEC_ERROR;
     return VPX_CODEC_OK;
   } else {
     return VPX_CODEC_INVALID_PARAM;
@@ -671,9 +671,8 @@
   int *corrupted = va_arg(args, int *);
 
   if (corrupted) {
-    VP9D_COMP *pbi = (VP9D_COMP*)ctx->pbi;
-    if (pbi)
-      *corrupted = pbi->common.frame_to_show->corrupted;
+    if (ctx->pbi)
+      *corrupted = ctx->pbi->common.frame_to_show->corrupted;
     else
       return VPX_CODEC_ERROR;
     return VPX_CODEC_OK;
@@ -687,10 +686,10 @@
   int *const display_size = va_arg(args, int *);
 
   if (display_size) {
-    const VP9D_COMP *const pbi = (VP9D_COMP*)ctx->pbi;
-    if (pbi) {
-      display_size[0] = pbi->common.display_width;
-      display_size[1] = pbi->common.display_height;
+    if (ctx->pbi) {
+      const VP9_COMMON *const cm = &ctx->pbi->common;
+      display_size[0] = cm->display_width;
+      display_size[1] = cm->display_height;
     } else {
       return VPX_CODEC_ERROR;
     }