Merge "update nestegg"
diff --git a/build/arm-msvs/obj_int_extract.bat b/build/arm-msvs/obj_int_extract.bat
index 7fd16a3..267ed61 100644
--- a/build/arm-msvs/obj_int_extract.bat
+++ b/build/arm-msvs/obj_int_extract.bat
@@ -7,8 +7,12 @@
 REM   be found in the AUTHORS file in the root of the source tree.
 echo on
 
+REM Arguments:
+REM   %1 - Relative path to the directory containing the vp8 and vpx_scale
+REM        source directories.
+REM   %2 - Path to obj_int_extract.exe.
 cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp8/encoder/vp8_asm_enc_offsets.c"
-obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
+%2\obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
 
 cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vpx_scale/vpx_scale_asm_offsets.c"
-obj_int_extract.exe rvds "vpx_scale_asm_offsets.obj" > "vpx_scale_asm_offsets.asm"
+%2\obj_int_extract.exe rvds "vpx_scale_asm_offsets.obj" > "vpx_scale_asm_offsets.asm"
diff --git a/build/make/gen_msvs_vcxproj.sh b/build/make/gen_msvs_vcxproj.sh
index a6315b9..ba9e83a 100755
--- a/build/make/gen_msvs_vcxproj.sh
+++ b/build/make/gen_msvs_vcxproj.sh
@@ -438,9 +438,13 @@
         for config in Debug Release; do
             open_tag ItemDefinitionGroup \
                 Condition="'\$(Configuration)|\$(Platform)'=='$config|$plat'"
-            if [ "$name" = "vpx" ]; then
+            if [ "$name" == "vpx" ]; then
+                hostplat=$plat
+                if [ "$hostplat" == "ARM" ]; then
+                    hostplat=Win32
+                fi
                 open_tag PreBuildEvent
-                tag_content Command "call obj_int_extract.bat $src_path_bare"
+                tag_content Command "call obj_int_extract.bat $src_path_bare $hostplat\\\$(Configuration)"
                 close_tag PreBuildEvent
             fi
             open_tag ClCompile
@@ -483,9 +487,7 @@
             case "$proj_kind" in
             exe)
                 open_tag Link
-                if [ "$name" = "obj_int_extract" ]; then
-                    tag_content OutputFile "${name}.exe"
-                else
+                if [ "$name" != "obj_int_extract" ]; then
                     tag_content AdditionalDependencies "$curlibs"
                     tag_content AdditionalLibraryDirectories "$libdirs;%(AdditionalLibraryDirectories)"
                 fi
diff --git a/build/x86-msvs/obj_int_extract.bat b/build/x86-msvs/obj_int_extract.bat
index 4e9b0ec..44d095d 100644
--- a/build/x86-msvs/obj_int_extract.bat
+++ b/build/x86-msvs/obj_int_extract.bat
@@ -7,6 +7,9 @@
 REM   be found in the AUTHORS file in the root of the source tree.
 echo on
 
+REM Arguments:
+REM   %1 - Relative path to the directory containing the vp8 source directory.
+REM   %2 - Path to obj_int_extract.exe.
 cl /I "./" /I "%1" /nologo /c "%1/vp8/encoder/vp8_asm_enc_offsets.c"
-obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
+%2\obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
 
diff --git a/test/datarate_test.cc b/test/datarate_test.cc
index 5ae76d7..39c9a5a 100644
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -381,7 +381,6 @@
   }
 }
 
-#if CONFIG_NON420
 // Check basic rate targeting,
 TEST_P(DatarateTestVP9, BasicRateTargeting444) {
   ::libvpx_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 140);
@@ -410,7 +409,6 @@
         << cfg_.rc_target_bitrate << " "<< effective_datarate_;
   }
 }
-#endif
 
 // Check that (1) the first dropped frame gets earlier and earlier
 // as the drop frame threshold is increased, and (2) that the total number of
diff --git a/test/test_vectors.cc b/test/test_vectors.cc
index 9342813..aba8a3c 100644
--- a/test/test_vectors.cc
+++ b/test/test_vectors.cc
@@ -161,9 +161,7 @@
   "vp90-2-11-size-351x287.webm", "vp90-2-11-size-351x288.webm",
   "vp90-2-11-size-352x287.webm", "vp90-2-12-droppable_1.ivf",
   "vp90-2-12-droppable_2.ivf", "vp90-2-12-droppable_3.ivf",
-#if CONFIG_NON420
   "vp91-2-04-yv444.webm"
-#endif
 };
 #endif  // CONFIG_VP9_DECODER
 
diff --git a/test/test_vectors.h b/test/test_vectors.h
index 307f732..d5ecc96 100644
--- a/test/test_vectors.h
+++ b/test/test_vectors.h
@@ -21,11 +21,7 @@
 #endif
 
 #if CONFIG_VP9_DECODER
-#if CONFIG_NON420
 const int kNumVp9TestVectors = 223;
-#else
-const int kNumVp9TestVectors = 222;
-#endif
 
 extern const char *kVP9TestVectors[kNumVp9TestVectors];
 #endif  // CONFIG_VP9_DECODER
diff --git a/test/vp9_lossless_test.cc b/test/vp9_lossless_test.cc
index 2282687..ad7ba44 100644
--- a/test/vp9_lossless_test.cc
+++ b/test/vp9_lossless_test.cc
@@ -73,7 +73,6 @@
   EXPECT_GE(psnr_lossless, kMaxPsnr);
 }
 
-#if CONFIG_NON420
 TEST_P(LossLessTest, TestLossLessEncoding444) {
   libvpx_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 10);
 
@@ -90,7 +89,6 @@
   const double psnr_lossless = GetMinPsnr();
   EXPECT_GE(psnr_lossless, kMaxPsnr);
 }
-#endif
 
 VP9_INSTANTIATE_TEST_CASE(LossLessTest, ALL_TEST_MODES);
 }  // namespace
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index 04f8934..fe2056d 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -868,7 +868,6 @@
   assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16]));
 }
 
-#if CONFIG_NON420
 static uint8_t build_lfi(const loop_filter_info_n *lfi_n,
                      const MB_MODE_INFO *mbmi) {
   const int seg = mbmi->segment_id;
@@ -1046,7 +1045,6 @@
     dst->buf += 8 * dst->stride;
   }
 }
-#endif
 
 void vp9_filter_block_plane(VP9_COMMON *const cm,
                             struct macroblockd_plane *const plane,
@@ -1206,10 +1204,8 @@
   const int num_planes = y_only ? 1 : MAX_MB_PLANE;
   int mi_row, mi_col;
   LOOP_FILTER_MASK lfm;
-#if CONFIG_NON420
   int use_420 = y_only || (xd->plane[1].subsampling_y == 1 &&
       xd->plane[1].subsampling_x == 1);
-#endif
 
   for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
     MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mode_info_stride;
@@ -1220,22 +1216,16 @@
       setup_dst_planes(xd, frame_buffer, mi_row, mi_col);
 
       // TODO(JBB): Make setup_mask work for non 420.
-#if CONFIG_NON420
       if (use_420)
-#endif
         vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col,
                        cm->mode_info_stride, &lfm);
 
       for (plane = 0; plane < num_planes; ++plane) {
-#if CONFIG_NON420
         if (use_420)
-#endif
           vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);
-#if CONFIG_NON420
         else
           filter_block_plane_non420(cm, &xd->plane[plane], mi_8x8 + mi_col,
                                     mi_row, mi_col);
-#endif
       }
     }
   }
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 7cbdfce..85f6c97 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -49,7 +49,6 @@
   int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
   int rate;
   int distortion;
-  int64_t intra_error;
   int best_mode_index;
   int rddiv;
   int rdmult;
@@ -63,9 +62,6 @@
   // search loop
   int_mv pred_mv[MAX_REF_FRAMES];
   INTERP_FILTER pred_interp_filter;
-
-  // Bit flag for each mode whether it has high error in comparison to others.
-  unsigned int modes_with_high_error;
 } PICK_MODE_CONTEXT;
 
 struct macroblock_plane {
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 0d0e59e..dbb6ea7 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -194,6 +194,9 @@
   int64_t this_rd;
   int64_t cost[4]= { 0, 50, 75, 100 };
 
+  const int64_t inter_mode_thresh = 300;
+  const int64_t intra_mode_cost = 50;
+
   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
 
   x->skip = 0;
@@ -264,6 +267,31 @@
     }
   }
 
+  // Perform intra prediction search, if the best SAD is above a certain
+  // threshold.
+  if (best_rd > inter_mode_thresh) {
+    struct macroblock_plane *const p = &x->plane[0];
+    struct macroblockd_plane *const pd = &xd->plane[0];
+    for (this_mode = DC_PRED; this_mode <= H_PRED; ++this_mode) {
+      vp9_predict_intra_block(xd, 0, b_width_log2(bsize),
+                              mbmi->tx_size, this_mode,
+                              &p->src.buf[0], p->src.stride,
+                              &pd->dst.buf[0], pd->dst.stride, 0, 0, 0);
+
+      this_rd = cpi->fn_ptr[bsize].sdf(p->src.buf,
+                                       p->src.stride,
+                                       pd->dst.buf,
+                                       pd->dst.stride, INT_MAX);
+
+      if (this_rd + intra_mode_cost < best_rd) {
+        best_rd = this_rd;
+        mbmi->mode = this_mode;
+        mbmi->ref_frame[0] = INTRA_FRAME;
+        mbmi->uv_mode = this_mode;
+      }
+    }
+  }
+
   // Perform sub-pixel motion search, if NEWMV is chosen
   if (mbmi->mode == NEWMV) {
     ref_frame = mbmi->ref_frame[0];
@@ -273,8 +301,5 @@
     xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
   }
 
-  // TODO(jingning) intra prediction search, if the best SAD is above a certain
-  // threshold.
-
   return INT64_MAX;
 }
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 70f9fab..5ff2c84 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -3127,10 +3127,10 @@
                                   BLOCK_SIZE bsize,
                                   PICK_MODE_CONTEXT *ctx,
                                   int64_t best_rd_so_far) {
-  VP9_COMMON *cm = &cpi->common;
-  MACROBLOCKD *xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
-  const struct segmentation *seg = &cm->seg;
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
+  const struct segmentation *const seg = &cm->seg;
   const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
   MB_PREDICTION_MODE this_mode;
   MV_REFERENCE_FRAME ref_frame, second_ref_frame;
@@ -3166,14 +3166,14 @@
   const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
   const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
   int best_skip2 = 0;
-  int ref_frame_mask = 0;
   int mode_skip_mask = 0;
+  const int mode_skip_start = cpi->sf.mode_skip_start + 1;
+  const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize];
+  const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize];
+  const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;
 
   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
 
-  // Everywhere the flag is set the error is much higher than its neighbors.
-  ctx->modes_with_high_error = 0;
-
   estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
                            &comp_mode_p);
 
@@ -3201,13 +3201,26 @@
     frame_mv[ZEROMV][ref_frame].as_int = 0;
   }
 
-  for (ref_frame = LAST_FRAME;
-       ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) {
-    int i;
-    for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
-      if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
-        ref_frame_mask |= (1 << ref_frame);
-        break;
+  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+    // All modes from vp9_mode_order that use this frame as any ref
+    static const int ref_frame_mask_all[] = {
+        0x123291, 0x25c444, 0x39b722
+    };
+    // Fixed mv modes (NEARESTMV, NEARMV, ZEROMV) from vp9_mode_order that use
+    // this frame as their primary ref
+    static const int ref_frame_mask_fixedmv[] = {
+        0x121281, 0x24c404, 0x080102
+    };
+    if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
+      // Skip modes for missing references
+      mode_skip_mask |= ref_frame_mask_all[ref_frame - LAST_FRAME];
+    } else if (cpi->sf.reference_masking) {
+      for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
+        // Skip fixed mv modes for poor references
+        if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
+          mode_skip_mask |= ref_frame_mask_fixedmv[ref_frame - LAST_FRAME];
+          break;
+        }
       }
     }
   }
@@ -3226,107 +3239,61 @@
     int64_t total_sse = INT_MAX;
     int early_term = 0;
 
-    for (i = 0; i < TX_MODES; ++i)
-      tx_cache[i] = INT64_MAX;
+    // Look at the reference frame of the best mode so far and set the
+    // skip mask to look at a subset of the remaining modes.
+    if (mode_index == mode_skip_start) {
+      switch (vp9_mode_order[best_mode_index].ref_frame[0]) {
+        case INTRA_FRAME:
+          break;
+        case LAST_FRAME:
+          mode_skip_mask |= LAST_FRAME_MODE_MASK;
+          break;
+        case GOLDEN_FRAME:
+          mode_skip_mask |= GOLDEN_FRAME_MODE_MASK;
+          break;
+        case ALTREF_FRAME:
+          mode_skip_mask |= ALT_REF_MODE_MASK;
+          break;
+        case NONE:
+        case MAX_REF_FRAMES:
+          assert(0 && "Invalid Reference frame");
+      }
+    }
+    if (mode_skip_mask & (1 << mode_index))
+      continue;
 
-    x->skip = 0;
+    // Test best rd so far against threshold for trying this mode.
+    if (best_rd < ((int64_t)rd_threshes[mode_index] *
+                  rd_thresh_freq_fact[mode_index] >> 5) ||
+        rd_threshes[mode_index] == INT_MAX)
+     continue;
+
     this_mode = vp9_mode_order[mode_index].mode;
     ref_frame = vp9_mode_order[mode_index].ref_frame[0];
     second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
 
-    // Look at the reference frame of the best mode so far and set the
-    // skip mask to look at a subset of the remaining modes.
-    if (mode_index > cpi->sf.mode_skip_start) {
-      if (mode_index == (cpi->sf.mode_skip_start + 1)) {
-        switch (vp9_mode_order[best_mode_index].ref_frame[0]) {
-          case INTRA_FRAME:
-            mode_skip_mask = 0;
-            break;
-          case LAST_FRAME:
-            mode_skip_mask = LAST_FRAME_MODE_MASK;
-            break;
-          case GOLDEN_FRAME:
-            mode_skip_mask = GOLDEN_FRAME_MODE_MASK;
-            break;
-          case ALTREF_FRAME:
-            mode_skip_mask = ALT_REF_MODE_MASK;
-            break;
-          case NONE:
-          case MAX_REF_FRAMES:
-            assert(0 && "Invalid Reference frame");
-        }
-      }
-      if (mode_skip_mask & (1 << mode_index))
-        continue;
-    }
-
-    // Skip if the current reference frame has been masked off
-    if (ref_frame_mask & (1 << ref_frame) && this_mode != NEWMV)
-      continue;
-
-    // Test best rd so far against threshold for trying this mode.
-    if ((best_rd < ((int64_t)cpi->rd_threshes[segment_id][bsize][mode_index] *
-                     cpi->rd_thresh_freq_fact[bsize][mode_index] >> 5)) ||
-        cpi->rd_threshes[segment_id][bsize][mode_index] == INT_MAX)
-      continue;
-
-    // Do not allow compound prediction if the segment level reference
-    // frame feature is in use as in this case there can only be one reference.
-    if ((second_ref_frame > INTRA_FRAME) &&
-         vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
-      continue;
-
-    mbmi->ref_frame[0] = ref_frame;
-    mbmi->ref_frame[1] = second_ref_frame;
-
-    if (!(ref_frame == INTRA_FRAME
-        || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
-      continue;
-    }
-    if (!(second_ref_frame == NONE
-        || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) {
-      continue;
-    }
-
     comp_pred = second_ref_frame > INTRA_FRAME;
     if (comp_pred) {
-      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
-        if (vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME)
-          continue;
-      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
-        if (ref_frame != best_inter_ref_frame &&
-            second_ref_frame != best_inter_ref_frame)
-          continue;
-    }
-
-    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
-    mbmi->uv_mode = DC_PRED;
-
-    // Evaluate all sub-pel filters irrespective of whether we can use
-    // them for this frame.
-    mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
-                                                          : cm->interp_filter;
-    xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
-
-    if (comp_pred) {
-      if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
+      // Do not allow compound prediction if the segment level reference
+      // frame feature is in use as in this case there can only be one
+      // reference.
+      if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
         continue;
-
-      mode_excluded = mode_excluded ? mode_excluded
-                                    : cm->reference_mode == SINGLE_REFERENCE;
+      if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
+          vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME)
+        continue;
+      if ((mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) &&
+          ref_frame != best_inter_ref_frame &&
+          second_ref_frame != best_inter_ref_frame)
+        continue;
+      mode_excluded = mode_excluded ?
+            mode_excluded : cm->reference_mode == SINGLE_REFERENCE;
     } else {
       if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME)
         mode_excluded = mode_excluded ?
             mode_excluded : cm->reference_mode == COMPOUND_REFERENCE;
     }
 
-    // Select prediction reference frames.
-    for (i = 0; i < MAX_MB_PLANE; i++) {
-      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
-      if (comp_pred)
-        xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
-    }
-
     // If the segment reference frame feature is enabled....
     // then do nothing if the current ref frame is not allowed..
     if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
@@ -3368,38 +3335,60 @@
       continue;
     }
 
-#ifdef MODE_TEST_HIT_STATS
-    // TEST/DEBUG CODE
-    // Keep a rcord of the number of test hits at each size
-    cpi->mode_test_hits[bsize]++;
-#endif
-
-
     if (ref_frame == INTRA_FRAME) {
-      TX_SIZE uv_tx;
       // Disable intra modes other than DC_PRED for blocks with low variance
       // Threshold for intra skipping based on source variance
       // TODO(debargha): Specialize the threshold for super block sizes
       static const unsigned int skip_intra_var_thresh[BLOCK_SIZES] = {
         64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
       };
-      if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
+      if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
           this_mode != DC_PRED &&
-          x->source_variance < skip_intra_var_thresh[mbmi->sb_type])
+          x->source_variance < skip_intra_var_thresh[bsize])
         continue;
       // Only search the oblique modes if the best so far is
       // one of the neighboring directional modes
-      if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
+      if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
           (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
         if (vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME)
           continue;
       }
-      mbmi->mode = this_mode;
-      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
-        if (conditional_skipintra(mbmi->mode, best_intra_mode))
+      if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
+        if (conditional_skipintra(this_mode, best_intra_mode))
             continue;
       }
+    }
 
+    mbmi->mode = this_mode;
+    mbmi->uv_mode = DC_PRED;
+    mbmi->ref_frame[0] = ref_frame;
+    mbmi->ref_frame[1] = second_ref_frame;
+    // Evaluate all sub-pel filters irrespective of whether we can use
+    // them for this frame.
+    mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
+                                                          : cm->interp_filter;
+    x->skip = 0;
+    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
+    xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
+
+    // Select prediction reference frames.
+    for (i = 0; i < MAX_MB_PLANE; i++) {
+      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
+      if (comp_pred)
+        xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
+    }
+
+    for (i = 0; i < TX_MODES; ++i)
+      tx_cache[i] = INT64_MAX;
+
+#ifdef MODE_TEST_HIT_STATS
+    // TEST/DEBUG CODE
+    // Keep a record of the number of test hits at each size
+    cpi->mode_test_hits[bsize]++;
+#endif
+
+    if (ref_frame == INTRA_FRAME) {
+      TX_SIZE uv_tx;
       super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
                       bsize, tx_cache, best_rd);
 
@@ -3423,8 +3412,6 @@
         rate2 += intra_cost_penalty;
       distortion2 = distortion_y + distortion_uv;
     } else {
-      mbmi->mode = this_mode;
-      compmode_cost = vp9_cost_bit(comp_mode_p, second_ref_frame > INTRA_FRAME);
       this_rd = handle_inter_mode(cpi, x, tile, bsize,
                                   tx_cache,
                                   &rate2, &distortion2, &skippable,
@@ -3436,14 +3423,16 @@
                                   single_newmv, &total_sse, best_rd);
       if (this_rd == INT64_MAX)
         continue;
-    }
 
-    if (cm->reference_mode == REFERENCE_MODE_SELECT)
-      rate2 += compmode_cost;
+      compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
+
+      if (cm->reference_mode == REFERENCE_MODE_SELECT)
+        rate2 += compmode_cost;
+    }
 
     // Estimate the reference frame signaling cost and add it
     // to the rolling cost variable.
-    if (second_ref_frame > INTRA_FRAME) {
+    if (comp_pred) {
       rate2 += ref_costs_comp[ref_frame];
     } else {
       rate2 += ref_costs_single[ref_frame];
@@ -3551,7 +3540,7 @@
 
         // TODO(debargha): enhance this test with a better distortion prediction
         // based on qp, activity mask and history
-        if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
+        if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
             (mode_index > MIN_EARLY_TERM_INDEX)) {
           const int qstep = xd->plane[0].dequant[1];
           // TODO(debargha): Enhance this by specializing for each mode_index
@@ -3661,17 +3650,6 @@
     }
   }
 
-  // Flag all modes that have a distortion thats > 2x the best we found at
-  // this level.
-  for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) {
-    if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV)
-      continue;
-
-    if (mode_distortions[mode_index] > 2 * *returndistortion) {
-      ctx->modes_with_high_error |= (1 << mode_index);
-    }
-  }
-
   assert((cm->interp_filter == SWITCHABLE) ||
          (cm->interp_filter == best_mbmode.interp_filter) ||
          !is_inter_block(&best_mbmode));
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 76cbebf..b85e172 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -159,11 +159,7 @@
 
     if (frame_marker != VP9_FRAME_MARKER)
       return VPX_CODEC_UNSUP_BITSTREAM;
-#if CONFIG_NON420
     if (version > 1) return VPX_CODEC_UNSUP_BITSTREAM;
-#else
-    if (version != 0) return VPX_CODEC_UNSUP_BITSTREAM;
-#endif
 
     if (vp9_rb_read_bit(&rb)) {  // show an existing frame
       return VPX_CODEC_OK;
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index f7dde62..0b637d4 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -195,6 +195,11 @@
 
   VP9E_SET_SVC,
   VP9E_SET_SVC_PARAMETERS,
+  /*!\brief control function to set the svc spatial and temporal layer ids.
+   * \note Valid ranges: 0..#vpx_codec_enc_cfg::ss_number_layers for spatial
+   *                     layer and 0..#vpx_codec_enc_cfg::ts_number_layers for
+   *                     temporal layer.
+   */
   VP9E_SET_SVC_LAYER_ID
 };
 
diff --git a/vpxenc.c b/vpxenc.c
index 02856d9..8cd5a10 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -231,6 +231,10 @@
 static const arg_def_t disable_warning_prompt =
     ARG_DEF("y", "disable-warning-prompt", 0,
             "Display warnings, but do not prompt user to continue.");
+static const arg_def_t experimental_bitstream =
+    ARG_DEF(NULL, "experimental-bitstream", 0,
+            "Allow experimental bitstream features.");
+
 
 static const arg_def_t *main_args[] = {
   &debugmode,
@@ -713,6 +717,8 @@
       global->disable_warnings = 1;
     else if (arg_match(&arg, &disable_warning_prompt, argi))
       global->disable_warning_prompt = 1;
+    else if (arg_match(&arg, &experimental_bitstream, argi))
+      global->experimental_bitstream = 1;
     else
       argj++;
   }
@@ -1002,13 +1008,20 @@
   } while (0)
 
 
-static void validate_stream_config(struct stream_state *stream) {
-  struct stream_state *streami;
+static void validate_stream_config(const struct stream_state *stream,
+                                   const struct VpxEncoderConfig *global) {
+  const struct stream_state *streami;
 
   if (!stream->config.cfg.g_w || !stream->config.cfg.g_h)
     fatal("Stream %d: Specify stream dimensions with --width (-w) "
           " and --height (-h)", stream->index);
 
+  if (stream->config.cfg.g_profile != 0 && !global->experimental_bitstream) {
+    fatal("Stream %d: profile %d is experimental and requires the --%s flag",
+          stream->index, stream->config.cfg.g_profile,
+          experimental_bitstream.long_name);
+  }
+
   for (streami = stream; streami; streami = streami->next) {
     /* All streams require output files */
     if (!streami->config.out_fn)
@@ -1540,11 +1553,9 @@
   if (!input.filename)
     usage_exit();
 
-#if CONFIG_NON420
   /* Decide if other chroma subsamplings than 4:2:0 are supported */
   if (global.codec->fourcc == VP9_FOURCC)
     input.only_i420 = 0;
-#endif
 
   for (pass = global.pass ? global.pass - 1 : 0; pass < global.passes; pass++) {
     int frames_in = 0, seen_frames = 0;
@@ -1571,7 +1582,7 @@
       fatal("Specify stream dimensions with --width (-w) "
             " and --height (-h)");
     FOREACH_STREAM(set_stream_dimensions(stream, input.width, input.height));
-    FOREACH_STREAM(validate_stream_config(stream));
+    FOREACH_STREAM(validate_stream_config(stream, &global));
 
     /* Ensure that --passes and --pass are consistent. If --pass is set and
      * --passes=2, ensure --fpf was set.
diff --git a/vpxenc.h b/vpxenc.h
index 1e6acaa..a8c3722 100644
--- a/vpxenc.h
+++ b/vpxenc.h
@@ -46,6 +46,7 @@
   int show_rate_hist_buckets;
   int disable_warnings;
   int disable_warning_prompt;
+  int experimental_bitstream;
 };
 
 #ifdef __cplusplus