Merge "Removing legacy XMA features from libvpx."
diff --git a/configure b/configure
index 789e6c3..92ca061 100755
--- a/configure
+++ b/configure
@@ -46,6 +46,9 @@
   ${toggle_realtime_only}         enable this option while building for real-time encoding
   ${toggle_onthefly_bitpacking}   enable on-the-fly bitpacking in real-time encoding
   ${toggle_error_concealment}     enable this option to get a decoder which is able to conceal losses
+  ${toggle_coefficient_range_checking}
+                                  enable decoder to check if intermediate
+                                  transform coefficients are in valid range
   ${toggle_runtime_cpu_detect}    runtime cpu detection
   ${toggle_shared}                shared library support
   ${toggle_static}                static library support
@@ -327,6 +330,7 @@
     encode_perf_tests
     multi_res_encoding
     temporal_denoising
+    coefficient_range_checking
     experimental
     size_limit
     ${EXPERIMENT_LIST}
@@ -384,6 +388,7 @@
     encode_perf_tests
     multi_res_encoding
     temporal_denoising
+    coefficient_range_checking
     experimental
 "
 
diff --git a/test/decode_test_driver.cc b/test/decode_test_driver.cc
index 3e4ef0a..99610eb 100644
--- a/test/decode_test_driver.cc
+++ b/test/decode_test_driver.cc
@@ -67,24 +67,33 @@
                           const vpx_codec_dec_cfg_t &dec_cfg) {
   Decoder* const decoder = codec_->CreateDecoder(dec_cfg, 0);
   ASSERT_TRUE(decoder != NULL);
+  bool end_of_file = false;
 
   // Decode frames.
-  for (video->Begin(); !::testing::Test::HasFailure() && video->cxdata();
+  for (video->Begin(); !::testing::Test::HasFailure() && !end_of_file;
        video->Next()) {
     PreDecodeFrameHook(*video, decoder);
 
     vpx_codec_stream_info_t stream_info;
     stream_info.sz = sizeof(stream_info);
-    const vpx_codec_err_t res_peek = decoder->PeekStream(video->cxdata(),
-                                                         video->frame_size(),
-                                                         &stream_info);
-    HandlePeekResult(decoder, video, res_peek);
-    ASSERT_FALSE(::testing::Test::HasFailure());
 
-    vpx_codec_err_t res_dec = decoder->DecodeFrame(video->cxdata(),
-                                                   video->frame_size());
-    if (!HandleDecodeResult(res_dec, *video, decoder))
-      break;
+    if (video->cxdata() != NULL) {
+      const vpx_codec_err_t res_peek = decoder->PeekStream(video->cxdata(),
+                                                           video->frame_size(),
+                                                           &stream_info);
+      HandlePeekResult(decoder, video, res_peek);
+      ASSERT_FALSE(::testing::Test::HasFailure());
+
+      vpx_codec_err_t res_dec = decoder->DecodeFrame(video->cxdata(),
+                                                     video->frame_size());
+      if (!HandleDecodeResult(res_dec, *video, decoder))
+        break;
+    } else {
+      // Signal end of the file to the decoder.
+      const vpx_codec_err_t res_dec = decoder->DecodeFrame(NULL, 0);
+      ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError();
+      end_of_file = true;
+    }
 
     DxDataIterator dec_iter = decoder->GetDxData();
     const vpx_image_t *img = NULL;
diff --git a/test/sad_test.cc b/test/sad_test.cc
index f07a989..e63770b 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -640,19 +640,9 @@
 
 #if HAVE_AVX2
 #if CONFIG_VP9_ENCODER
-// TODO(jzern): these prototypes can be removed after the avx2 versions are
-// reenabled in vp9_rtcd_defs.pl.
-extern "C" {
-void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride,
-                          const uint8_t *const ref_ptr[], int ref_stride,
-                          unsigned int *sad_array);
-void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride,
-                          const uint8_t *const ref_ptr[], int ref_stride,
-                          unsigned int *sad_array);
-}
 const SadMxNx4Func sad_64x64x4d_avx2 = vp9_sad64x64x4d_avx2;
 const SadMxNx4Func sad_32x32x4d_avx2 = vp9_sad32x32x4d_avx2;
-INSTANTIATE_TEST_CASE_P(DISABLED_AVX2, SADx4Test, ::testing::Values(
+INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::Values(
                         make_tuple(32, 32, sad_32x32x4d_avx2),
                         make_tuple(64, 64, sad_64x64x4d_avx2)));
 #endif  // CONFIG_VP9_ENCODER
diff --git a/test/tools_common.sh b/test/tools_common.sh
index e98bead..0bfefba 100755
--- a/test/tools_common.sh
+++ b/test/tools_common.sh
@@ -182,65 +182,6 @@
   [ "$(vpx_config_option_enabled CONFIG_WEBM_IO)" = "yes" ] && echo yes
 }
 
-# Echoes yes to stdout when vpxenc exists according to vpx_tool_available().
-vpxenc_available() {
-  [ -n $(vpx_tool_available vpxenc) ] && echo yes
-}
-
-# Wrapper function for running vpxenc. Positional parameters are interpreted as
-# follows:
-#   1 - codec name
-#   2 - input width
-#   3 - input height
-#   4 - number of frames to encode
-#   5 - path to input file
-#   6 - path to output file
-#       Note: The output file path must end in .ivf to output an IVF file.
-#   7 - extra flags
-#       Note: Extra flags currently supports a special case: when set to "-"
-#             input is piped to vpxenc via cat.
-vpxenc() {
-  local encoder="${LIBVPX_BIN_PATH}/vpxenc${VPX_TEST_EXE_SUFFIX}"
-  local codec="${1}"
-  local width=${2}
-  local height=${3}
-  local frames=${4}
-  local input=${5}
-  local output="${VPX_TEST_OUTPUT_DIR}/${6}"
-  local extra_flags=${7}
-
-  # Because --ivf must be within the command line to get IVF from vpxenc.
-  if echo "${output}" | egrep -q 'ivf$'; then
-    use_ivf=--ivf
-  else
-    unset use_ivf
-  fi
-
-  if [ "${extra_flags}" = "-" ]; then
-    pipe_input=yes
-    extra_flags=${8}
-  else
-    unset pipe_input
-  fi
-
-  if [ -z "${pipe_input}" ]; then
-    eval "${VPX_TEST_PREFIX}" "${encoder}" --codec=${codec} --width=${width} \
-        --height=${height} --limit=${frames} ${use_ivf} ${extra_flags} \
-        --output="${output}" "${input}" ${devnull}
-  else
-    cat "${input}" \
-        | eval "${VPX_TEST_PREFIX}" "${encoder}" --codec=${codec} \
-            --width=${width} --height=${height} --limit=${frames} ${use_ivf} \
-            ${extra_flags} --output="${output}" - ${devnull}
-  fi
-
-  if [ ! -e "${output}" ]; then
-    # Return non-zero exit status: output file doesn't exist, so something
-    # definitely went wrong.
-    return 1
-  fi
-}
-
 # Filters strings from positional parameter one using the filter specified by
 # positional parameter two. Filter behavior depends on the presence of a third
 # positional parameter. When parameter three is present, strings that match the
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 40b7df6..7d81182 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -707,24 +707,7 @@
 #endif
 
 #if HAVE_AVX2
-// TODO(jzern): these prototypes can be removed after the avx2 versions are
-// reenabled in vp9_rtcd_defs.pl.
-extern "C" {
-unsigned int vp9_sub_pixel_variance32x32_avx2(
-    const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
-    const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_sub_pixel_variance64x64_avx2(
-    const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
-    const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_sub_pixel_avg_variance32x32_avx2(
-    const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
-    const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
-    const uint8_t *second_pred);
-unsigned int vp9_sub_pixel_avg_variance64x64_avx2(
-    const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
-    const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
-    const uint8_t *second_pred);
-}
+
 const vp9_variance_fn_t variance16x16_avx2 = vp9_variance16x16_avx2;
 const vp9_variance_fn_t variance32x16_avx2 = vp9_variance32x16_avx2;
 const vp9_variance_fn_t variance32x32_avx2 = vp9_variance32x32_avx2;
@@ -743,7 +726,7 @@
 const vp9_subpixvariance_fn_t subpel_variance64x64_avx2 =
     vp9_sub_pixel_variance64x64_avx2;
 INSTANTIATE_TEST_CASE_P(
-    DISABLED_AVX2, VP9SubpelVarianceTest,
+    AVX2, VP9SubpelVarianceTest,
     ::testing::Values(make_tuple(5, 5, subpel_variance32x32_avx2),
                       make_tuple(6, 6, subpel_variance64x64_avx2)));
 
@@ -752,7 +735,7 @@
 const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_avx2 =
     vp9_sub_pixel_avg_variance64x64_avx2;
 INSTANTIATE_TEST_CASE_P(
-    DISABLED_AVX2, VP9SubpelAvgVarianceTest,
+    AVX2, VP9SubpelAvgVarianceTest,
     ::testing::Values(make_tuple(5, 5, subpel_avg_variance32x32_avx2),
                       make_tuple(6, 6, subpel_avg_variance64x64_avx2)));
 #endif  // HAVE_AVX2
diff --git a/test/vp9_spatial_svc_encoder.sh b/test/vp9_spatial_svc_encoder.sh
index 7a964a9..6dd5f17 100755
--- a/test/vp9_spatial_svc_encoder.sh
+++ b/test/vp9_spatial_svc_encoder.sh
@@ -47,43 +47,9 @@
   [ -e "${output_file}" ] || return 1
 }
 
-# Each mode is run with layer count 1-$vp9_ssvc_test_layers.
+# Each test is run with layer count 1-$vp9_ssvc_test_layers.
 vp9_ssvc_test_layers=5
 
-DISABLED_vp9_spatial_svc_mode_i() {
-  if [ "$(vp9_encode_available)" = "yes" ]; then
-    local readonly test_name="DISABLED_vp9_spatial_svc_mode_i"
-    for layers in $(seq 1 ${vp9_ssvc_test_layers}); do
-      vp9_spatial_svc_encoder "${test_name}" -m i -l ${layers}
-    done
-  fi
-}
-
-DISABLED_vp9_spatial_svc_mode_altip() {
-  if [ "$(vp9_encode_available)" = "yes" ]; then
-    local readonly test_name="DISABLED_vp9_spatial_svc_mode_altip"
-    for layers in $(seq 1 ${vp9_ssvc_test_layers}); do
-      vp9_spatial_svc_encoder "${test_name}" -m "alt-ip" -l ${layers}
-    done
-  fi
-}
-
-DISABLED_vp9_spatial_svc_mode_ip() {
-  if [ "$(vp9_encode_available)" = "yes" ]; then
-    local readonly test_name="DISABLED_vp9_spatial_svc_mode_ip"
-    vp9_spatial_svc_encoder "${test_name}" -m ip -l 1
-  fi
-}
-
-DISABLED_vp9_spatial_svc_mode_gf() {
-  if [ "$(vp9_encode_available)" = "yes" ]; then
-    local readonly test_name="DISABLED_vp9_spatial_svc_mode_gf"
-    for layers in $(seq 1 ${vp9_ssvc_test_layers}); do
-      vp9_spatial_svc_encoder "${test_name}" -m gf -l ${layers}
-    done
-  fi
-}
-
 vp9_spatial_svc() {
   if [ "$(vp9_encode_available)" = "yes" ]; then
     local readonly test_name="vp9_spatial_svc"
diff --git a/test/vpxenc.sh b/test/vpxenc.sh
index f08c048..dcfa7f7 100755
--- a/test/vpxenc.sh
+++ b/test/vpxenc.sh
@@ -39,6 +39,65 @@
   fi
 }
 
+# Echoes yes to stdout when vpxenc exists according to vpx_tool_available().
+vpxenc_available() {
+  [ -n $(vpx_tool_available vpxenc) ] && echo yes
+}
+
+# Wrapper function for running vpxenc. Positional parameters are interpreted as
+# follows:
+#   1 - codec name
+#   2 - input width
+#   3 - input height
+#   4 - number of frames to encode
+#   5 - path to input file
+#   6 - path to output file
+#       Note: The output file path must end in .ivf to output an IVF file.
+#   7 - extra flags
+#       Note: Extra flags currently supports a special case: when set to "-"
+#             input is piped to vpxenc via cat.
+vpxenc() {
+  local encoder="${LIBVPX_BIN_PATH}/vpxenc${VPX_TEST_EXE_SUFFIX}"
+  local codec="${1}"
+  local width=${2}
+  local height=${3}
+  local frames=${4}
+  local input=${5}
+  local output="${VPX_TEST_OUTPUT_DIR}/${6}"
+  local extra_flags=${7}
+
+  # Because --ivf must be within the command line to get IVF from vpxenc.
+  if echo "${output}" | egrep -q 'ivf$'; then
+    use_ivf=--ivf
+  else
+    unset use_ivf
+  fi
+
+  if [ "${extra_flags}" = "-" ]; then
+    pipe_input=yes
+    extra_flags=${8}
+  else
+    unset pipe_input
+  fi
+
+  if [ -z "${pipe_input}" ]; then
+    eval "${VPX_TEST_PREFIX}" "${encoder}" --codec=${codec} --width=${width} \
+        --height=${height} --limit=${frames} ${use_ivf} ${extra_flags} \
+        --output="${output}" "${input}" ${devnull}
+  else
+    cat "${input}" \
+        | eval "${VPX_TEST_PREFIX}" "${encoder}" --codec=${codec} \
+            --width=${width} --height=${height} --limit=${frames} ${use_ivf} \
+            ${extra_flags} --output="${output}" - ${devnull}
+  fi
+
+  if [ ! -e "${output}" ]; then
+    # Return non-zero exit status: output file doesn't exist, so something
+    # definitely went wrong.
+    return 1
+  fi
+}
+
 vpxenc_vp8_ivf() {
   if [ "$(vpxenc_can_encode_vp8)" = "yes" ]; then
     vpxenc vp8 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \
@@ -76,10 +135,10 @@
   fi
 }
 
-DISABLED_vpxenc_vp9_ivf_lossless() {
+vpxenc_vp9_ivf_lossless() {
   if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then
     vpxenc vp9 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \
-        "${YUV_RAW_INPUT}" vp9_lossless.ivf --lossless
+        "${YUV_RAW_INPUT}" vp9_lossless.ivf --lossless=1
   fi
 }
 
@@ -88,6 +147,6 @@
               vpxenc_vp8_ivf_pipe_input
               vpxenc_vp9_ivf
               vpxenc_vp9_webm
-              DISABLED_vpxenc_vp9_ivf_lossless"
+              vpxenc_vp9_ivf_lossless"
 
 run_tests vpxenc_verify_environment "${vpxenc_tests}"
diff --git a/tools_common.c b/tools_common.c
index b481579..7cfd066 100644
--- a/tools_common.c
+++ b/tools_common.c
@@ -246,7 +246,7 @@
     int y;
 
     for (y = 0; y < h; ++y) {
-      if (fread(buf, 1, w, file) != w)
+      if (fread(buf, 1, w, file) != (size_t)w)
         return 0;
       buf += stride;
     }
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 298f50f..e93b65c 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1277,6 +1277,16 @@
 
     vpx_free(cpi->tplist);
     CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cm->mb_rows));
+
+#if CONFIG_TEMPORAL_DENOISING
+    if (cpi->oxcf.noise_sensitivity > 0) {
+      vp8_denoiser_free(&cpi->denoiser);
+      vp8_denoiser_allocate(&cpi->denoiser, width, height,
+                            cm->mb_rows, cm->mb_cols,
+                            ((cpi->oxcf.noise_sensitivity == 3) ?
+                            1 : 0));
+    }
+#endif
 }
 
 
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index c905625..0fe0c92 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -60,6 +60,7 @@
     vpx_decrypt_cb          decrypt_cb;
     void                    *decrypt_state;
     vpx_image_t             img;
+    int                     flushed;
     int                     img_setup;
     struct frame_buffers    yv12_frame_buffers;
     void                    *user_priv;
@@ -88,6 +89,7 @@
     ctx->priv->alg_priv->si.sz = sizeof(ctx->priv->alg_priv->si);
     ctx->priv->alg_priv->decrypt_cb = NULL;
     ctx->priv->alg_priv->decrypt_state = NULL;
+    ctx->priv->alg_priv->flushed = 0;
     ctx->priv->init_flags = ctx->init_flags;
 
     if (ctx->config.dec)
@@ -328,6 +330,13 @@
     unsigned int resolution_change = 0;
     unsigned int w, h;
 
+    if (data == NULL && data_sz == 0) {
+      ctx->flushed = 1;
+      return VPX_CODEC_OK;
+    }
+
+    /* Reset flushed when receiving a valid frame */
+    ctx->flushed = 0;
 
     /* Update the input fragment data */
     if(update_fragments(ctx, data, data_sz, &res) <= 0)
diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h
index 0c2898d..d776313 100644
--- a/vp9/common/vp9_enums.h
+++ b/vp9/common/vp9_enums.h
@@ -27,10 +27,10 @@
 
 // Bitstream profiles indicated by 2-3 bits in the uncompressed header.
 // 00: Profile 0.  8-bit 4:2:0 only.
-// 10: Profile 1.  Adds 4:4:4, 4:2:2, and 4:4:0 to Profile 0.
-// 01: Profile 2.  Supports 10-bit and 12-bit color only, with 4:2:0 sampling.
-// 110: Profile 3. Supports 10-bit and 12-bit color only, with 4:2:2/4:4:4/4:4:0
-//                 subsampling.
+// 10: Profile 1.  8-bit 4:4:4, 4:2:2, and 4:4:0.
+// 01: Profile 2.  10-bit and 12-bit color only, with 4:2:0 sampling.
+// 110: Profile 3. 10-bit and 12-bit color only, with 4:2:2/4:4:4/4:4:0
+//                 sampling.
 // 111: Undefined profile.
 typedef enum BITSTREAM_PROFILE {
   PROFILE_0,
diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h
index 3253bcb..7f595e1 100644
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -81,6 +81,16 @@
 
 static INLINE int dct_const_round_shift(int input) {
   int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+  // For valid VP9 input streams, intermediate stage coefficients should always
+  // stay within the range of a signed 16 bit integer. Coefficients can go out
+  // of this range for invalid/corrupt VP9 streams. However, strictly checking
+  // this range for every intermediate coefficient can burdensome for a decoder,
+  // therefore the following assertion is only enabled when configured with
+  // --enable-coefficient-range-checking.
+  assert(INT16_MIN <= rv);
+  assert(rv <= INT16_MAX);
+#endif
   return (int16_t)rv;
 }
 
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c
index 0fe58c5..ab64d30 100644
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -20,7 +20,7 @@
                              int block, int mi_row, int mi_col) {
   const int *ref_sign_bias = cm->ref_frame_sign_bias;
   int i, refmv_count = 0;
-  const MODE_INFO *prev_mi = cm->coding_use_prev_mi && cm->prev_mi
+  const MODE_INFO *prev_mi = !cm->error_resilient_mode && cm->prev_mi
         ? cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col]
         : NULL;
   const MB_MODE_INFO *const prev_mbmi = prev_mi ? &prev_mi->mbmi : NULL;
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 97b267c..ae6c6ff 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -188,11 +188,6 @@
   int error_resilient_mode;
   int frame_parallel_decoding_mode;
 
-  // Flag indicates if prev_mi can be used in coding:
-  //   0: encoder assumes decoder does not have prev_mi
-  //   1: encoder assumes decoder has and uses prev_mi
-  unsigned int coding_use_prev_mi;
-
   int log2_tile_cols, log2_tile_rows;
 
   // Private data associated with the frame buffer callbacks.
diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c
index cdf5fb9..0146384 100644
--- a/vp9/common/vp9_pred_common.c
+++ b/vp9/common/vp9_pred_common.c
@@ -366,7 +366,7 @@
   return (above_ctx + left_ctx) > max_tx_size;
 }
 
-int vp9_get_segment_id(VP9_COMMON *cm, const uint8_t *segment_ids,
+int vp9_get_segment_id(const VP9_COMMON *cm, const uint8_t *segment_ids,
                        BLOCK_SIZE bsize, int mi_row, int mi_col) {
   const int mi_offset = mi_row * cm->mi_cols + mi_col;
   const int bw = num_8x8_blocks_wide_lookup[bsize];
diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h
index 1a7ba86..2c96506 100644
--- a/vp9/common/vp9_pred_common.h
+++ b/vp9/common/vp9_pred_common.h
@@ -26,7 +26,7 @@
   return xd->left_available ? xd->mi[-1] : NULL;
 }
 
-int vp9_get_segment_id(VP9_COMMON *cm, const uint8_t *segment_ids,
+int vp9_get_segment_id(const VP9_COMMON *cm, const uint8_t *segment_ids,
                        BLOCK_SIZE bsize, int mi_row, int mi_col);
 
 static INLINE int vp9_get_pred_context_seg_id(const MACROBLOCKD *xd) {
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 8d91791..708f41b 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -447,10 +447,10 @@
 specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc";
 
 add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_sub_pixel_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
 
 add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
 
 add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";
@@ -477,10 +477,10 @@
 specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";
 
 add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_sub_pixel_variance32x32 neon/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_variance32x32 avx2 neon/, "$sse2_x86inc", "$ssse3_x86inc";
 
 add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
 
 add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_sub_pixel_variance16x16 neon/, "$sse2_x86inc", "$ssse3_x86inc";
@@ -653,7 +653,7 @@
 specialize qw/vp9_sad4x4x8 sse4/;
 
 add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad64x64x4d sse2/;
+specialize qw/vp9_sad64x64x4d sse2 avx2/;
 
 add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
 specialize qw/vp9_sad32x64x4d sse2/;
@@ -668,7 +668,7 @@
 specialize qw/vp9_sad16x32x4d sse2/;
 
 add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad32x32x4d sse2/;
+specialize qw/vp9_sad32x32x4d sse2 avx2/;
 
 add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
 specialize qw/vp9_sad16x16x4d sse2/;
diff --git a/vp9/common/x86/vp9_postproc_x86.h b/vp9/common/x86/vp9_postproc_x86.h
deleted file mode 100644
index cab9d34..0000000
--- a/vp9/common/x86/vp9_postproc_x86.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef VP9_COMMON_X86_VP9_POSTPROC_X86_H_
-#define VP9_COMMON_X86_VP9_POSTPROC_X86_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Note:
- *
- * This platform is commonly built for runtime CPU detection. If you modify
- * any of the function mappings present in this file, be sure to also update
- * them in the function pointer initialization code
- */
-
-#if HAVE_MMX
-extern prototype_postproc_inplace(vp9_mbpost_proc_down_mmx);
-extern prototype_postproc(vp9_post_proc_down_and_across_mmx);
-extern prototype_postproc_addnoise(vp9_plane_add_noise_mmx);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef  vp9_postproc_down
-#define vp9_postproc_down vp9_mbpost_proc_down_mmx
-
-#undef  vp9_postproc_downacross
-#define vp9_postproc_downacross vp9_post_proc_down_and_across_mmx
-
-#undef  vp9_postproc_addnoise
-#define vp9_postproc_addnoise vp9_plane_add_noise_mmx
-
-#endif
-#endif
-
-
-#if HAVE_SSE2
-extern prototype_postproc_inplace(vp9_mbpost_proc_down_xmm);
-extern prototype_postproc_inplace(vp9_mbpost_proc_across_ip_xmm);
-extern prototype_postproc(vp9_post_proc_down_and_across_xmm);
-extern prototype_postproc_addnoise(vp9_plane_add_noise_wmt);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef  vp9_postproc_down
-#define vp9_postproc_down vp9_mbpost_proc_down_xmm
-
-#undef  vp9_postproc_across
-#define vp9_postproc_across vp9_mbpost_proc_across_ip_xmm
-
-#undef  vp9_postproc_downacross
-#define vp9_postproc_downacross vp9_post_proc_down_and_across_xmm
-
-#undef  vp9_postproc_addnoise
-#define vp9_postproc_addnoise vp9_plane_add_noise_wmt
-
-
-#endif
-#endif
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // VP9_COMMON_X86_VP9_POSTPROC_X86_H_
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index a448bd2..0797168 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -1095,6 +1095,40 @@
   return (BITSTREAM_PROFILE) profile;
 }
 
+static void read_bitdepth_colorspace_sampling(
+    VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
+  if (cm->profile >= PROFILE_2)
+    cm->bit_depth = vp9_rb_read_bit(rb) ? BITS_12 : BITS_10;
+  cm->color_space = (COLOR_SPACE)vp9_rb_read_literal(rb, 3);
+  if (cm->color_space != SRGB) {
+    vp9_rb_read_bit(rb);  // [16,235] (including xvycc) vs [0,255] range
+    if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
+      cm->subsampling_x = vp9_rb_read_bit(rb);
+      cm->subsampling_y = vp9_rb_read_bit(rb);
+      if (cm->subsampling_x == 1 && cm->subsampling_y == 1)
+        vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+                           "4:2:0 color not supported in profile 1 or 3");
+      if (vp9_rb_read_bit(rb))
+        vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+                           "Reserved bit set");
+    } else {
+      cm->subsampling_y = cm->subsampling_x = 1;
+    }
+  } else {
+    if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
+      // Note if colorspace is SRGB then 4:4:4 chroma sampling is assumed.
+      // 4:2:2 or 4:4:0 chroma sampling is not allowed.
+      cm->subsampling_y = cm->subsampling_x = 0;
+      if (vp9_rb_read_bit(rb))
+        vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+                           "Reserved bit set");
+    } else {
+      vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+                         "4:4:4 color not supported in profile 0 or 2");
+    }
+  }
+}
+
 static size_t read_uncompressed_header(VP9Decoder *pbi,
                                        struct vp9_read_bit_buffer *rb) {
   VP9_COMMON *const cm = &pbi->common;
@@ -1137,32 +1171,8 @@
     if (!vp9_read_sync_code(rb))
       vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
                          "Invalid frame sync code");
-    if (cm->profile > PROFILE_1)
-      cm->bit_depth = vp9_rb_read_bit(rb) ? BITS_12 : BITS_10;
-    cm->color_space = (COLOR_SPACE)vp9_rb_read_literal(rb, 3);
-    if (cm->color_space != SRGB) {
-      vp9_rb_read_bit(rb);  // [16,235] (including xvycc) vs [0,255] range
-      if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
-        cm->subsampling_x = vp9_rb_read_bit(rb);
-        cm->subsampling_y = vp9_rb_read_bit(rb);
-        if (vp9_rb_read_bit(rb))
-          vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
-                             "Reserved bit set");
-      } else {
-        cm->subsampling_y = cm->subsampling_x = 1;
-      }
-    } else {
-      if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
-        cm->subsampling_y = cm->subsampling_x = 0;
-        if (vp9_rb_read_bit(rb))
-          vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
-                             "Reserved bit set");
-      } else {
-        vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
-                           "4:4:4 color not supported in profile 0");
-      }
-    }
 
+    read_bitdepth_colorspace_sampling(cm, rb);
     pbi->refresh_frame_flags = (1 << REF_FRAMES) - 1;
 
     for (i = 0; i < REFS_PER_FRAME; ++i) {
@@ -1181,15 +1191,18 @@
       if (!vp9_read_sync_code(rb))
         vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
                            "Invalid frame sync code");
+      if (cm->profile > PROFILE_0) {
+        read_bitdepth_colorspace_sampling(cm, rb);
+      } else {
+        // NOTE: The intra-only frame header does not include the specification
+        // of either the color format or color sub-sampling in profile 0. VP9
+        // specifies that the default color space should be YUV 4:2:0 in this
+        // case (normative).
+        cm->color_space = BT_601;
+        cm->subsampling_y = cm->subsampling_x = 1;
+      }
 
       pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
-
-      // NOTE: The intra-only frame header does not include the specification of
-      // either the color format or color sub-sampling. VP9 specifies that the
-      // default color space should be YUV 4:2:0 in this case (normative).
-      cm->color_space = BT_601;
-      cm->subsampling_y = cm->subsampling_x = 1;
-
       setup_frame_size(cm, rb);
     } else {
       pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
@@ -1220,11 +1233,9 @@
   }
 
   if (!cm->error_resilient_mode) {
-    cm->coding_use_prev_mi = 1;
     cm->refresh_frame_context = vp9_rb_read_bit(rb);
     cm->frame_parallel_decoding_mode = vp9_rb_read_bit(rb);
   } else {
-    cm->coding_use_prev_mi = 0;
     cm->refresh_frame_context = 0;
     cm->frame_parallel_decoding_mode = 1;
   }
@@ -1400,7 +1411,7 @@
 
   init_macroblockd(cm, &pbi->mb);
 
-  if (cm->coding_use_prev_mi)
+  if (!cm->error_resilient_mode)
     set_prev_mi(cm);
   else
     cm->prev_mi = NULL;
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 2434aaf..fb605b4 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -78,13 +78,13 @@
     vp9_cond_prob_diff_update(w, &probs[i], branch_ct[i]);
 }
 
-static void write_selected_tx_size(const VP9_COMP *cpi,
+static void write_selected_tx_size(const VP9_COMMON *cm,
+                                   const MACROBLOCKD *xd,
                                    TX_SIZE tx_size, BLOCK_SIZE bsize,
                                    vp9_writer *w) {
   const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
-  const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
   const vp9_prob *const tx_probs = get_tx_probs2(max_tx_size, xd,
-                                                 &cpi->common.fc.tx_probs);
+                                                 &cm->fc.tx_probs);
   vp9_write(w, tx_size != TX_4X4, tx_probs[0]);
   if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) {
     vp9_write(w, tx_size != TX_8X8, tx_probs[1]);
@@ -93,14 +93,13 @@
   }
 }
 
-static int write_skip(const VP9_COMP *cpi, int segment_id, const MODE_INFO *mi,
-                      vp9_writer *w) {
-  const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
-  if (vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
+static int write_skip(const VP9_COMMON *cm, const MACROBLOCKD *xd,
+                      int segment_id, const MODE_INFO *mi, vp9_writer *w) {
+  if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
     return 1;
   } else {
     const int skip = mi->mbmi.skip;
-    vp9_write(w, skip, vp9_get_skip_prob(&cpi->common, xd));
+    vp9_write(w, skip, vp9_get_skip_prob(cm, xd));
     return skip;
   }
 }
@@ -121,7 +120,7 @@
 }
 
 static void pack_mb_tokens(vp9_writer *w,
-                           TOKENEXTRA **tp, const TOKENEXTRA *stop) {
+                           TOKENEXTRA **tp, const TOKENEXTRA *const stop) {
   TOKENEXTRA *p = *tp;
 
   while (p < stop && p->token != EOSB_TOKEN) {
@@ -188,9 +187,8 @@
 }
 
 // This function encodes the reference frame
-static void write_ref_frames(const VP9_COMP *cpi, vp9_writer *w) {
-  const VP9_COMMON *const cm = &cpi->common;
-  const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *xd,
+                             vp9_writer *w) {
   const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const int is_compound = has_second_ref(mbmi);
   const int segment_id = mbmi->segment_id;
@@ -252,7 +250,7 @@
     }
   }
 
-  skip = write_skip(cpi, segment_id, mi, w);
+  skip = write_skip(cm, xd, segment_id, mi, w);
 
   if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
     vp9_write(w, is_inter, vp9_get_intra_inter_prob(cm, xd));
@@ -260,7 +258,7 @@
   if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
       !(is_inter &&
         (skip || vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)))) {
-    write_selected_tx_size(cpi, mbmi->tx_size, bsize, w);
+    write_selected_tx_size(cm, xd, mbmi->tx_size, bsize, w);
   }
 
   if (!is_inter) {
@@ -281,7 +279,7 @@
   } else {
     const int mode_ctx = mbmi->mode_context[mbmi->ref_frame[0]];
     const vp9_prob *const inter_probs = cm->fc.inter_mode_probs[mode_ctx];
-    write_ref_frames(cpi, w);
+    write_ref_frames(cm, xd, w);
 
     // If segment skip is not enabled code the mode.
     if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
@@ -329,10 +327,8 @@
   }
 }
 
-static void write_mb_modes_kf(const VP9_COMP *cpi, MODE_INFO **mi_8x8,
-                              vp9_writer *w) {
-  const VP9_COMMON *const cm = &cpi->common;
-  const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+static void write_mb_modes_kf(const VP9_COMMON *cm, const MACROBLOCKD *xd,
+                              MODE_INFO **mi_8x8, vp9_writer *w) {
   const struct segmentation *const seg = &cm->seg;
   const MODE_INFO *const mi = mi_8x8[0];
   const MODE_INFO *const above_mi = mi_8x8[-xd->mi_stride];
@@ -343,10 +339,10 @@
   if (seg->update_map)
     write_segment_id(w, seg, mbmi->segment_id);
 
-  write_skip(cpi, mbmi->segment_id, mi, w);
+  write_skip(cm, xd, mbmi->segment_id, mi, w);
 
   if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT)
-    write_selected_tx_size(cpi, mbmi->tx_size, bsize, w);
+    write_selected_tx_size(cm, xd, mbmi->tx_size, bsize, w);
 
   if (bsize >= BLOCK_8X8) {
     write_intra_mode(w, mbmi->mode, get_y_mode_probs(mi, above_mi, left_mi, 0));
@@ -368,9 +364,10 @@
 }
 
 static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile,
-                          vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end,
+                          vp9_writer *w, TOKENEXTRA **tok,
+                          const TOKENEXTRA *const tok_end,
                           int mi_row, int mi_col) {
-  VP9_COMMON *const cm = &cpi->common;
+  const VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
   MODE_INFO *m;
 
@@ -382,7 +379,7 @@
                  mi_col, num_8x8_blocks_wide_lookup[m->mbmi.sb_type],
                  cm->mi_rows, cm->mi_cols);
   if (frame_is_intra_only(cm)) {
-    write_mb_modes_kf(cpi, xd->mi, w);
+    write_mb_modes_kf(cm, xd, xd->mi, w);
   } else {
     pack_inter_mode_mvs(cpi, m, w);
   }
@@ -391,7 +388,8 @@
   pack_mb_tokens(w, tok, tok_end);
 }
 
-static void write_partition(VP9_COMMON *cm, MACROBLOCKD *xd,
+static void write_partition(const VP9_COMMON *const cm,
+                            const MACROBLOCKD *const xd,
                             int hbs, int mi_row, int mi_col,
                             PARTITION_TYPE p, BLOCK_SIZE bsize, vp9_writer *w) {
   const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
@@ -413,17 +411,17 @@
 }
 
 static void write_modes_sb(VP9_COMP *cpi,
-                           const TileInfo *const tile,
-                           vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end,
+                           const TileInfo *const tile, vp9_writer *w,
+                           TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
                            int mi_row, int mi_col, BLOCK_SIZE bsize) {
-  VP9_COMMON *const cm = &cpi->common;
+  const VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
 
   const int bsl = b_width_log2(bsize);
   const int bs = (1 << bsl) / 4;
   PARTITION_TYPE partition;
   BLOCK_SIZE subsize;
-  MODE_INFO *m = cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col];
+  const MODE_INFO *m = cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col];
 
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
@@ -469,8 +467,8 @@
 }
 
 static void write_modes(VP9_COMP *cpi,
-                        const TileInfo *const tile,
-                        vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end) {
+                        const TileInfo *const tile, vp9_writer *w,
+                        TOKENEXTRA **tok, const TOKENEXTRA *const tok_end) {
   int mi_row, mi_col;
 
   for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
@@ -731,7 +729,7 @@
   }
 }
 
-static void encode_quantization(VP9_COMMON *cm,
+static void encode_quantization(const VP9_COMMON *const cm,
                                 struct vp9_write_bit_buffer *wb) {
   vp9_wb_write_literal(wb, cm->base_qindex, QINDEX_BITS);
   write_delta_q(wb, cm->y_dc_delta_q);
@@ -739,11 +737,11 @@
   write_delta_q(wb, cm->uv_ac_delta_q);
 }
 
-static void encode_segmentation(VP9_COMP *cpi,
+static void encode_segmentation(VP9_COMMON *cm, MACROBLOCKD *xd,
                                 struct vp9_write_bit_buffer *wb) {
   int i, j;
 
-  struct segmentation *seg = &cpi->common.seg;
+  const struct segmentation *seg = &cm->seg;
 
   vp9_wb_write_bit(wb, seg->enabled);
   if (!seg->enabled)
@@ -753,7 +751,7 @@
   vp9_wb_write_bit(wb, seg->update_map);
   if (seg->update_map) {
     // Select the coding strategy (temporal or spatial)
-    vp9_choose_segmap_coding_method(cpi);
+    vp9_choose_segmap_coding_method(cm, xd);
     // Write out probabilities used to decode unpredicted  macro-block segments
     for (i = 0; i < SEG_TREE_PROBS; i++) {
       const int prob = seg->tree_probs[i];
@@ -869,7 +867,8 @@
   }
 }
 
-static void write_tile_info(VP9_COMMON *cm, struct vp9_write_bit_buffer *wb) {
+static void write_tile_info(const VP9_COMMON *const cm,
+                            struct vp9_write_bit_buffer *wb) {
   int min_log2_tile_cols, max_log2_tile_cols, ones;
   vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
 
@@ -888,21 +887,17 @@
 }
 
 static int get_refresh_mask(VP9_COMP *cpi) {
-  if (!cpi->multi_arf_allowed && cpi->refresh_golden_frame &&
-      cpi->rc.is_src_frame_alt_ref &&
-      (!cpi->use_svc ||      // Add spatial svc base layer case here
-       (cpi->svc.number_temporal_layers == 1 &&
-        cpi->svc.spatial_layer_id == 0 &&
-        cpi->svc.layer_context[0].gold_ref_idx >=0 &&
-        cpi->oxcf.ss_play_alternate[0]))) {
-    // Preserve the previously existing golden frame and update the frame in
-    // the alt ref slot instead. This is highly specific to the use of
-    // alt-ref as a forward reference, and this needs to be generalized as
-    // other uses are implemented (like RTC/temporal scaling)
-    //
-    // gld_fb_idx and alt_fb_idx need to be swapped for future frames, but
-    // that happens in vp9_encoder.c:update_reference_frames() so that it can
-    // be done outside of the recode loop.
+  if (vp9_preserve_existing_gf(cpi)) {
+    // We have decided to preserve the previously existing golden frame as our
+    // new ARF frame. However, in the short term we leave it in the GF slot and,
+    // if we're updating the GF with the current decoded frame, we save it
+    // instead to the ARF slot.
+    // Later, in the function vp9_encoder.c:vp9_update_reference_frames() we
+    // will swap gld_fb_idx and alt_fb_idx to achieve our objective. We do it
+    // there so that it can be done outside of the recode loop.
+    // Note: This is highly specific to the use of ARF as a forward reference,
+    // and this needs to be generalized as other uses are implemented
+    // (like RTC/temporal scalability).
     return (cpi->refresh_last_frame << cpi->lst_fb_idx) |
            (cpi->refresh_golden_frame << cpi->alt_fb_idx);
   } else {
@@ -1045,6 +1040,29 @@
   }
 }
 
+static void write_bitdepth_colorspace_sampling(
+    VP9_COMMON *const cm, struct vp9_write_bit_buffer *wb) {
+  if (cm->profile >= PROFILE_2) {
+    assert(cm->bit_depth > BITS_8);
+    vp9_wb_write_bit(wb, cm->bit_depth - BITS_10);
+  }
+  vp9_wb_write_literal(wb, cm->color_space, 3);
+  if (cm->color_space != SRGB) {
+    vp9_wb_write_bit(wb, 0);  // 0: [16, 235] (i.e. xvYCC), 1: [0, 255]
+    if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
+      assert(cm->subsampling_x != 1 || cm->subsampling_y != 1);
+      vp9_wb_write_bit(wb, cm->subsampling_x);
+      vp9_wb_write_bit(wb, cm->subsampling_y);
+      vp9_wb_write_bit(wb, 0);  // unused
+    } else {
+      assert(cm->subsampling_x == 1 && cm->subsampling_y == 1);
+    }
+  } else {
+    assert(cm->profile == PROFILE_1 || cm->profile == PROFILE_3);
+    vp9_wb_write_bit(wb, 0);  // unused
+  }
+}
+
 static void write_uncompressed_header(VP9_COMP *cpi,
                                       struct vp9_write_bit_buffer *wb) {
   VP9_COMMON *const cm = &cpi->common;
@@ -1059,25 +1077,8 @@
   vp9_wb_write_bit(wb, cm->error_resilient_mode);
 
   if (cm->frame_type == KEY_FRAME) {
-    const COLOR_SPACE cs = UNKNOWN;
     write_sync_code(wb);
-    if (cm->profile > PROFILE_1) {
-      assert(cm->bit_depth > BITS_8);
-      vp9_wb_write_bit(wb, cm->bit_depth - BITS_10);
-    }
-    vp9_wb_write_literal(wb, cs, 3);
-    if (cs != SRGB) {
-      vp9_wb_write_bit(wb, 0);  // 0: [16, 235] (i.e. xvYCC), 1: [0, 255]
-      if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
-        vp9_wb_write_bit(wb, cm->subsampling_x);
-        vp9_wb_write_bit(wb, cm->subsampling_y);
-        vp9_wb_write_bit(wb, 0);  // unused
-      }
-    } else {
-      assert(cm->profile == PROFILE_1 || cm->profile == PROFILE_3);
-      vp9_wb_write_bit(wb, 0);  // unused
-    }
-
+    write_bitdepth_colorspace_sampling(cm, wb);
     write_frame_size(cm, wb);
   } else {
     if (!cm->show_frame)
@@ -1089,6 +1090,11 @@
     if (cm->intra_only) {
       write_sync_code(wb);
 
+      // Note for profile 0, 420 8bpp is assumed.
+      if (cm->profile > PROFILE_0) {
+        write_bitdepth_colorspace_sampling(cm, wb);
+      }
+
       vp9_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
       write_frame_size(cm, wb);
     } else {
@@ -1118,7 +1124,7 @@
 
   encode_loopfilter(&cm->lf, wb);
   encode_quantization(cm, wb);
-  encode_segmentation(cpi, wb);
+  encode_segmentation(cm, &cpi->mb.e_mbd, wb);
 
   write_tile_info(cm, wb);
 }
@@ -1213,8 +1219,6 @@
   uncompressed_hdr_size = vp9_rb_bytes_written(&wb);
   data += uncompressed_hdr_size;
 
-  vp9_compute_update_table();
-
   vp9_clear_system_state();
 
   first_part_size = write_compressed_header(cpi, data);
diff --git a/vp9/encoder/vp9_bitstream.h b/vp9/encoder/vp9_bitstream.h
index ddfd0ed..8e82d1c 100644
--- a/vp9/encoder/vp9_bitstream.h
+++ b/vp9/encoder/vp9_bitstream.h
@@ -16,11 +16,21 @@
 extern "C" {
 #endif
 
-struct VP9_COMP;
+#include "vp9/encoder/vp9_encoder.h"
 
 void vp9_entropy_mode_init();
 
-void vp9_pack_bitstream(struct VP9_COMP *cpi, uint8_t *dest, size_t *size);
+void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size);
+
+static INLINE int vp9_preserve_existing_gf(VP9_COMP *cpi) {
+  return !cpi->multi_arf_allowed && cpi->refresh_golden_frame &&
+         cpi->rc.is_src_frame_alt_ref &&
+         (!cpi->use_svc ||      // Add spatial svc base layer case here
+          (is_spatial_svc(cpi) &&
+           cpi->svc.spatial_layer_id == 0 &&
+           cpi->svc.layer_context[0].gold_ref_idx >=0 &&
+           cpi->oxcf.ss_play_alternate[0]));
+}
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 7c82b20..5e89624 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -188,11 +188,9 @@
   }
 }
 
-static void duplicate_mode_info_in_sb(VP9_COMMON * const cm,
-                                     MACROBLOCKD *const xd,
-                                     int mi_row,
-                                     int mi_col,
-                                     BLOCK_SIZE bsize) {
+static void duplicate_mode_info_in_sb(VP9_COMMON *cm, MACROBLOCKD *xd,
+                                      int mi_row, int mi_col,
+                                      BLOCK_SIZE bsize) {
   const int block_width = num_8x8_blocks_wide_lookup[bsize];
   const int block_height = num_8x8_blocks_high_lookup[bsize];
   int i, j;
@@ -799,9 +797,7 @@
   }
 }
 
-static void update_stats(VP9_COMP *cpi) {
-  VP9_COMMON *const cm = &cpi->common;
-  const MACROBLOCK *const x = &cpi->mb;
+static void update_stats(VP9_COMMON *cm, const MACROBLOCK *x) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const MODE_INFO *const mi = xd->mi[0];
   const MB_MODE_INFO *const mbmi = &mi->mbmi;
@@ -913,7 +909,7 @@
   encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize, ctx);
 
   if (output_enabled) {
-    update_stats(cpi);
+    update_stats(&cpi->common, &cpi->mb);
 
     (*tp)->token = EOSB_TOKEN;
     (*tp)++;
@@ -1330,8 +1326,6 @@
                         TOKENEXTRA **tp, int mi_row, int mi_col,
                      int output_enabled, BLOCK_SIZE bsize,
                      PICK_MODE_CONTEXT *ctx) {
-
-
   set_offsets(cpi, tile, mi_row, mi_col, bsize);
   update_state_rt(cpi, ctx, mi_row, mi_col, bsize);
 
@@ -1343,7 +1337,7 @@
 #endif
 
   encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize, ctx);
-  update_stats(cpi);
+  update_stats(&cpi->common, &cpi->mb);
 
   (*tp)->token = EOSB_TOKEN;
   (*tp)++;
@@ -1366,7 +1360,6 @@
     return;
 
   if (bsize >= BLOCK_8X8) {
-    MACROBLOCKD *const xd = &cpi->mb.e_mbd;
     const int idx_str = xd->mi_stride * mi_row + mi_col;
     MODE_INFO ** mi_8x8 = cm->mi_grid_visible + idx_str;
     ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
@@ -1729,10 +1722,9 @@
 //
 // The min and max are assumed to have been initialized prior to calling this
 // function so repeat calls can accumulate a min and max of more than one sb64.
-static void get_sb_partition_size_range(VP9_COMP *cpi, MODE_INFO ** mi_8x8,
-                                        BLOCK_SIZE * min_block_size,
-                                        BLOCK_SIZE * max_block_size ) {
-  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8,
+                                        BLOCK_SIZE *min_block_size,
+                                        BLOCK_SIZE *max_block_size ) {
   int sb_width_in_blocks = MI_BLOCK_SIZE;
   int sb_height_in_blocks  = MI_BLOCK_SIZE;
   int i, j;
@@ -1787,17 +1779,17 @@
     if (cm->frame_type != KEY_FRAME) {
       MODE_INFO **const prev_mi =
           &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col];
-      get_sb_partition_size_range(cpi, prev_mi, &min_size, &max_size);
+      get_sb_partition_size_range(xd, prev_mi, &min_size, &max_size);
     }
     // Find the min and max partition sizes used in the left SB64
     if (left_in_image) {
       MODE_INFO **left_sb64_mi = &mi[-MI_BLOCK_SIZE];
-      get_sb_partition_size_range(cpi, left_sb64_mi, &min_size, &max_size);
+      get_sb_partition_size_range(xd, left_sb64_mi, &min_size, &max_size);
     }
     // Find the min and max partition sizes used in the above SB64.
     if (above_in_image) {
       MODE_INFO **above_sb64_mi = &mi[-xd->mi_stride * MI_BLOCK_SIZE];
-      get_sb_partition_size_range(cpi, above_sb64_mi, &min_size, &max_size);
+      get_sb_partition_size_range(xd, above_sb64_mi, &min_size, &max_size);
     }
     // adjust observed min and max
     if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) {
@@ -1953,6 +1945,42 @@
   {1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4};
 const int qindex_skip_threshold_lookup[BLOCK_SIZES] =
   {0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120};
+const int qindex_split_threshold_lookup[BLOCK_SIZES] =
+  {0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120};
+const int complexity_16x16_blocks_threshold[BLOCK_SIZES] =
+  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6};
+
+typedef enum {
+  MV_ZERO = 0,
+  MV_LEFT = 1,
+  MV_UP = 2,
+  MV_RIGHT = 3,
+  MV_DOWN = 4,
+  MV_INVALID
+} MOTION_DIRECTION;
+
+static INLINE MOTION_DIRECTION get_motion_direction_fp(uint8_t fp_byte) {
+  if (fp_byte & FPMB_MOTION_ZERO_MASK) {
+    return MV_ZERO;
+  } else if (fp_byte & FPMB_MOTION_LEFT_MASK) {
+    return MV_LEFT;
+  } else if (fp_byte & FPMB_MOTION_RIGHT_MASK) {
+    return MV_RIGHT;
+  } else if (fp_byte & FPMB_MOTION_UP_MASK) {
+    return MV_UP;
+  } else {
+    return MV_DOWN;
+  }
+}
+
+static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,
+                                           MOTION_DIRECTION that_mv) {
+  if (this_mv == that_mv) {
+    return 0;
+  } else {
+    return abs(this_mv - that_mv) == 2 ? 2 : 1;
+  }
+}
 #endif
 
 // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
@@ -1978,6 +2006,7 @@
   int64_t sum_rd = 0;
   int do_split = bsize >= BLOCK_8X8;
   int do_rect = 1;
+
   // Override skipping rectangular partition operations for edge blocks
   const int force_horz_split = (mi_row + mi_step >= cm->mi_rows);
   const int force_vert_split = (mi_col + mi_step >= cm->mi_cols);
@@ -1987,6 +2016,11 @@
   BLOCK_SIZE min_size = cpi->sf.min_partition_size;
   BLOCK_SIZE max_size = cpi->sf.max_partition_size;
 
+#if CONFIG_FP_MB_STATS
+  unsigned int src_diff_var = UINT_MAX;
+  int none_complexity = 0;
+#endif
+
   int partition_none_allowed = !force_horz_split && !force_vert_split;
   int partition_horz_allowed = !force_vert_split && yss <= xss &&
                                bsize >= BLOCK_8X8;
@@ -2038,6 +2072,65 @@
     }
   }
 
+#if CONFIG_FP_MB_STATS
+  if (cpi->use_fp_mb_stats) {
+    set_offsets(cpi, tile, mi_row, mi_col, bsize);
+    src_diff_var = get_sby_perpixel_diff_variance(cpi, &cpi->mb.plane[0].src,
+                                                  mi_row, mi_col, bsize);
+  }
+#endif
+
+#if CONFIG_FP_MB_STATS
+  // Decide whether we shall split directly and skip searching NONE by using
+  // the first pass block statistics
+  if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_split &&
+      partition_none_allowed && src_diff_var > 4 &&
+      cm->base_qindex < qindex_split_threshold_lookup[bsize]) {
+    int mb_row = mi_row >> 1;
+    int mb_col = mi_col >> 1;
+    int mb_row_end =
+        MIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
+    int mb_col_end =
+        MIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
+    int r, c;
+
+    // compute a complexity measure, basically measure inconsistency of motion
+    // vectors obtained from the first pass in the current block
+    for (r = mb_row; r < mb_row_end ; r++) {
+      for (c = mb_col; c < mb_col_end; c++) {
+        const int mb_index = r * cm->mb_cols + c;
+
+        MOTION_DIRECTION this_mv;
+        MOTION_DIRECTION right_mv;
+        MOTION_DIRECTION bottom_mv;
+
+        this_mv =
+            get_motion_direction_fp(cpi->twopass.this_frame_mb_stats[mb_index]);
+
+        // to its right
+        if (c != mb_col_end - 1) {
+          right_mv = get_motion_direction_fp(
+              cpi->twopass.this_frame_mb_stats[mb_index + 1]);
+          none_complexity += get_motion_inconsistency(this_mv, right_mv);
+        }
+
+        // to its bottom
+        if (r != mb_row_end - 1) {
+          bottom_mv = get_motion_direction_fp(
+              cpi->twopass.this_frame_mb_stats[mb_index + cm->mb_cols]);
+          none_complexity += get_motion_inconsistency(this_mv, bottom_mv);
+        }
+
+        // do not count its left and top neighbors to avoid double counting
+      }
+    }
+
+    if (none_complexity > complexity_16x16_blocks_threshold[bsize]) {
+      partition_none_allowed = 0;
+    }
+  }
+#endif
+
   // PARTITION_NONE
   if (partition_none_allowed) {
     rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rate, &this_dist, bsize,
@@ -2048,6 +2141,7 @@
         this_rate += cpi->partition_cost[pl][PARTITION_NONE];
       }
       sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
+
       if (sum_rd < best_rd) {
         int64_t stop_thresh = 4096;
         int64_t stop_thresh_rd;
@@ -2078,7 +2172,6 @@
         // stop further splitting in RD optimization
         if (cpi->use_fp_mb_stats && do_split != 0 &&
             cm->base_qindex > qindex_skip_threshold_lookup[bsize]) {
-          VP9_COMMON *cm = &cpi->common;
           int mb_row = mi_row >> 1;
           int mb_col = mi_col >> 1;
           int mb_row_end =
@@ -2104,11 +2197,12 @@
             }
           }
           if (skip) {
-            unsigned int var;
-            set_offsets(cpi, tile, mi_row, mi_col, bsize);
-            var = get_sby_perpixel_diff_variance(cpi, &cpi->mb.plane[0].src,
-                                                 mi_row, mi_col, bsize);
-            if (var < 8) {
+            if (src_diff_var == UINT_MAX) {
+              set_offsets(cpi, tile, mi_row, mi_col, bsize);
+              src_diff_var = get_sby_perpixel_diff_variance(
+                  cpi, &cpi->mb.plane[0].src, mi_row, mi_col, bsize);
+            }
+            if (src_diff_var < 8) {
               do_split = 0;
               do_rect = 0;
             }
@@ -2171,6 +2265,7 @@
       pl = partition_plane_context(xd, mi_row, mi_col, bsize);
       sum_rate += cpi->partition_cost[pl][PARTITION_SPLIT];
       sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+
       if (sum_rd < best_rd) {
         best_rate = sum_rate;
         best_dist = sum_dist;
@@ -2283,6 +2378,7 @@
     }
     restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
   }
+
   // TODO(jbb): This code added so that we avoid static analysis
   // warning related to the fact that best_rd isn't used after this
   // point.  This code should be refactored so that the duplicate
@@ -2602,6 +2698,8 @@
                                  int mi_col, BLOCK_SIZE bsize, int *rate,
                                  int64_t *dist, int do_recon, int64_t best_rd,
                                  PC_TREE *pc_tree) {
+  const SPEED_FEATURES *const sf = &cpi->sf;
+  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -2633,18 +2731,18 @@
 
   // Determine partition types in search according to the speed features.
   // The threshold set here has to be of square block size.
-  if (cpi->sf.auto_min_max_partition_size) {
-    partition_none_allowed &= (bsize <= cpi->sf.max_partition_size &&
-                               bsize >= cpi->sf.min_partition_size);
-    partition_horz_allowed &= ((bsize <= cpi->sf.max_partition_size &&
-                                bsize >  cpi->sf.min_partition_size) ||
+  if (sf->auto_min_max_partition_size) {
+    partition_none_allowed &= (bsize <= sf->max_partition_size &&
+                               bsize >= sf->min_partition_size);
+    partition_horz_allowed &= ((bsize <= sf->max_partition_size &&
+                                bsize > sf->min_partition_size) ||
                                 force_horz_split);
-    partition_vert_allowed &= ((bsize <= cpi->sf.max_partition_size &&
-                                bsize >  cpi->sf.min_partition_size) ||
+    partition_vert_allowed &= ((bsize <= sf->max_partition_size &&
+                                bsize > sf->min_partition_size) ||
                                 force_vert_split);
-    do_split &= bsize > cpi->sf.min_partition_size;
+    do_split &= bsize > sf->min_partition_size;
   }
-  if (cpi->sf.use_square_partition_only) {
+  if (sf->use_square_partition_only) {
     partition_horz_allowed &= force_horz_split;
     partition_vert_allowed &= force_vert_split;
   }
@@ -2723,7 +2821,7 @@
     } else {
       // skip rectangular partition test when larger block size
       // gives better rd cost
-      if (cpi->sf.less_rectangular_check)
+      if (sf->less_rectangular_check)
         do_rect &= !partition_none_allowed;
     }
   }
@@ -2731,7 +2829,7 @@
   // PARTITION_HORZ
   if (partition_horz_allowed && do_rect) {
     subsize = get_subsize(bsize, PARTITION_HORZ);
-    if (cpi->sf.adaptive_motion_search)
+    if (sf->adaptive_motion_search)
       load_pred_mv(x, ctx);
 
     nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
@@ -2776,7 +2874,7 @@
   if (partition_vert_allowed && do_rect) {
     subsize = get_subsize(bsize, PARTITION_VERT);
 
-    if (cpi->sf.adaptive_motion_search)
+    if (sf->adaptive_motion_search)
       load_pred_mv(x, ctx);
 
     nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
@@ -2833,12 +2931,12 @@
     // Check the projected output rate for this SB against it's target
     // and and if necessary apply a Q delta using segmentation to get
     // closer to the target.
-    if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) {
+    if ((oxcf->aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) {
       vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled,
                                     best_rate);
     }
 
-    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+    if (oxcf->aq_mode == CYCLIC_REFRESH_AQ)
       vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
                                               best_rate, best_dist);
 
@@ -2969,9 +3067,10 @@
 
 static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                                 int mi_row, TOKENEXTRA **tp) {
-  VP9_COMMON *cm = &cpi->common;
-  MACROBLOCK *x = &cpi->mb;
-  MACROBLOCKD *xd = &x->e_mbd;
+  SPEED_FEATURES *const sf = &cpi->sf;
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCK *const x = &cpi->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
   int mi_col;
 
   // Initialize the left context for the new SB row
@@ -2981,7 +3080,6 @@
   // Code each SB in the row
   for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
        mi_col += MI_BLOCK_SIZE) {
-    MACROBLOCK *x = &cpi->mb;
     int dummy_rate = 0;
     int64_t dummy_dist = 0;
     const int idx_str = cm->mi_stride * mi_row + mi_col;
@@ -2994,7 +3092,7 @@
     vp9_zero(x->pred_mv);
 
     // Set the partition type of the 64X64 block
-    switch (cpi->sf.partition_search_type) {
+    switch (sf->partition_search_type) {
       case VAR_BASED_PARTITION:
         choose_partitioning(cpi, tile, mi_row, mi_col);
         nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
@@ -3007,20 +3105,20 @@
         break;
       case VAR_BASED_FIXED_PARTITION:
       case FIXED_PARTITION:
-        bsize = cpi->sf.partition_search_type == FIXED_PARTITION ?
-                cpi->sf.always_this_block_size :
+        bsize = sf->partition_search_type == FIXED_PARTITION ?
+                sf->always_this_block_size :
                 get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col);
         set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
         nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
                             1, &dummy_rate, &dummy_dist, cpi->pc_root);
         break;
       case REFERENCE_PARTITION:
-        if (cpi->sf.partition_check ||
+        if (sf->partition_check ||
             !is_background(cpi, tile, mi_row, mi_col)) {
           set_modeinfo_offsets(cm, xd, mi_row, mi_col);
           auto_partition_range(cpi, tile, mi_row, mi_col,
-                               &cpi->sf.min_partition_size,
-                               &cpi->sf.max_partition_size);
+                               &sf->min_partition_size,
+                               &sf->max_partition_size);
           nonrd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
                                &dummy_rate, &dummy_dist, 1, INT64_MAX,
                                cpi->pc_root);
@@ -3040,8 +3138,8 @@
 // end RTC play code
 
 static int set_var_thresh_from_histogram(VP9_COMP *cpi) {
-  SPEED_FEATURES *const sf = &cpi->sf;
-  VP9_COMMON *const cm = &cpi->common;
+  const SPEED_FEATURES *const sf = &cpi->sf;
+  const VP9_COMMON *const cm = &cpi->common;
 
   const uint8_t *src = cpi->Source->y_buffer;
   const uint8_t *last_src = cpi->Last_Source->y_buffer;
@@ -3200,19 +3298,19 @@
   vp9_zero(rd_opt->tx_select_diff);
   vp9_zero(rd_opt->tx_select_threshes);
 
-  cpi->mb.e_mbd.lossless = cm->base_qindex == 0 &&
-                           cm->y_dc_delta_q == 0 &&
-                           cm->uv_dc_delta_q == 0 &&
-                           cm->uv_ac_delta_q == 0;
+  xd->lossless = cm->base_qindex == 0 &&
+                 cm->y_dc_delta_q == 0 &&
+                 cm->uv_dc_delta_q == 0 &&
+                 cm->uv_ac_delta_q == 0;
 
   cm->tx_mode = select_tx_mode(cpi);
 
-  cpi->mb.fwd_txm4x4 = cpi->mb.e_mbd.lossless ? vp9_fwht4x4 : vp9_fdct4x4;
-  cpi->mb.itxm_add = cpi->mb.e_mbd.lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
+  x->fwd_txm4x4 = xd->lossless ? vp9_fwht4x4 : vp9_fdct4x4;
+  x->itxm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
 
-  if (cpi->mb.e_mbd.lossless) {
-    cpi->mb.optimize = 0;
-    cpi->common.lf.filter_level = 0;
+  if (xd->lossless) {
+    x->optimize = 0;
+    cm->lf.filter_level = 0;
     cpi->zbin_mode_boost_enabled = 0;
   }
 
@@ -3464,7 +3562,8 @@
   MODE_INFO **mi_8x8 = xd->mi;
   MODE_INFO *mi = mi_8x8[0];
   MB_MODE_INFO *mbmi = &mi->mbmi;
-  unsigned int segment_id = mbmi->segment_id;
+  const int seg_skip = vp9_segfeature_active(&cm->seg, mbmi->segment_id,
+                                             SEG_LVL_SKIP);
   const int mis = cm->mi_stride;
   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
   const int mi_height = num_8x8_blocks_high_lookup[bsize];
@@ -3508,7 +3607,7 @@
       vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
                            &xd->block_refs[ref]->sf);
     }
-    if (!cpi->sf.reuse_inter_pred_sby)
+    if (!cpi->sf.reuse_inter_pred_sby || seg_skip)
       vp9_build_inter_predictors_sby(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
 
     vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
@@ -3519,8 +3618,7 @@
       vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8));
     } else {
       mbmi->skip = 1;
-      if (output_enabled &&
-          !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
+      if (output_enabled && !seg_skip)
         cm->counts.skip[vp9_get_skip_context(xd)][1]++;
       reset_skip_context(xd, MAX(bsize, BLOCK_8X8));
     }
@@ -3529,9 +3627,7 @@
   if (output_enabled) {
     if (cm->tx_mode == TX_MODE_SELECT &&
         mbmi->sb_type >= BLOCK_8X8  &&
-        !(is_inter_block(mbmi) &&
-            (mbmi->skip ||
-             vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)))) {
+        !(is_inter_block(mbmi) && (mbmi->skip || seg_skip))) {
       ++get_tx_counts(max_txsize_lookup[bsize], vp9_get_tx_size_context(xd),
                       &cm->counts.tx)[mbmi->tx_size];
     } else {
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index b1c5326..ddca25e 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -131,7 +131,7 @@
   }
 
   if (cm->frame_type == KEY_FRAME) {
-    if (!(cpi->use_svc && cpi->svc.number_temporal_layers == 1))
+    if (!is_spatial_svc(cpi))
       cpi->refresh_golden_frame = 1;
     cpi->refresh_alt_ref_frame = 1;
   } else {
@@ -477,7 +477,7 @@
   vp9_init_context_buffers(cm);
   init_macroblockd(cm, xd);
 
-  if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+  if (is_spatial_svc(cpi)) {
     if (vp9_realloc_frame_buffer(&cpi->alt_ref_buffer,
                                  cm->width, cm->height,
                                  cm->subsampling_x, cm->subsampling_y,
@@ -524,6 +524,7 @@
 
   cm->profile = oxcf->profile;
   cm->bit_depth = oxcf->bit_depth;
+  cm->color_space = UNKNOWN;
 
   cm->width = oxcf->width;
   cm->height = oxcf->height;
@@ -1564,22 +1565,15 @@
                &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx);
     ref_cnt_fb(cm->frame_bufs,
                &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx);
-  } else if (!cpi->multi_arf_allowed && cpi->refresh_golden_frame &&
-             cpi->rc.is_src_frame_alt_ref &&
-             (!cpi->use_svc ||      // Add spatial svc base layer case here
-              (cpi->svc.number_temporal_layers == 1 &&
-               cpi->svc.spatial_layer_id == 0 &&
-               cpi->svc.layer_context[0].gold_ref_idx >=0 &&
-               cpi->oxcf.ss_play_alternate[0]))) {
-    /* Preserve the previously existing golden frame and update the frame in
-     * the alt ref slot instead. This is highly specific to the current use of
-     * alt-ref as a forward reference, and this needs to be generalized as
-     * other uses are implemented (like RTC/temporal scaling)
-     *
-     * The update to the buffer in the alt ref slot was signaled in
-     * vp9_pack_bitstream(), now swap the buffer pointers so that it's treated
-     * as the golden frame next time.
-     */
+  } else if (vp9_preserve_existing_gf(cpi)) {
+    // We have decided to preserve the previously existing golden frame as our
+    // new ARF frame. However, in the short term in function
+    // vp9_bitstream.c::get_refresh_mask() we left it in the GF slot and, if
+    // we're updating the GF with the current decoded frame, we save it to the
+    // ARF slot instead.
+    // We now have to update the ARF with the current frame and swap gld_fb_idx
+    // and alt_fb_idx so that, overall, we've stored the old GF in the new ARF
+    // slot and, if we're updating the GF, the current frame becomes the new GF.
     int tmp;
 
     ref_cnt_fb(cm->frame_bufs,
@@ -1589,7 +1583,7 @@
     cpi->alt_fb_idx = cpi->gld_fb_idx;
     cpi->gld_fb_idx = tmp;
 
-    if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+    if (is_spatial_svc(cpi)) {
       cpi->svc.layer_context[0].gold_ref_idx = cpi->gld_fb_idx;
       cpi->svc.layer_context[0].alt_ref_idx = cpi->alt_fb_idx;
     }
@@ -2013,7 +2007,7 @@
     cpi->ref_frame_flags &= ~VP9_GOLD_FLAG;
 
   if (cpi->rc.frames_till_gf_update_due == INT_MAX &&
-      !(cpi->use_svc && cpi->svc.number_temporal_layers == 1))
+      !is_spatial_svc(cpi))
     cpi->ref_frame_flags &= ~VP9_GOLD_FLAG;
 
   if (cpi->alt_is_last)
@@ -2059,9 +2053,7 @@
   // according to the variance
 
   SVC *const svc = &cpi->svc;
-  const int is_spatial_svc = (svc->number_spatial_layers > 1) &&
-                             (svc->number_temporal_layers == 1);
-  TWO_PASS *const twopass = is_spatial_svc ?
+  TWO_PASS *const twopass = is_spatial_svc(cpi) ?
                             &svc->layer_context[svc->spatial_layer_id].twopass
                             : &cpi->twopass;
 
@@ -2167,9 +2159,7 @@
       (cpi->oxcf.frame_parallel_decoding_mode != 0);
 
     // By default, encoder assumes decoder can use prev_mi.
-    cm->coding_use_prev_mi = 1;
     if (cm->error_resilient_mode) {
-      cm->coding_use_prev_mi = 0;
       cm->frame_parallel_decoding_mode = 1;
       cm->reset_frame_context = 0;
       cm->refresh_frame_context = 0;
@@ -2189,7 +2179,7 @@
   // Check if the current frame is skippable for the partition search in the
   // second pass according to the first pass stats
   if (cpi->pass == 2 &&
-      (!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) {
+      (!cpi->use_svc || is_spatial_svc(cpi))) {
     configure_skippable_frame(cpi);
   }
 
@@ -2441,7 +2431,7 @@
   vpx_usec_timer_start(&timer);
 
 #if CONFIG_SPATIAL_SVC
-  if (cpi->use_svc && cpi->svc.number_temporal_layers == 1)
+  if (is_spatial_svc(cpi))
     res = vp9_svc_lookahead_push(cpi, cpi->lookahead, sd, time_stamp, end_time,
                                  frame_flags);
   else
@@ -2453,9 +2443,16 @@
   vpx_usec_timer_mark(&timer);
   cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
 
-  if (cm->profile == PROFILE_0 && (subsampling_x != 1 || subsampling_y != 1)) {
+  if ((cm->profile == PROFILE_0 || cm->profile == PROFILE_2) &&
+      (subsampling_x != 1 || subsampling_y != 1)) {
     vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
-                       "Non-4:2:0 color space requires profile >= 1");
+                       "Non-4:2:0 color space requires profile 1 or 3");
+    res = -1;
+  }
+  if ((cm->profile == PROFILE_1 || cm->profile == PROFILE_3) &&
+      (subsampling_x == 1 && subsampling_y == 1)) {
+    vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
+                       "4:2:0 color space requires profile 0 or 2");
     res = -1;
   }
 
@@ -2564,14 +2561,11 @@
   YV12_BUFFER_CONFIG *force_src_buffer = NULL;
   MV_REFERENCE_FRAME ref_frame;
   int arf_src_index;
-  const int is_spatial_svc = cpi->use_svc &&
-                             (cpi->svc.number_temporal_layers == 1) &&
-                             (cpi->svc.number_spatial_layers > 1);
 
   if (!cpi)
     return -1;
 
-  if (is_spatial_svc && cpi->pass == 2) {
+  if (is_spatial_svc(cpi) && cpi->pass == 2) {
 #if CONFIG_SPATIAL_SVC
     vp9_svc_lookahead_peek(cpi, cpi->lookahead, 0, 1);
 #endif
@@ -2598,7 +2592,7 @@
     assert(arf_src_index <= rc->frames_to_key);
 
 #if CONFIG_SPATIAL_SVC
-    if (is_spatial_svc)
+    if (is_spatial_svc(cpi))
       cpi->source = vp9_svc_lookahead_peek(cpi, cpi->lookahead,
                                            arf_src_index, 0);
     else
@@ -2608,7 +2602,7 @@
       cpi->alt_ref_source = cpi->source;
 
 #if CONFIG_SPATIAL_SVC
-      if (is_spatial_svc && cpi->svc.spatial_layer_id > 0) {
+      if (is_spatial_svc(cpi) && cpi->svc.spatial_layer_id > 0) {
         int i;
         // Reference a hidden frame from a lower layer
         for (i = cpi->svc.spatial_layer_id - 1; i >= 0; --i) {
@@ -2643,7 +2637,7 @@
     // Get last frame source.
     if (cm->current_video_frame > 0) {
 #if CONFIG_SPATIAL_SVC
-      if (is_spatial_svc)
+      if (is_spatial_svc(cpi))
         cpi->last_source = vp9_svc_lookahead_peek(cpi, cpi->lookahead, -1, 0);
       else
 #endif
@@ -2654,7 +2648,7 @@
 
     // Read in the source frame.
 #if CONFIG_SPATIAL_SVC
-    if (is_spatial_svc)
+    if (is_spatial_svc(cpi))
       cpi->source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush);
     else
 #endif
@@ -2770,13 +2764,13 @@
   }
 
   if (cpi->pass == 1 &&
-      (!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) {
+      (!cpi->use_svc || is_spatial_svc(cpi))) {
     const int lossless = is_lossless_requested(&cpi->oxcf);
     cpi->mb.fwd_txm4x4 = lossless ? vp9_fwht4x4 : vp9_fdct4x4;
     cpi->mb.itxm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
     vp9_first_pass(cpi);
   } else if (cpi->pass == 2 &&
-      (!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) {
+      (!cpi->use_svc || is_spatial_svc(cpi))) {
     Pass2Encode(cpi, size, dest, frame_flags);
   } else if (cpi->use_svc) {
     SvcEncode(cpi, size, dest, frame_flags);
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 2a6c4b3..acff173 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -232,6 +232,7 @@
 #endif
 
   vp8e_tuning tuning;
+  vp9e_tune_content content;
 } VP9EncoderConfig;
 
 static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) {
@@ -535,10 +536,16 @@
 
 void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags);
 
+static INLINE int is_spatial_svc(const struct VP9_COMP *const cpi) {
+  return cpi->use_svc &&
+         cpi->svc.number_temporal_layers == 1 &&
+         cpi->svc.number_spatial_layers > 1;
+}
+
 static INLINE int is_altref_enabled(const VP9_COMP *const cpi) {
   return cpi->oxcf.mode != REALTIME && cpi->oxcf.lag_in_frames > 0 &&
          (cpi->oxcf.play_alternate &&
-          (!(cpi->use_svc && cpi->svc.number_temporal_layers == 1) ||
+          (!is_spatial_svc(cpi) ||
            cpi->oxcf.ss_play_alternate[cpi->svc.spatial_layer_id]));
 }
 
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 627de47..5a79f72 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -258,7 +258,7 @@
 }
 
 void vp9_end_first_pass(VP9_COMP *cpi) {
-  if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+  if (is_spatial_svc(cpi)) {
     int i;
     for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
       output_stats(&cpi->svc.layer_context[i].twopass.total_stats,
@@ -446,7 +446,7 @@
   set_first_pass_params(cpi);
   vp9_set_quantizer(cm, find_fp_qindex());
 
-  if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+  if (is_spatial_svc(cpi)) {
     MV_REFERENCE_FRAME ref_frame = LAST_FRAME;
     const YV12_BUFFER_CONFIG *scaled_ref_buf = NULL;
     twopass = &cpi->svc.layer_context[cpi->svc.spatial_layer_id].twopass;
@@ -615,8 +615,7 @@
                                                 &unscaled_last_source_buf_2d);
 
         // TODO(pengchong): Replace the hard-coded threshold
-        if (raw_motion_error > 25 ||
-            (cpi->use_svc && cpi->svc.number_temporal_layers == 1)) {
+        if (raw_motion_error > 25 || is_spatial_svc(cpi)) {
           // Test last reference frame using the previous best mv as the
           // starting point (best reference) for the search.
           first_pass_motion_search(cpi, x, &best_ref_mv.as_mv, &mv.as_mv,
@@ -898,7 +897,7 @@
 
   vp9_extend_frame_borders(new_yv12);
 
-  if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+  if (is_spatial_svc(cpi)) {
     vp9_update_reference_frames(cpi);
   } else {
     // Swap frame pointers so last frame refers to the frame we just compressed.
@@ -967,10 +966,8 @@
                                             BPER_MB_NORMBITS) / num_mbs;
     int q;
     int is_svc_upper_layer = 0;
-    if (cpi->use_svc && cpi->svc.number_temporal_layers == 1 &&
-        cpi->svc.spatial_layer_id > 0) {
+    if (is_spatial_svc(cpi) && cpi->svc.spatial_layer_id > 0)
       is_svc_upper_layer = 1;
-    }
 
     // Try and pick a max Q that will be high enough to encode the
     // content at the given rate.
@@ -2102,7 +2099,7 @@
       assert(0);
       break;
   }
-  if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+  if (is_spatial_svc(cpi)) {
     if (cpi->svc.layer_context[cpi->svc.spatial_layer_id].gold_ref_idx < 0)
       cpi->refresh_golden_frame = 0;
     if (cpi->alt_ref_source == NULL)
@@ -2121,9 +2118,8 @@
 
   int target_rate;
   LAYER_CONTEXT *lc = NULL;
-  const int is_spatial_svc = (cpi->use_svc &&
-                              cpi->svc.number_temporal_layers == 1);
-  if (is_spatial_svc) {
+
+  if (is_spatial_svc(cpi)) {
     lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
     frames_left = (int)(twopass->total_stats.count -
                   lc->current_video_frame_in_layer);
@@ -2151,7 +2147,7 @@
     vp9_rc_set_frame_target(cpi, target_rate);
     cm->frame_type = INTER_FRAME;
 
-    if (is_spatial_svc) {
+    if (is_spatial_svc(cpi)) {
       if (cpi->svc.spatial_layer_id == 0) {
         lc->is_key_frame = 0;
       } else {
@@ -2167,7 +2163,7 @@
 
   vp9_clear_system_state();
 
-  if (is_spatial_svc && twopass->kf_intra_err_min == 0) {
+  if (is_spatial_svc(cpi) && twopass->kf_intra_err_min == 0) {
     twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs;
     twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs;
   }
@@ -2175,7 +2171,8 @@
   if (cpi->oxcf.rc_mode == VPX_Q) {
     twopass->active_worst_quality = cpi->oxcf.cq_level;
   } else if (cm->current_video_frame == 0 ||
-             (is_spatial_svc && lc->current_video_frame_in_layer == 0)) {
+             (is_spatial_svc(cpi) &&
+              lc->current_video_frame_in_layer == 0)) {
     // Special case code for first frame.
     const int section_target_bandwidth = (int)(twopass->bits_left /
                                                frames_left);
@@ -2201,7 +2198,7 @@
     cm->frame_type = INTER_FRAME;
   }
 
-  if (is_spatial_svc) {
+  if (is_spatial_svc(cpi)) {
     if (cpi->svc.spatial_layer_id == 0) {
       lc->is_key_frame = (cm->frame_type == KEY_FRAME);
       if (lc->is_key_frame)
@@ -2232,7 +2229,7 @@
     }
 
     rc->frames_till_gf_update_due = rc->baseline_gf_interval;
-    if (!is_spatial_svc)
+    if (!is_spatial_svc(cpi))
       cpi->refresh_golden_frame = 1;
   }
 
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 7a16001..83848e7 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -458,7 +458,7 @@
       vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
                            sf, sf);
 
-      if (cm->coding_use_prev_mi)
+      if (!cm->error_resilient_mode)
         vp9_find_mv_refs(cm, xd, tile, xd->mi[0], ref_frame,
                          candidates, mi_row, mi_col);
       else
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 1a479f1..73c6b89 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -1236,7 +1236,7 @@
     cm->frame_type = KEY_FRAME;
     rc->source_alt_ref_active = 0;
 
-    if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+    if (is_spatial_svc(cpi)) {
       cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame = 1;
       cpi->ref_frame_flags &=
           (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
@@ -1248,7 +1248,7 @@
   } else {
     cm->frame_type = INTER_FRAME;
 
-    if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+    if (is_spatial_svc(cpi)) {
       LAYER_CONTEXT *lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
       if (cpi->svc.spatial_layer_id == 0) {
         lc->is_key_frame = 0;
diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c
index a9cff1e..633ce08 100644
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -220,8 +220,6 @@
   }
 }
 
-static const int MAX_XSQ_Q10 = 245727;
-
 static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
   // NOTE: The tables below must be of the same size.
 
@@ -311,10 +309,10 @@
     *dist = 0;
   } else {
     int d_q10, r_q10;
+    static const uint32_t MAX_XSQ_Q10 = 245727;
     const uint64_t xsq_q10_64 =
         ((((uint64_t)qstep * qstep * n) << 10) + (var >> 1)) / var;
-    const int xsq_q10 = xsq_q10_64 > (uint64_t)MAX_XSQ_Q10 ?
-                        (int)MAX_XSQ_Q10 : (int)xsq_q10_64;
+    const int xsq_q10 = (int)MIN(xsq_q10_64, MAX_XSQ_Q10);
     model_rd_norm(xsq_q10, &r_q10, &d_q10);
     *rate = (n * r_q10 + 2) >> 2;
     *dist = (var * (int64_t)d_q10 + 512) >> 10;
diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c
index 897ae01..d5676c3 100644
--- a/vp9/encoder/vp9_segmentation.c
+++ b/vp9/encoder/vp9_segmentation.c
@@ -110,14 +110,12 @@
   return cost;
 }
 
-static void count_segs(VP9_COMP *cpi, const TileInfo *const tile,
-                       MODE_INFO **mi,
+static void count_segs(const VP9_COMMON *cm, MACROBLOCKD *xd,
+                       const TileInfo *tile, MODE_INFO **mi,
                        int *no_pred_segcounts,
                        int (*temporal_predictor_count)[2],
                        int *t_unpred_seg_counts,
                        int bw, int bh, int mi_row, int mi_col) {
-  VP9_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
   int segment_id;
 
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
@@ -151,14 +149,13 @@
   }
 }
 
-static void count_segs_sb(VP9_COMP *cpi, const TileInfo *const tile,
-                          MODE_INFO **mi,
+static void count_segs_sb(const VP9_COMMON *cm, MACROBLOCKD *xd,
+                          const TileInfo *tile, MODE_INFO **mi,
                           int *no_pred_segcounts,
                           int (*temporal_predictor_count)[2],
                           int *t_unpred_seg_counts,
                           int mi_row, int mi_col,
                           BLOCK_SIZE bsize) {
-  const VP9_COMMON *const cm = &cpi->common;
   const int mis = cm->mi_stride;
   int bw, bh;
   const int bs = num_8x8_blocks_wide_lookup[bsize], hbs = bs / 2;
@@ -170,18 +167,18 @@
   bh = num_8x8_blocks_high_lookup[mi[0]->mbmi.sb_type];
 
   if (bw == bs && bh == bs) {
-    count_segs(cpi, tile, mi, no_pred_segcounts, temporal_predictor_count,
+    count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
                t_unpred_seg_counts, bs, bs, mi_row, mi_col);
   } else if (bw == bs && bh < bs) {
-    count_segs(cpi, tile, mi, no_pred_segcounts, temporal_predictor_count,
+    count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
                t_unpred_seg_counts, bs, hbs, mi_row, mi_col);
-    count_segs(cpi, tile, mi + hbs * mis, no_pred_segcounts,
+    count_segs(cm, xd, tile, mi + hbs * mis, no_pred_segcounts,
                temporal_predictor_count, t_unpred_seg_counts, bs, hbs,
                mi_row + hbs, mi_col);
   } else if (bw < bs && bh == bs) {
-    count_segs(cpi, tile, mi, no_pred_segcounts, temporal_predictor_count,
+    count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
                t_unpred_seg_counts, hbs, bs, mi_row, mi_col);
-    count_segs(cpi, tile, mi + hbs,
+    count_segs(cm, xd, tile, mi + hbs,
                no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts,
                hbs, bs, mi_row, mi_col + hbs);
   } else {
@@ -194,7 +191,7 @@
       const int mi_dc = hbs * (n & 1);
       const int mi_dr = hbs * (n >> 1);
 
-      count_segs_sb(cpi, tile, &mi[mi_dr * mis + mi_dc],
+      count_segs_sb(cm, xd, tile, &mi[mi_dr * mis + mi_dc],
                     no_pred_segcounts, temporal_predictor_count,
                     t_unpred_seg_counts,
                     mi_row + mi_dr, mi_col + mi_dc, subsize);
@@ -202,8 +199,7 @@
   }
 }
 
-void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
-  VP9_COMMON *const cm = &cpi->common;
+void vp9_choose_segmap_coding_method(VP9_COMMON *cm, MACROBLOCKD *xd) {
   struct segmentation *seg = &cm->seg;
 
   int no_pred_cost;
@@ -237,7 +233,7 @@
       MODE_INFO **mi = mi_ptr;
       for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
            mi_col += 8, mi += 8)
-        count_segs_sb(cpi, &tile, mi, no_pred_segcounts,
+        count_segs_sb(cm, xd, &tile, mi, no_pred_segcounts,
                       temporal_predictor_count, t_unpred_seg_counts,
                       mi_row, mi_col, BLOCK_64X64);
     }
diff --git a/vp9/encoder/vp9_segmentation.h b/vp9/encoder/vp9_segmentation.h
index 50dd562..8c6944a 100644
--- a/vp9/encoder/vp9_segmentation.h
+++ b/vp9/encoder/vp9_segmentation.h
@@ -42,7 +42,7 @@
 void vp9_set_segment_data(struct segmentation *seg, signed char *feature_data,
                           unsigned char abs_delta);
 
-void vp9_choose_segmap_coding_method(VP9_COMP *cpi);
+void vp9_choose_segmap_coding_method(VP9_COMMON *cm, MACROBLOCKD *xd);
 
 void vp9_reset_segment_features(struct segmentation *seg);
 
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 6ca1bc5..e770f33 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -160,7 +160,7 @@
 }
 
 static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
-                                 int speed) {
+                                 int speed, vp9e_tune_content content) {
   VP9_COMMON *const cm = &cpi->common;
   const int frames_since_key =
       cm->frame_type == KEY_FRAME ? 0 : cpi->rc.frames_since_key;
@@ -275,6 +275,13 @@
   }
 
   if (speed >= 6) {
+    if (content == VP9E_CONTENT_SCREEN) {
+      int i;
+      // Allow fancy modes at all sizes since SOURCE_VAR_BASED_PARTITION is used
+      for (i = 0; i < BLOCK_SIZES; ++i)
+        sf->inter_mode_mask[i] = INTER_ALL;
+    }
+
     // Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION.
     sf->partition_search_type = SOURCE_VAR_BASED_PARTITION;
     sf->search_type_check_frequency = 50;
@@ -392,7 +399,7 @@
       set_good_speed_feature(cpi, cm, sf, oxcf->speed);
       break;
     case REALTIME:
-      set_rt_speed_feature(cpi, sf, oxcf->speed);
+      set_rt_speed_feature(cpi, sf, oxcf->speed, oxcf->content);
       break;
   }
 
diff --git a/vp9/encoder/vp9_subexp.c b/vp9/encoder/vp9_subexp.c
index 9796d64..530b592 100644
--- a/vp9/encoder/vp9_subexp.c
+++ b/vp9/encoder/vp9_subexp.c
@@ -16,7 +16,24 @@
 
 #define vp9_cost_upd256  ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd)))
 
-static int update_bits[255];
+static const int update_bits[255] = {
+   5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+   6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,  0,
+};
 
 static int recenter_nonneg(int v, int m) {
   if (v > (m << 1))
@@ -61,18 +78,6 @@
   return i;
 }
 
-static int count_term_subexp(int word) {
-  if (word < 16)
-    return 5;
-  if (word < 32)
-    return 6;
-  if (word < 64)
-    return 8;
-  if (word < 129)
-    return 10;
-  return 11;
-}
-
 static int prob_diff_update_cost(vp9_prob newp, vp9_prob oldp) {
   int delp = remap_prob(newp, oldp);
   return update_bits[delp] * 256;
@@ -111,12 +116,6 @@
   encode_term_subexp(w, delp);
 }
 
-void vp9_compute_update_table() {
-  int i;
-  for (i = 0; i < 254; i++)
-    update_bits[i] = count_term_subexp(i);
-}
-
 int vp9_prob_diff_update_savings_search(const unsigned int *ct,
                                         vp9_prob oldp, vp9_prob *bestp,
                                         vp9_prob upd) {
diff --git a/vp9/encoder/vp9_subexp.h b/vp9/encoder/vp9_subexp.h
index 8e9c0c6..8e02a1d 100644
--- a/vp9/encoder/vp9_subexp.h
+++ b/vp9/encoder/vp9_subexp.h
@@ -16,9 +16,6 @@
 extern "C" {
 #endif
 
-void vp9_compute_update_table();
-
-
 void vp9_write_prob_diff_update(vp9_writer *w,
                                 vp9_prob newp, vp9_prob oldp);
 
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index 0e921be..bf949c4 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -222,8 +222,7 @@
 }
 
 int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) {
-  return cpi->use_svc &&
-         cpi->svc.number_temporal_layers == 1 &&
+  return is_spatial_svc(cpi) &&
          cpi->svc.spatial_layer_id > 0 &&
          cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame;
 }
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 6af8510..2eca8fc 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -442,7 +442,7 @@
   }
 
   // Setup scaling factors. Scaling on each of the arnr frames is not supported
-  if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+  if (is_spatial_svc(cpi)) {
     // In spatial svc the scaling factors might be less then 1/2. So we will use
     // non-normative scaling.
     int frame_used = 0;
diff --git a/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c b/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c
index f31b176..1feed62 100644
--- a/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c
+++ b/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c
@@ -31,7 +31,7 @@
   sum_ref3 = _mm256_set1_epi16(0);
   for (i = 0; i < 32 ; i++) {
     // load src and all refs
-    src_reg = _mm256_load_si256((__m256i *)(src));
+    src_reg = _mm256_loadu_si256((__m256i *)(src));
     ref0_reg = _mm256_loadu_si256((__m256i *) (ref0));
     ref1_reg = _mm256_loadu_si256((__m256i *) (ref1));
     ref2_reg = _mm256_loadu_si256((__m256i *) (ref2));
@@ -103,8 +103,8 @@
   sum_ref3 = _mm256_set1_epi16(0);
   for (i = 0; i < 64 ; i++) {
     // load 64 bytes from src and all refs
-    src_reg = _mm256_load_si256((__m256i *)(src));
-    srcnext_reg = _mm256_load_si256((__m256i *)(src + 32));
+    src_reg = _mm256_loadu_si256((__m256i *)(src));
+    srcnext_reg = _mm256_loadu_si256((__m256i *)(src + 32));
     ref0_reg = _mm256_loadu_si256((__m256i *) (ref0));
     ref0next_reg = _mm256_loadu_si256((__m256i *) (ref0 + 32));
     ref1_reg = _mm256_loadu_si256((__m256i *) (ref1));
diff --git a/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c b/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
index 34ed186..9aa4da9 100644
--- a/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
+++ b/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
@@ -67,7 +67,7 @@
 #define LOAD_SRC_DST \
   /* load source and destination */ \
   src_reg = _mm256_loadu_si256((__m256i const *) (src)); \
-  dst_reg = _mm256_load_si256((__m256i const *) (dst));
+  dst_reg = _mm256_loadu_si256((__m256i const *) (dst));
 
 #define AVG_NEXT_SRC(src_reg, size_stride) \
   src_next_reg = _mm256_loadu_si256((__m256i const *) \
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index 81fe6a6..8e3e885 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -67,7 +67,6 @@
 VP9_COMMON_SRCS-yes += common/vp9_scan.c
 VP9_COMMON_SRCS-yes += common/vp9_scan.h
 
-VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_postproc_x86.h
 VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_asm_stubs.c
 VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_intrin_sse2.c
 VP9_COMMON_SRCS-$(HAVE_AVX2) += common/x86/vp9_loopfilter_intrin_avx2.c
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index bcdf3aa..6367a97 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -40,6 +40,7 @@
   AQ_MODE                     aq_mode;
   unsigned int                frame_periodic_boost;
   BIT_DEPTH                   bit_depth;
+  vp9e_tune_content           content;
 };
 
 struct extraconfig_map {
@@ -70,6 +71,7 @@
       NO_AQ,                      // aq_mode
       0,                          // frame_periodic_delta_q
       BITS_8,                     // Bit depth
+      VP9E_CONTENT_DEFAULT        // content
     }
   }
 };
@@ -219,6 +221,8 @@
   RANGE_CHECK_HI(extra_cfg, arnr_strength, 6);
   RANGE_CHECK(extra_cfg, arnr_type, 1, 3);
   RANGE_CHECK(extra_cfg, cq_level, 0, 63);
+  RANGE_CHECK(extra_cfg, content,
+              VP9E_CONTENT_DEFAULT, VP9E_CONTENT_INVALID - 1);
 
   // TODO(yaowu): remove this when ssim tuning is implemented for vp9
   if (extra_cfg->tuning == VP8_TUNE_SSIM)
@@ -397,6 +401,7 @@
   oxcf->arnr_type       = extra_cfg->arnr_type;
 
   oxcf->tuning = extra_cfg->tuning;
+  oxcf->content = extra_cfg->content;
 
   oxcf->tile_columns = extra_cfg->tile_columns;
   oxcf->tile_rows    = extra_cfg->tile_rows;
@@ -799,6 +804,20 @@
   return index_sz;
 }
 
+// vp9 uses 10,000,000 ticks/second as time stamp
+#define TICKS_PER_SEC 10000000
+
+static int64_t timebase_units_to_ticks(const vpx_rational_t *timebase,
+                                       int64_t n) {
+  return n * TICKS_PER_SEC * timebase->num / timebase->den;
+}
+
+static int64_t ticks_to_timebase_units(const vpx_rational_t *timebase,
+                                       int64_t n) {
+  const int64_t round = TICKS_PER_SEC * timebase->num / 2 - 1;
+  return (n * timebase->den + round) / timebase->num / TICKS_PER_SEC;
+}
+
 static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t  *ctx,
                                       const vpx_image_t *img,
                                       vpx_codec_pts_t pts,
@@ -806,6 +825,7 @@
                                       vpx_enc_frame_flags_t flags,
                                       unsigned long deadline) {
   vpx_codec_err_t res = VPX_CODEC_OK;
+  const vpx_rational_t *const timebase = &ctx->cfg.g_timebase;
 
   if (img != NULL) {
     res = validate_img(ctx, img);
@@ -851,7 +871,9 @@
   if (res == VPX_CODEC_OK && ctx->cpi != NULL) {
     unsigned int lib_flags = 0;
     YV12_BUFFER_CONFIG sd;
-    int64_t dst_time_stamp, dst_end_time_stamp;
+    int64_t dst_time_stamp = timebase_units_to_ticks(timebase, pts);
+    int64_t dst_end_time_stamp =
+        timebase_units_to_ticks(timebase, pts + duration);
     size_t size, cx_data_sz;
     unsigned char *cx_data;
 
@@ -859,12 +881,6 @@
     if (ctx->base.init_flags & VPX_CODEC_USE_PSNR)
       ((VP9_COMP *)ctx->cpi)->b_calculate_psnr = 1;
 
-    /* vp9 use 10,000,000 ticks/second as time stamp */
-    dst_time_stamp = (pts * 10000000 * ctx->cfg.g_timebase.num)
-                     / ctx->cfg.g_timebase.den;
-    dst_end_time_stamp = (pts + duration) * 10000000 * ctx->cfg.g_timebase.num /
-                         ctx->cfg.g_timebase.den;
-
     if (img != NULL) {
       res = image2yuvconfig(img, &sd);
 
@@ -901,19 +917,18 @@
                                          cx_data, &dst_time_stamp,
                                          &dst_end_time_stamp, !img)) {
       if (size) {
-        vpx_codec_pts_t round, delta;
-        vpx_codec_cx_pkt_t pkt;
         VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi;
+        vpx_codec_cx_pkt_t pkt;
 
 #if CONFIG_SPATIAL_SVC
-        if (cpi->use_svc && cpi->svc.number_temporal_layers == 1)
+        if (is_spatial_svc(cpi))
           cpi->svc.layer_context[cpi->svc.spatial_layer_id].layer_size += size;
 #endif
 
         // Pack invisible frames with the next visible frame
         if (cpi->common.show_frame == 0
 #if CONFIG_SPATIAL_SVC
-            || (cpi->use_svc && cpi->svc.number_temporal_layers == 1 &&
+            || (is_spatial_svc(cpi) &&
                 cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
 #endif
             ) {
@@ -928,20 +943,16 @@
         }
 
         // Add the frame packet to the list of returned packets.
-        round = (vpx_codec_pts_t)10000000 * ctx->cfg.g_timebase.num / 2 - 1;
-        delta = (dst_end_time_stamp - dst_time_stamp);
         pkt.kind = VPX_CODEC_CX_FRAME_PKT;
-        pkt.data.frame.pts =
-          (dst_time_stamp * ctx->cfg.g_timebase.den + round)
-          / ctx->cfg.g_timebase.num / 10000000;
-        pkt.data.frame.duration = (unsigned long)
-          ((delta * ctx->cfg.g_timebase.den + round)
-          / ctx->cfg.g_timebase.num / 10000000);
+        pkt.data.frame.pts = ticks_to_timebase_units(timebase, dst_time_stamp);
+        pkt.data.frame.duration =
+           (unsigned long)ticks_to_timebase_units(timebase,
+               dst_end_time_stamp - dst_time_stamp);
         pkt.data.frame.flags = lib_flags << 16;
 
         if (lib_flags & FRAMEFLAGS_KEY
 #if CONFIG_SPATIAL_SVC
-            || (cpi->use_svc && cpi->svc.number_temporal_layers == 1 &&
+            || (is_spatial_svc(cpi) &&
                 cpi->svc.layer_context[0].is_key_frame)
 #endif
             )
@@ -954,9 +965,8 @@
           // prior PTS so that if a decoder uses pts to schedule when
           // to do this, we start right after last frame was decoded.
           // Invisible frames have no duration.
-          pkt.data.frame.pts = ((cpi->last_time_stamp_seen
-                                 * ctx->cfg.g_timebase.den + round)
-                                / ctx->cfg.g_timebase.num / 10000000) + 1;
+          pkt.data.frame.pts =
+              ticks_to_timebase_units(timebase, cpi->last_time_stamp_seen) + 1;
           pkt.data.frame.duration = 0;
         }
 
@@ -983,7 +993,7 @@
         cx_data += size;
         cx_data_sz -= size;
 #if CONFIG_SPATIAL_SVC
-        if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+        if (is_spatial_svc(cpi)) {
           vpx_codec_cx_pkt_t pkt = {0};
           int i;
           pkt.kind = VPX_CODEC_SPATIAL_SVC_LAYER_SIZES;
@@ -1213,6 +1223,13 @@
   return VPX_CODEC_OK;
 }
 
+static vpx_codec_err_t ctrl_set_tune_content(vpx_codec_alg_priv_t *ctx,
+                                             va_list args) {
+  struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+  extra_cfg.content = CAST(VP9E_SET_TUNE_CONTENT, args);
+  return update_extra_cfg(ctx, &extra_cfg);
+}
+
 static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
   {VP8_COPY_REFERENCE,                ctrl_copy_reference},
   {VP8E_UPD_ENTROPY,                  ctrl_update_entropy},
@@ -1245,6 +1262,7 @@
   {VP9E_SET_SVC,                      ctrl_set_svc},
   {VP9E_SET_SVC_PARAMETERS,           ctrl_set_svc_parameters},
   {VP9E_SET_SVC_LAYER_ID,             ctrl_set_svc_layer_id},
+  {VP9E_SET_TUNE_CONTENT,             ctrl_set_tune_content},
 
   // Getters
   {VP8E_GET_LAST_QUANTIZER,           ctrl_get_quantizer},
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 75cb8a1..893ab19 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -40,6 +40,7 @@
   void                   *decrypt_state;
   vpx_image_t             img;
   int                     img_avail;
+  int                     flushed;
   int                     invert_tile_order;
   int                     frame_parallel_decode;  // frame-based threading.
 
@@ -69,6 +70,7 @@
     ctx->priv->alg_priv = alg_priv;
     ctx->priv->alg_priv->si.sz = sizeof(ctx->priv->alg_priv->si);
     ctx->priv->init_flags = ctx->init_flags;
+    ctx->priv->alg_priv->flushed = 0;
     ctx->priv->alg_priv->frame_parallel_decode =
         (ctx->init_flags & VPX_CODEC_USE_FRAME_THREADING);
 
@@ -96,6 +98,30 @@
   return VPX_CODEC_OK;
 }
 
+static int parse_bitdepth_colorspace_sampling(
+    BITSTREAM_PROFILE profile, struct vp9_read_bit_buffer *rb) {
+  const int sRGB = 7;
+  int colorspace;
+  if (profile >= PROFILE_2)
+    rb->bit_offset += 1;  // Bit-depth 10 or 12.
+  colorspace = vp9_rb_read_literal(rb, 3);
+  if (colorspace != sRGB) {
+    rb->bit_offset += 1;  // [16,235] (including xvycc) vs [0,255] range.
+    if (profile == PROFILE_1 || profile == PROFILE_3) {
+      rb->bit_offset += 2;  // subsampling x/y.
+      rb->bit_offset += 1;  // unused.
+    }
+  } else {
+    if (profile == PROFILE_1 || profile == PROFILE_3) {
+      rb->bit_offset += 1;  // unused
+    } else {
+      // RGB is only available in version 1.
+      return 0;
+    }
+  }
+  return 1;
+}
+
 static vpx_codec_err_t decoder_peek_si_internal(const uint8_t *data,
                                                 unsigned int data_sz,
                                                 vpx_codec_stream_info_t *si,
@@ -142,37 +168,24 @@
     error_resilient = vp9_rb_read_bit(&rb);
 
     if (si->is_kf) {
-      const int sRGB = 7;
-      int colorspace;
-
       if (!vp9_read_sync_code(&rb))
         return VPX_CODEC_UNSUP_BITSTREAM;
 
-      if (profile > PROFILE_1)
-        rb.bit_offset += 1;  // Bit-depth 10 or 12
-      colorspace = vp9_rb_read_literal(&rb, 3);
-      if (colorspace != sRGB) {
-        rb.bit_offset += 1;  // [16,235] (including xvycc) vs [0,255] range
-        if (profile == PROFILE_1 || profile == PROFILE_3) {
-          rb.bit_offset += 2;  // subsampling x/y
-          rb.bit_offset += 1;  // unused
-        }
-      } else {
-        if (profile == PROFILE_1 || profile == PROFILE_3) {
-          rb.bit_offset += 1;  // unused
-        } else {
-          // RGB is only available in version 1
-          return VPX_CODEC_UNSUP_BITSTREAM;
-        }
-      }
+      if (!parse_bitdepth_colorspace_sampling(profile, &rb))
+        return VPX_CODEC_UNSUP_BITSTREAM;
       vp9_read_frame_size(&rb, (int *)&si->w, (int *)&si->h);
     } else {
       intra_only_flag = show_frame ? 0 : vp9_rb_read_bit(&rb);
+
       rb.bit_offset += error_resilient ? 0 : 2;  // reset_frame_context
 
       if (intra_only_flag) {
         if (!vp9_read_sync_code(&rb))
           return VPX_CODEC_UNSUP_BITSTREAM;
+        if (profile > PROFILE_0) {
+          if (!parse_bitdepth_colorspace_sampling(profile, &rb))
+            return VPX_CODEC_UNSUP_BITSTREAM;
+        }
         rb.bit_offset += REF_FRAMES;  // refresh_frame_flags
         vp9_read_frame_size(&rb, (int *)&si->w, (int *)&si->h);
       }
@@ -403,8 +416,13 @@
   uint32_t frame_sizes[8];
   int frame_count;
 
-  if (data == NULL || data_sz == 0)
-    return VPX_CODEC_INVALID_PARAM;
+  if (data == NULL && data_sz == 0) {
+    ctx->flushed = 1;
+    return VPX_CODEC_OK;
+  }
+
+  // Reset flushed when receiving a valid frame.
+  ctx->flushed = 0;
 
   res = parse_superframe_index(data, data_sz, frame_sizes, &frame_count,
                                ctx->decrypt_cb, ctx->decrypt_state);
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index 36c587f..796a7a1 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -205,7 +205,8 @@
    *                     layer and 0..#vpx_codec_enc_cfg::ts_number_layers for
    *                     temporal layer.
    */
-  VP9E_SET_SVC_LAYER_ID
+  VP9E_SET_SVC_LAYER_ID,
+  VP9E_SET_TUNE_CONTENT
 };
 
 /*!\brief vpx 1-D scaling mode
@@ -277,6 +278,12 @@
   VP8_EIGHT_TOKENPARTITION = 3
 } vp8e_token_partitions;
 
+/*!brief VP9 encoder content type */
+typedef enum {
+  VP9E_CONTENT_DEFAULT,
+  VP9E_CONTENT_SCREEN,
+  VP9E_CONTENT_INVALID
+} vp9e_tune_content;
 
 /*!\brief VP8 model tuning parameters
  *
@@ -370,6 +377,7 @@
 
 VPX_CTRL_USE_TYPE(VP9E_SET_FRAME_PERIODIC_BOOST, unsigned int)
 
+VPX_CTRL_USE_TYPE(VP9E_SET_TUNE_CONTENT, int) /* vp9e_tune_content */
 /*! @} - end defgroup vp8_encoder */
 #ifdef __cplusplus
 }  // extern "C"