Merge "Hiding struct diff in *.c file."
diff --git a/build/make/Makefile b/build/make/Makefile
index c4d53f1..63ec271 100644
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -19,6 +19,7 @@
         done
 all: .DEFAULT
 clean:: .DEFAULT
+exampletest: .DEFAULT
 install:: .DEFAULT
 test:: .DEFAULT
 testdata:: .DEFAULT
@@ -105,6 +106,8 @@
 
 .PHONY: dist
 dist:
+.PHONY: exampletest
+exampletest:
 .PHONY: install
 install::
 .PHONY: test
diff --git a/build/make/configure.sh b/build/make/configure.sh
index c07b049..ad7dc82 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -1284,8 +1284,8 @@
     local makefile=$2
     shift 2
     for cfg; do
-        upname="`toupper $cfg`"
         if enabled $cfg; then
+            upname="`toupper $cfg`"
             echo "${prefix}_${upname}=yes" >> $makefile
         fi
     done
diff --git a/configure b/configure
index bd95056..cc4ca02 100755
--- a/configure
+++ b/configure
@@ -265,8 +265,9 @@
     unistd_h
 "
 EXPERIMENT_LIST="
-    multiple_arf
     alpha
+    multiple_arf
+    spatial_svc
 "
 CONFIG_LIST="
     external_build
diff --git a/examples.mk b/examples.mk
index 28ab33a..e09681e 100644
--- a/examples.mk
+++ b/examples.mk
@@ -67,15 +67,17 @@
 endif
 vpxenc.GUID                  = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1
 vpxenc.DESCRIPTION           = Full featured encoder
-EXAMPLES-$(CONFIG_VP9_ENCODER)      += vp9_spatial_svc_encoder.c
-vp9_spatial_svc_encoder.SRCS        += args.c args.h
-vp9_spatial_svc_encoder.SRCS        += ivfenc.c ivfenc.h
-vp9_spatial_svc_encoder.SRCS        += tools_common.c tools_common.h
-vp9_spatial_svc_encoder.SRCS        += video_common.h
-vp9_spatial_svc_encoder.SRCS        += video_writer.h video_writer.c
-vp9_spatial_svc_encoder.SRCS        += vpxstats.c vpxstats.h
-vp9_spatial_svc_encoder.GUID        = 4A38598D-627D-4505-9C7B-D4020C84100D
-vp9_spatial_svc_encoder.DESCRIPTION = VP9 Spatial SVC Encoder
+ifeq ($(CONFIG_SPATIAL_SVC),yes)
+  EXAMPLES-$(CONFIG_VP9_ENCODER)      += vp9_spatial_svc_encoder.c
+  vp9_spatial_svc_encoder.SRCS        += args.c args.h
+  vp9_spatial_svc_encoder.SRCS        += ivfenc.c ivfenc.h
+  vp9_spatial_svc_encoder.SRCS        += tools_common.c tools_common.h
+  vp9_spatial_svc_encoder.SRCS        += video_common.h
+  vp9_spatial_svc_encoder.SRCS        += video_writer.h video_writer.c
+  vp9_spatial_svc_encoder.SRCS        += vpxstats.c vpxstats.h
+  vp9_spatial_svc_encoder.GUID        = 4A38598D-627D-4505-9C7B-D4020C84100D
+  vp9_spatial_svc_encoder.DESCRIPTION = VP9 Spatial SVC Encoder
+endif
 
 ifneq ($(CONFIG_SHARED),yes)
 EXAMPLES-$(CONFIG_VP9_ENCODER)    += resize_util.c
diff --git a/libs.mk b/libs.mk
index 85c5f8a..d02e9bc 100644
--- a/libs.mk
+++ b/libs.mk
@@ -115,7 +115,7 @@
   CODEC_EXPORTS-yes += $(addprefix $(VP9_PREFIX),$(VP9_CX_EXPORTS))
   CODEC_SRCS-yes += $(VP9_PREFIX)vp9cx.mk vpx/vp8.h vpx/vp8cx.h
   INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h
-  INSTALL-LIBS-yes += include/vpx/svc_context.h
+  INSTALL-LIBS-$(CONFIG_SPATIAL_SVC) += include/vpx/svc_context.h
   INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP9_PREFIX)/%
   CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h
   CODEC_DOC_SECTIONS += vp9 vp9_encoder
@@ -556,3 +556,26 @@
 utiltest:
 	@echo Unit tests must be enabled to make the utiltest target.
 endif
+
+##
+## Example tests.
+##
+ifeq ($(CONFIG_UNIT_TESTS),yes)
+# All non-MSVC targets output example targets in a sub dir named examples.
+EXAMPLES_BIN_PATH = examples
+ifeq ($(CONFIG_MSVS),yes)
+# MSVC will build both Debug and Release configurations of the examples in a
+# sub directory named for the current target. Assume the user wants to
+# run the Release tools, and assign EXAMPLES_BIN_PATH accordingly.
+# TODO(tomfinegan): Is this adequate for ARM?
+# TODO(tomfinegan): Support running the debug versions of tools?
+EXAMPLES_BIN_PATH := $(TGT_OS:win64=x64)/Release
+endif
+exampletest: examples testdata
+	$(qexec)$(SRC_PATH_BARE)/test/examples.sh \
+		--test-data-path $(LIBVPX_TEST_DATA_PATH) \
+		--bin-path $(EXAMPLES_BIN_PATH)
+else
+exampletest:
+	@echo Unit tests must be enabled to make the exampletest target.
+endif
diff --git a/test/examples.sh b/test/examples.sh
index ac2a18c..7ba9cce 100755
--- a/test/examples.sh
+++ b/test/examples.sh
@@ -24,5 +24,6 @@
 
 for test in ${example_tests}; do
   # Source each test script so that exporting variables can be avoided.
+  VPX_TEST_NAME="$(basename ${test%.*})"
   . "${test}"
 done
diff --git a/test/test.mk b/test/test.mk
index 44d2f9c..f0a27c7 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -113,10 +113,13 @@
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += svc_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc
 
+ifeq ($(CONFIG_VP9_ENCODER),yes)
+LIBVPX_TEST_SRCS-$(CONFIG_SPATIAL_SVC) += svc_test.cc
+endif
+
 endif # VP9
 
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += sad_test.cc
diff --git a/test/tools_common.sh b/test/tools_common.sh
index 9c10d48..472111c 100755
--- a/test/tools_common.sh
+++ b/test/tools_common.sh
@@ -302,8 +302,13 @@
 # functions and are run unconditionally. Functions in positional parameter two
 # are run according to the rules specified in vpx_test_usage().
 run_tests() {
-  env_tests="verify_vpx_test_environment ${1}"
-  tests_to_filter="${2}"
+  local env_tests="verify_vpx_test_environment $1"
+  local tests_to_filter="$2"
+  local test_name="${VPX_TEST_NAME}"
+
+  if [ -z "${test_name}" ]; then
+    test_name="$(basename \"${0%.*}\")"
+  fi
 
   if [ "${VPX_TEST_RUN_DISABLED_TESTS}" != "yes" ]; then
     # Filter out DISABLED tests.
@@ -315,7 +320,7 @@
     tests_to_filter=$(filter_strings "${tests_to_filter}" ${VPX_TEST_FILTER})
   fi
 
-  tests_to_run="${env_tests} ${tests_to_filter}"
+  local tests_to_run="${env_tests} ${tests_to_filter}"
 
   check_git_hashes
 
@@ -328,8 +333,8 @@
     test_end "${test}"
   done
 
-  tested_config="$(test_configuration_target) @ $(current_hash)"
-  echo $(basename "${0%.*}"): Done, all tests pass for ${tested_config}.
+  local tested_config="$(test_configuration_target) @ $(current_hash)"
+  echo "${test_name}: Done, all tests pass for ${tested_config}."
 }
 
 vpx_test_usage() {
diff --git a/test/vp9_spatial_svc_encoder.sh b/test/vp9_spatial_svc_encoder.sh
new file mode 100755
index 0000000..635cfa2
--- /dev/null
+++ b/test/vp9_spatial_svc_encoder.sh
@@ -0,0 +1,89 @@
+#!/bin/sh
+##
+##  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
+##
+##  This file tests the libvpx vp9_spatial_svc_encoder example. To add new
+##  tests to to this file, do the following:
+##    1. Write a shell function (this is your test).
+##    2. Add the function to vp9_spatial_svc_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: $YUV_RAW_INPUT is required.
+vp9_spatial_svc_encoder_verify_environment() {
+  if [ ! -e "${YUV_RAW_INPUT}" ]; then
+    echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
+    return 1
+  fi
+}
+
+# Runs vp9_spatial_svc_encoder. $1 is the test name.
+vp9_spatial_svc_encoder() {
+  local encoder="${LIBVPX_BIN_PATH}/vp9_spatial_svc_encoder"
+  encoder="${encoder}${VPX_TEST_EXE_SUFFIX}"
+  local test_name="$1"
+  local output_file="${VPX_TEST_OUTPUT_DIR}/vp9_ssvc_encoder${test_name}.ivf"
+  local frames_to_encode="10"
+  local max_kf="9999"
+
+  shift
+
+  [ -x "${encoder}" ] || return 1
+
+  eval "${encoder}" -w "${YUV_RAW_INPUT_WIDTH}" -h "${YUV_RAW_INPUT_HEIGHT}" \
+      -k "${max_kf}" -f "${frames_to_encode}" "$@" "${YUV_RAW_INPUT}" \
+      "${output_file}" \
+      ${devnull}
+
+  [ -e "${output_file}" ] || return 1
+}
+
+# Each mode is run with layer count 1-$vp9_ssvc_test_layers.
+vp9_ssvc_test_layers=5
+
+vp9_spatial_svc_mode_i() {
+  if [ "$(vp9_encode_available)" = "yes" ]; then
+    local test_name="${FUNCNAME}"
+    for layers in $(seq 1 ${vp9_ssvc_test_layers}); do
+      vp9_spatial_svc_encoder "${test_name}" -m i -l ${layers}
+    done
+  fi
+}
+
+vp9_spatial_svc_mode_altip() {
+  if [ "$(vp9_encode_available)" = "yes" ]; then
+    local test_name="${FUNCNAME}"
+    for layers in $(seq 1 ${vp9_ssvc_test_layers}); do
+      vp9_spatial_svc_encoder "${test_name}" -m "alt-ip" -l ${layers}
+    done
+  fi
+}
+
+vp9_spatial_svc_mode_ip() {
+  if [ "$(vp9_encode_available)" = "yes" ]; then
+    local test_name="${FUNCNAME}"
+    vp9_spatial_svc_encoder "${test_name}" -m ip -l 1
+  fi
+}
+
+vp9_spatial_svc_mode_gf() {
+  if [ "$(vp9_encode_available)" = "yes" ]; then
+    local test_name="${FUNCNAME}"
+    for layers in $(seq 1 ${vp9_ssvc_test_layers}); do
+      vp9_spatial_svc_encoder "${test_name}" -m gf -l ${layers}
+    done
+  fi
+}
+
+vp9_spatial_svc_tests="vp9_spatial_svc_mode_i
+                       vp9_spatial_svc_mode_altip
+                       vp9_spatial_svc_mode_ip
+                       vp9_spatial_svc_mode_gf"
+
+run_tests vp9_spatial_svc_encoder_verify_environment "${vp9_spatial_svc_tests}"
diff --git a/test/vpx_temporal_svc_encoder.sh b/test/vpx_temporal_svc_encoder.sh
new file mode 100755
index 0000000..ff64740
--- /dev/null
+++ b/test/vpx_temporal_svc_encoder.sh
@@ -0,0 +1,283 @@
+#!/bin/sh
+##
+##  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
+##
+##  This file tests the libvpx vpx_temporal_svc_encoder example. To add new
+##  tests to this file, do the following:
+##    1. Write a shell function (this is your test).
+##    2. Add the function to vpx_tsvc_encoder_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: $YUV_RAW_INPUT is required.
+vpx_tsvc_encoder_verify_environment() {
+  if [ ! -e "${YUV_RAW_INPUT}" ]; then
+    echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
+    return 1
+  fi
+}
+
+# Runs vpx_temporal_svc_encoder using the codec specified by $1 and output file
+# name by $2. Additional positional parameters are passed directly to
+# vpx_temporal_svc_encoder.
+vpx_tsvc_encoder() {
+  local encoder="${LIBVPX_BIN_PATH}/vpx_temporal_svc_encoder"
+  encoder="${encoder}${VPX_TEST_EXE_SUFFIX}"
+  local codec="$1"
+  local output_file_base="$2"
+  local output_file="${VPX_TEST_OUTPUT_DIR}/${output_file_base}"
+  local timebase_num="1"
+  local timebase_den="1000"
+  local speed="6"
+  local frame_drop_thresh="30"
+
+  shift 2
+
+  [ -x "${encoder}" ] || return 1
+
+  eval "${encoder}" "${YUV_RAW_INPUT}" "${output_file}" "${codec}" \
+      "${YUV_RAW_INPUT_WIDTH}" "${YUV_RAW_INPUT_HEIGHT}" \
+      "${timebase_num}" "${timebase_den}" "${speed}" "${frame_drop_thresh}" \
+      "$@" \
+      ${devnull}
+}
+
+# Confirms that all expected output files exist given the output file name
+# passed to vpx_temporal_svc_encoder.
+# The file name passed to vpx_temporal_svc_encoder is joined with the stream
+# number and the extension .ivf to produce per stream output files.  Here $1 is
+# file name, and $2 is expected number of files.
+files_exist() {
+  local file_name="${VPX_TEST_OUTPUT_DIR}/$1"
+  local num_files="$(($2 - 1))"
+  for stream_num in $(seq 0 ${num_files}); do
+    [ -e "${file_name}_${stream_num}.ivf" ] || return 1
+  done
+}
+
+# Run vpx_temporal_svc_encoder in all supported modes for vp8 and vp9.
+
+vpx_tsvc_encoder_vp8_mode_0() {
+  if [ "$(vp8_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp8 "${FUNCNAME}" 0 200 || return 1
+    # Mode 0 produces 1 stream
+    files_exist "${FUNCNAME}" 1 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp8_mode_1() {
+  if [ "$(vp8_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp8 "${FUNCNAME}" 1 200 400 || return 1
+    # Mode 1 produces 2 streams
+    files_exist "${FUNCNAME}" 2 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp8_mode_2() {
+  if [ "$(vp8_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp8 "${FUNCNAME}" 2 200 400 || return 1
+    # Mode 2 produces 2 streams
+    files_exist "${FUNCNAME}" 2 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp8_mode_3() {
+  if [ "$(vp8_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp8 "${FUNCNAME}" 3 200 400 600 || return 1
+    # Mode 3 produces 3 streams
+    files_exist "${FUNCNAME}" 3 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp8_mode_4() {
+  if [ "$(vp8_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp8 "${FUNCNAME}" 4 200 400 600 || return 1
+    # Mode 4 produces 3 streams
+    files_exist "${FUNCNAME}" 3 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp8_mode_5() {
+  if [ "$(vp8_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp8 "${FUNCNAME}" 5 200 400 600 || return 1
+    # Mode 5 produces 3 streams
+    files_exist "${FUNCNAME}" 3 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp8_mode_6() {
+  if [ "$(vp8_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp8 "${FUNCNAME}" 6 200 400 600 || return 1
+    # Mode 6 produces 3 streams
+    files_exist "${FUNCNAME}" 3 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp8_mode_7() {
+  if [ "$(vp8_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp8 "${FUNCNAME}" 7 200 400 600 800 1000 || return 1
+    # Mode 7 produces 5 streams
+    files_exist "${FUNCNAME}" 5 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp8_mode_8() {
+  if [ "$(vp8_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp8 "${FUNCNAME}" 8 200 400 || return 1
+    # Mode 8 produces 2 streams
+    files_exist "${FUNCNAME}" 2 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp8_mode_9() {
+  if [ "$(vp8_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp8 "${FUNCNAME}" 9 200 400 600 || return 1
+    # Mode 9 produces 3 streams
+    files_exist "${FUNCNAME}" 3 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp8_mode_10() {
+  if [ "$(vp8_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp8 "${FUNCNAME}" 10 200 400 600 || return 1
+    # Mode 10 produces 3 streams
+    files_exist "${FUNCNAME}" 3 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp8_mode_11() {
+  if [ "$(vp8_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp8 "${FUNCNAME}" 11 200 400 600 || return 1
+    # Mode 11 produces 3 streams
+    files_exist "${FUNCNAME}" 3 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp9_mode_0() {
+  if [ "$(vp9_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp9 "${FUNCNAME}" 0 200 || return 1
+    # Mode 0 produces 1 stream
+    files_exist "${FUNCNAME}" 1 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp9_mode_1() {
+  if [ "$(vp9_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp9 "${FUNCNAME}" 1 200 400 || return 1
+    # Mode 1 produces 2 streams
+    files_exist "${FUNCNAME}" 2 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp9_mode_2() {
+  if [ "$(vp9_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp9 "${FUNCNAME}" 2 200 400 || return 1
+    # Mode 2 produces 2 streams
+    files_exist "${FUNCNAME}" 2 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp9_mode_3() {
+  if [ "$(vp9_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp9 "${FUNCNAME}" 3 200 400 600 || return 1
+    # Mode 3 produces 3 streams
+    files_exist "${FUNCNAME}" 3 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp9_mode_4() {
+  if [ "$(vp9_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp9 "${FUNCNAME}" 4 200 400 600 || return 1
+    # Mode 4 produces 3 streams
+    files_exist "${FUNCNAME}" 3 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp9_mode_5() {
+  if [ "$(vp9_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp9 "${FUNCNAME}" 5 200 400 600 || return 1
+    # Mode 5 produces 3 streams
+    files_exist "${FUNCNAME}" 3 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp9_mode_6() {
+  if [ "$(vp9_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp9 "${FUNCNAME}" 6 200 400 600 || return 1
+    # Mode 6 produces 3 streams
+    files_exist "${FUNCNAME}" 3 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp9_mode_7() {
+  if [ "$(vp9_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp9 "${FUNCNAME}" 7 200 400 600 800 1000 || return 1
+    # Mode 7 produces 5 streams
+    files_exist "${FUNCNAME}" 5 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp9_mode_8() {
+  if [ "$(vp9_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp9 "${FUNCNAME}" 8 200 400 || return 1
+    # Mode 8 produces 2 streams
+    files_exist "${FUNCNAME}" 2 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp9_mode_9() {
+  if [ "$(vp9_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp9 "${FUNCNAME}" 9 200 400 600 || return 1
+    # Mode 9 produces 3 streams
+    files_exist "${FUNCNAME}" 3 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp9_mode_10() {
+  if [ "$(vp9_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp9 "${FUNCNAME}" 10 200 400 600 || return 1
+    # Mode 10 produces 3 streams
+    files_exist "${FUNCNAME}" 3 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_vp9_mode_11() {
+  if [ "$(vp9_encode_available)" = "yes" ]; then
+    vpx_tsvc_encoder vp9 "${FUNCNAME}" 11 200 400 600 || return 1
+    # Mode 11 produces 3 streams
+    files_exist "${FUNCNAME}" 3 || return 1
+  fi
+}
+
+vpx_tsvc_encoder_tests="vpx_tsvc_encoder_vp8_mode_0
+                        vpx_tsvc_encoder_vp8_mode_1
+                        vpx_tsvc_encoder_vp8_mode_2
+                        vpx_tsvc_encoder_vp8_mode_3
+                        vpx_tsvc_encoder_vp8_mode_4
+                        vpx_tsvc_encoder_vp8_mode_5
+                        vpx_tsvc_encoder_vp8_mode_6
+                        vpx_tsvc_encoder_vp8_mode_7
+                        vpx_tsvc_encoder_vp8_mode_8
+                        vpx_tsvc_encoder_vp8_mode_9
+                        vpx_tsvc_encoder_vp8_mode_10
+                        vpx_tsvc_encoder_vp8_mode_11
+                        vpx_tsvc_encoder_vp9_mode_0
+                        vpx_tsvc_encoder_vp9_mode_1
+                        vpx_tsvc_encoder_vp9_mode_2
+                        vpx_tsvc_encoder_vp9_mode_3
+                        vpx_tsvc_encoder_vp9_mode_4
+                        vpx_tsvc_encoder_vp9_mode_5
+                        vpx_tsvc_encoder_vp9_mode_6
+                        vpx_tsvc_encoder_vp9_mode_7
+                        vpx_tsvc_encoder_vp9_mode_8
+                        vpx_tsvc_encoder_vp9_mode_9
+                        vpx_tsvc_encoder_vp9_mode_10
+                        vpx_tsvc_encoder_vp9_mode_11"
+
+run_tests vpx_tsvc_encoder_verify_environment "${vpx_tsvc_encoder_tests}"
diff --git a/third_party/x86inc/x86inc.asm b/third_party/x86inc/x86inc.asm
index 5b22b94..2134676 100644
--- a/third_party/x86inc/x86inc.asm
+++ b/third_party/x86inc/x86inc.asm
@@ -395,6 +395,23 @@
     %assign n_arg_names %0
 %endmacro
 
+%if ARCH_X86_64
+%macro ALLOC_STACK 2  ; stack_size, num_regs
+  %assign %%stack_aligment ((mmsize + 15) & ~15)
+  %assign stack_size_padded %1
+
+  %assign %%reg_num (%2 - 1)
+  %xdefine rsp_tmp r %+ %%reg_num
+  mov  rsp_tmp, rsp
+  sub  rsp, stack_size_padded
+  and  rsp, ~(%%stack_aligment - 1)
+%endmacro
+
+%macro RESTORE_STACK 0  ; reset rsp register
+  mov  rsp, rsp_tmp
+%endmacro
+%endif
+
 %if WIN64 ; Windows x64 ;=================================================
 
 DECLARE_REG 0,  rcx, ecx,  cx,   cl
diff --git a/vp8/common/x86/variance_impl_mmx.asm b/vp8/common/x86/variance_impl_mmx.asm
index d9120d0..7d5e681 100644
--- a/vp8/common/x86/variance_impl_mmx.asm
+++ b/vp8/common/x86/variance_impl_mmx.asm
@@ -342,8 +342,8 @@
         movsxd      rdx, dword ptr arg(3) ;[recon_stride]
 
         ; Row 1
-        movq        mm0, [rax]                  ; Copy eight bytes to mm0
-        movq        mm1, [rbx]                  ; Copy eight bytes to mm1
+        movd        mm0, [rax]                  ; Copy four bytes to mm0
+        movd        mm1, [rbx]                  ; Copy four bytes to mm1
         punpcklbw   mm0, mm6                    ; unpack to higher prrcision
         punpcklbw   mm1, mm6
         psubsw      mm0, mm1                    ; A-B (low order) to MM0
@@ -351,12 +351,12 @@
         pmaddwd     mm0, mm0                    ; square and accumulate
         add         rbx,rdx                     ; Inc pointer into ref data
         add         rax,rcx                     ; Inc pointer into the new data
-        movq        mm1, [rbx]                  ; Copy eight bytes to mm1
+        movd        mm1, [rbx]                  ; Copy four bytes to mm1
         paddd       mm7, mm0                    ; accumulate in mm7
 
 
         ; Row 2
-        movq        mm0, [rax]                  ; Copy eight bytes to mm0
+        movd        mm0, [rax]                  ; Copy four bytes to mm0
         punpcklbw   mm0, mm6                    ; unpack to higher prrcision
         punpcklbw   mm1, mm6
         psubsw      mm0, mm1                    ; A-B (low order) to MM0
@@ -365,12 +365,12 @@
         pmaddwd     mm0, mm0                    ; square and accumulate
         add         rbx,rdx                     ; Inc pointer into ref data
         add         rax,rcx                     ; Inc pointer into the new data
-        movq        mm1, [rbx]                  ; Copy eight bytes to mm1
+        movd        mm1, [rbx]                  ; Copy four bytes to mm1
         paddd       mm7, mm0                    ; accumulate in mm7
 
         ; Row 3
-        movq        mm0, [rax]                  ; Copy eight bytes to mm0
-        punpcklbw   mm0, mm6                    ; unpack to higher prrcision
+        movd        mm0, [rax]                  ; Copy four bytes to mm0
+        punpcklbw   mm0, mm6                    ; unpack to higher precision
         punpcklbw   mm1, mm6
         psubsw      mm0, mm1                    ; A-B (low order) to MM0
         paddw       mm5, mm0                    ; accumulate differences in mm5
@@ -378,11 +378,11 @@
         pmaddwd     mm0, mm0                    ; square and accumulate
         add         rbx,rdx                     ; Inc pointer into ref data
         add         rax,rcx                     ; Inc pointer into the new data
-        movq        mm1, [rbx]                  ; Copy eight bytes to mm1
+        movd        mm1, [rbx]                  ; Copy four bytes to mm1
         paddd       mm7, mm0                    ; accumulate in mm7
 
         ; Row 4
-        movq        mm0, [rax]                  ; Copy eight bytes to mm0
+        movd        mm0, [rax]                  ; Copy four bytes to mm0
 
         punpcklbw   mm0, mm6                    ; unpack to higher prrcision
         punpcklbw   mm1, mm6
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 1037bfb..1827396 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -772,7 +772,7 @@
 add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
 specialize qw/vp9_full_range_search/;
 
-add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
+add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
 specialize qw/vp9_temporal_filter_apply sse2/;
 
 }
diff --git a/vp9/common/vp9_tile_common.c b/vp9/common/vp9_tile_common.c
index 78909dd..8c4a303 100644
--- a/vp9/common/vp9_tile_common.c
+++ b/vp9/common/vp9_tile_common.c
@@ -21,13 +21,21 @@
   return MIN(offset, mis);
 }
 
-void vp9_tile_init(TileInfo *tile, const VP9_COMMON *cm, int row, int col) {
+void vp9_tile_set_row(TileInfo *tile, const VP9_COMMON *cm, int row) {
   tile->mi_row_start = get_tile_offset(row, cm->mi_rows, cm->log2_tile_rows);
   tile->mi_row_end = get_tile_offset(row + 1, cm->mi_rows, cm->log2_tile_rows);
+}
+
+void vp9_tile_set_col(TileInfo *tile, const VP9_COMMON *cm, int col) {
   tile->mi_col_start = get_tile_offset(col, cm->mi_cols, cm->log2_tile_cols);
   tile->mi_col_end = get_tile_offset(col + 1, cm->mi_cols, cm->log2_tile_cols);
 }
 
+void vp9_tile_init(TileInfo *tile, const VP9_COMMON *cm, int row, int col) {
+  vp9_tile_set_row(tile, cm, row);
+  vp9_tile_set_col(tile, cm, col);
+}
+
 void vp9_get_tile_n_bits(int mi_cols,
                          int *min_log2_tile_cols, int *max_log2_tile_cols) {
   const int sb_cols = mi_cols_aligned_to_sb(mi_cols) >> MI_BLOCK_SIZE_LOG2;
diff --git a/vp9/common/vp9_tile_common.h b/vp9/common/vp9_tile_common.h
index a97719e..ae58805 100644
--- a/vp9/common/vp9_tile_common.h
+++ b/vp9/common/vp9_tile_common.h
@@ -27,6 +27,9 @@
 void vp9_tile_init(TileInfo *tile, const struct VP9Common *cm,
                    int row, int col);
 
+void vp9_tile_set_row(TileInfo *tile, const struct VP9Common *cm, int row);
+void vp9_tile_set_col(TileInfo *tile, const struct VP9Common *cm, int col);
+
 void vp9_get_tile_n_bits(int mi_cols,
                          int *min_log2_tile_cols, int *max_log2_tile_cols);
 
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 3124158..de58939 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -675,64 +675,6 @@
   setup_display_size(cm, rb);
 }
 
-static void decode_tile(VP9Decoder *pbi, const TileInfo *const tile,
-                        int do_loopfilter_inline, vp9_reader *r) {
-  const int num_threads = pbi->max_threads;
-  VP9_COMMON *const cm = &pbi->common;
-  int mi_row, mi_col;
-  MACROBLOCKD *xd = &pbi->mb;
-
-  if (do_loopfilter_inline) {
-    LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
-    lf_data->frame_buffer = get_frame_new_buffer(cm);
-    lf_data->cm = cm;
-    vp9_copy(lf_data->planes, pbi->mb.plane);
-    lf_data->stop = 0;
-    lf_data->y_only = 0;
-    vp9_loop_filter_frame_init(cm, cm->lf.filter_level);
-  }
-
-  for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
-       mi_row += MI_BLOCK_SIZE) {
-    // For a SB there are 2 left contexts, each pertaining to a MB row within
-    vp9_zero(xd->left_context);
-    vp9_zero(xd->left_seg_context);
-    for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
-         mi_col += MI_BLOCK_SIZE) {
-      decode_partition(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64);
-    }
-
-    if (do_loopfilter_inline) {
-      const int lf_start = mi_row - MI_BLOCK_SIZE;
-      LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
-
-      // delay the loopfilter by 1 macroblock row.
-      if (lf_start < 0) continue;
-
-      // decoding has completed: finish up the loop filter in this thread.
-      if (mi_row + MI_BLOCK_SIZE >= tile->mi_row_end) continue;
-
-      vp9_worker_sync(&pbi->lf_worker);
-      lf_data->start = lf_start;
-      lf_data->stop = mi_row;
-      if (num_threads > 1) {
-        vp9_worker_launch(&pbi->lf_worker);
-      } else {
-        vp9_worker_execute(&pbi->lf_worker);
-      }
-    }
-  }
-
-  if (do_loopfilter_inline) {
-    LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
-
-    vp9_worker_sync(&pbi->lf_worker);
-    lf_data->start = lf_data->stop;
-    lf_data->stop = cm->mi_rows;
-    vp9_worker_execute(&pbi->lf_worker);
-  }
-}
-
 static void setup_tile_info(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
   int min_log2_tile_cols, max_log2_tile_cols, max_ones;
   vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
@@ -811,16 +753,35 @@
 
 static const uint8_t *decode_tiles(VP9Decoder *pbi,
                                    const uint8_t *data,
-                                   const uint8_t *data_end,
-                                   int do_loopfilter_inline) {
+                                   const uint8_t *data_end) {
   VP9_COMMON *const cm = &pbi->common;
   const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols);
   const int tile_cols = 1 << cm->log2_tile_cols;
   const int tile_rows = 1 << cm->log2_tile_rows;
   TileBuffer tile_buffers[4][1 << 6];
   int tile_row, tile_col;
-  const uint8_t *end = NULL;
-  vp9_reader r;
+  int mi_row, mi_col;
+  TileData *tile_data = NULL;
+
+  if (cm->lf.filter_level && pbi->lf_worker.data1 == NULL) {
+    CHECK_MEM_ERROR(cm, pbi->lf_worker.data1,
+                    vpx_memalign(32, sizeof(LFWorkerData)));
+    pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker;
+    if (pbi->max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) {
+      vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
+                         "Loop filter thread creation failed");
+    }
+  }
+
+  if (cm->lf.filter_level) {
+    LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
+    lf_data->frame_buffer = get_frame_new_buffer(cm);
+    lf_data->cm = cm;
+    vp9_copy(lf_data->planes, pbi->mb.plane);
+    lf_data->stop = 0;
+    lf_data->y_only = 0;
+    vp9_loop_filter_frame_init(cm, cm->lf.filter_level);
+  }
 
   assert(tile_rows <= 4);
   assert(tile_cols <= (1 << 6));
@@ -835,26 +796,88 @@
 
   get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
 
-  // Decode tiles using data from tile_buffers
+  if (pbi->tile_data == NULL ||
+      (tile_cols * tile_rows) != pbi->total_tiles) {
+    vpx_free(pbi->tile_data);
+    CHECK_MEM_ERROR(
+        cm,
+        pbi->tile_data,
+        vpx_malloc(tile_cols * tile_rows * (sizeof(*pbi->tile_data))));
+    pbi->total_tiles = tile_rows * tile_cols;
+  }
+
+  // Load all tile information into tile_data.
   for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
     for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
-      const int col = pbi->inv_tile_order ? tile_cols - tile_col - 1 : tile_col;
-      const int last_tile = tile_row == tile_rows - 1 &&
-                                 col == tile_cols - 1;
-      const TileBuffer *const buf = &tile_buffers[tile_row][col];
       TileInfo tile;
-
-      vp9_tile_init(&tile, cm, tile_row, col);
-      setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &r,
-                          pbi->decrypt_cb, pbi->decrypt_state);
-      decode_tile(pbi, &tile, do_loopfilter_inline, &r);
-
-      if (last_tile)
-        end = vp9_reader_find_end(&r);
+      const TileBuffer *const buf = &tile_buffers[tile_row][tile_col];
+      tile_data = pbi->tile_data + tile_cols * tile_row + tile_col;
+      tile_data->cm = cm;
+      tile_data->xd = pbi->mb;
+      tile_data->xd.corrupted = 0;
+      vp9_tile_init(&tile, tile_data->cm, tile_row, tile_col);
+      setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
+                          &tile_data->bit_reader, pbi->decrypt_cb,
+                          pbi->decrypt_state);
+      init_macroblockd(cm, &tile_data->xd);
+      vp9_zero(tile_data->xd.dqcoeff);
     }
   }
 
-  return end;
+  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
+    TileInfo tile;
+    vp9_tile_set_row(&tile, cm, tile_row);
+    for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end;
+         mi_row += MI_BLOCK_SIZE) {
+      for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+        const int col = pbi->inv_tile_order ?
+                        tile_cols - tile_col - 1 : tile_col;
+        tile_data = pbi->tile_data + tile_cols * tile_row + col;
+        vp9_tile_set_col(&tile, tile_data->cm, col);
+        vp9_zero(tile_data->xd.left_context);
+        vp9_zero(tile_data->xd.left_seg_context);
+        for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
+             mi_col += MI_BLOCK_SIZE) {
+          decode_partition(tile_data->cm, &tile_data->xd, &tile, mi_row, mi_col,
+                           &tile_data->bit_reader, BLOCK_64X64);
+        }
+      }
+      // Loopfilter one row.
+      if (cm->lf.filter_level) {
+        const int lf_start = mi_row - MI_BLOCK_SIZE;
+        LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
+
+        // delay the loopfilter by 1 macroblock row.
+        if (lf_start < 0) continue;
+
+        // decoding has completed: finish up the loop filter in this thread.
+        if (mi_row + MI_BLOCK_SIZE >= cm->mi_rows) continue;
+
+        vp9_worker_sync(&pbi->lf_worker);
+        lf_data->start = lf_start;
+        lf_data->stop = mi_row;
+        if (pbi->max_threads > 1) {
+          vp9_worker_launch(&pbi->lf_worker);
+        } else {
+          vp9_worker_execute(&pbi->lf_worker);
+        }
+      }
+    }
+  }
+
+  // Loopfilter remaining rows in the frame.
+  if (cm->lf.filter_level) {
+    LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
+    vp9_worker_sync(&pbi->lf_worker);
+    lf_data->start = lf_data->stop;
+    lf_data->stop = cm->mi_rows;
+    vp9_worker_execute(&pbi->lf_worker);
+  }
+
+  // Get last tile data.
+  tile_data = pbi->tile_data + tile_cols * tile_rows - 1;
+
+  return vp9_reader_find_end(&tile_data->bit_reader);
 }
 
 static int tile_worker_hook(void *arg1, void *arg2) {
@@ -1279,7 +1302,6 @@
     const uint8_t *data,
     const uint8_t *data_end,
     uint8_t *clear_data /* buffer size MAX_VP9_HEADER_SIZE */) {
-  vp9_zero(*rb);
   rb->bit_offset = 0;
   rb->error_handler = error_handler;
   rb->error_handler_data = &pbi->common;
@@ -1300,7 +1322,7 @@
                      const uint8_t **p_data_end) {
   VP9_COMMON *const cm = &pbi->common;
   MACROBLOCKD *const xd = &pbi->mb;
-  struct vp9_read_bit_buffer rb;
+  struct vp9_read_bit_buffer rb = { 0 };
   uint8_t clear_data[MAX_VP9_HEADER_SIZE];
   const size_t first_partition_size = read_uncompressed_header(pbi,
       init_read_bit_buffer(pbi, &rb, data, data_end, clear_data));
@@ -1308,8 +1330,6 @@
   const int tile_rows = 1 << cm->log2_tile_rows;
   const int tile_cols = 1 << cm->log2_tile_cols;
   YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm);
-  const int do_loopfilter_inline = tile_rows == 1 && tile_cols == 1 &&
-                                   cm->lf.filter_level;
   xd->cur_buf = new_fb;
 
   if (!first_partition_size) {
@@ -1352,19 +1372,7 @@
     // to do parallel loopfiltering.
     vp9_loop_filter_frame_mt(new_fb, pbi, cm, cm->lf.filter_level, 0);
   } else {
-    if (do_loopfilter_inline && pbi->lf_worker.data1 == NULL) {
-      CHECK_MEM_ERROR(cm, pbi->lf_worker.data1,
-                      vpx_memalign(32, sizeof(LFWorkerData)));
-      pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker;
-      if (pbi->max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) {
-        vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
-                           "Loop filter thread creation failed");
-      }
-    }
-    *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end,
-                               do_loopfilter_inline);
-    if (!do_loopfilter_inline)
-      vp9_loop_filter_frame(new_fb, cm, &pbi->mb, cm->lf.filter_level, 0, 0);
+    *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end);
   }
 
   new_fb->corrupted |= xd->corrupted;
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 98b890b..e1292c2 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -90,6 +90,7 @@
   vp9_remove_common(cm);
   vp9_worker_end(&pbi->lf_worker);
   vpx_free(pbi->lf_worker.data1);
+  vpx_free(pbi->tile_data);
   for (i = 0; i < pbi->num_tile_workers; ++i) {
     VP9Worker *const worker = &pbi->tile_workers[i];
     vp9_worker_end(worker);
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index d6cb507..36fb7ea 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -27,6 +27,13 @@
 extern "C" {
 #endif
 
+// TODO(hkuang): combine this with TileWorkerData.
+typedef struct TileData {
+  VP9_COMMON *cm;
+  vp9_reader bit_reader;
+  DECLARE_ALIGNED(16, MACROBLOCKD, xd);
+} TileData;
+
 typedef struct VP9Decoder {
   DECLARE_ALIGNED(16, MACROBLOCKD, mb);
 
@@ -40,10 +47,12 @@
   int decoded_key_frame;
 
   VP9Worker lf_worker;
-
   VP9Worker *tile_workers;
   int num_tile_workers;
 
+  TileData *tile_data;
+  int total_tiles;
+
   VP9LfSync lf_row_sync;
 
   vpx_decrypt_cb decrypt_cb;
diff --git a/vp9/encoder/vp9_context_tree.h b/vp9/encoder/vp9_context_tree.h
index e12aac2..af4358b 100644
--- a/vp9/encoder/vp9_context_tree.h
+++ b/vp9/encoder/vp9_context_tree.h
@@ -33,13 +33,7 @@
   int is_coded;
   int num_4x4_blk;
   int skip;
-  int_mv best_ref_mv[2];
-  int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
-  int rate;
-  int distortion;
   int best_mode_index;
-  int rddiv;
-  int rdmult;
   int hybrid_pred_diff;
   int comp_pred_diff;
   int single_pred_diff;
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 6ffbd3f..47c9019 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -411,8 +411,8 @@
   // Default value is 1. From first pass stats, encode_breakout may be disabled.
   ENCODE_BREAKOUT_TYPE allow_encode_breakout;
 
-  // Get threshold from external input. In real time mode, it can be
-  // overwritten according to encoding speed.
+  // Get threshold from external input. A suggested threshold is 800 for HD
+  // clips, and 300 for < HD clips.
   int encode_breakout;
 
   unsigned char *segmentation_map;
@@ -438,7 +438,7 @@
   uint64_t time_pick_lpf;
   uint64_t time_encode_sb_row;
 
-  struct twopass_rc twopass;
+  TWO_PASS twopass;
 
   YV12_BUFFER_CONFIG alt_ref_buffer;
   YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS];
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 04f03e2..c5562d5 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -46,6 +46,9 @@
 #define GF_RMAX    96.0
 #define ERR_DIVISOR   150.0
 #define MIN_DECAY_FACTOR 0.1
+#define SVC_FACTOR_PT_LOW 0.45
+#define FACTOR_PT_LOW 0.5
+#define FACTOR_PT_HIGH 0.9
 
 #define KF_MB_INTRA_MIN 150
 #define GF_MB_INTRA_MIN 100
@@ -79,12 +82,12 @@
 
 // Resets the first pass file to the given position using a relative seek from
 // the current position.
-static void reset_fpf_position(struct twopass_rc *p,
+static void reset_fpf_position(TWO_PASS *p,
                                const FIRSTPASS_STATS *position) {
   p->stats_in = position;
 }
 
-static int lookup_next_frame_stats(const struct twopass_rc *p,
+static int lookup_next_frame_stats(const TWO_PASS *p,
                                    FIRSTPASS_STATS *next_frame) {
   if (p->stats_in >= p->stats_in_end)
     return EOF;
@@ -95,7 +98,7 @@
 
 
 // Read frame stats at an offset from the current position.
-static int read_frame_stats(const struct twopass_rc *p,
+static int read_frame_stats(const TWO_PASS *p,
                             FIRSTPASS_STATS *frame_stats, int offset) {
   const FIRSTPASS_STATS *fps_ptr = p->stats_in;
 
@@ -112,7 +115,7 @@
   return 1;
 }
 
-static int input_stats(struct twopass_rc *p, FIRSTPASS_STATS *fps) {
+static int input_stats(TWO_PASS *p, FIRSTPASS_STATS *fps) {
   if (p->stats_in >= p->stats_in_end)
     return EOF;
 
@@ -258,7 +261,7 @@
 // harder frames.
 static double calculate_modified_err(const VP9_COMP *cpi,
                                      const FIRSTPASS_STATS *this_frame) {
-  const struct twopass_rc *twopass = &cpi->twopass;
+  const TWO_PASS *twopass = &cpi->twopass;
   const SVC *const svc = &cpi->svc;
   const FIRSTPASS_STATS *stats;
   double av_err;
@@ -501,7 +504,7 @@
   int new_mv_count = 0;
   int sum_in_vectors = 0;
   uint32_t lastmv_as_int = 0;
-  struct twopass_rc *twopass = &cpi->twopass;
+  TWO_PASS *twopass = &cpi->twopass;
   const MV zero_mv = {0, 0};
   const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12;
 
@@ -938,8 +941,8 @@
     for (q = rc->best_quality; q < rc->worst_quality; ++q) {
       const double factor =
           calc_correction_factor(err_per_mb, ERR_DIVISOR,
-                                 is_svc_upper_layer ? 0.8 : 0.5,
-                                 is_svc_upper_layer ? 1.0 : 0.90, q);
+                                 is_svc_upper_layer ? SVC_FACTOR_PT_LOW :
+                                 FACTOR_PT_LOW, FACTOR_PT_HIGH, q);
       const int bits_per_mb = vp9_rc_bits_per_mb(INTER_FRAME, q,
                                                  factor * speed_term);
       if (bits_per_mb <= target_norm_bits_per_mb)
@@ -960,7 +963,7 @@
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
   const int is_spatial_svc = (svc->number_spatial_layers > 1) &&
                              (svc->number_temporal_layers == 1);
-  struct twopass_rc *const twopass = is_spatial_svc ?
+  TWO_PASS *const twopass = is_spatial_svc ?
       &svc->layer_context[svc->spatial_layer_id].twopass : &cpi->twopass;
   double frame_rate;
   FIRSTPASS_STATS *stats;
@@ -1054,7 +1057,7 @@
 // Function to test for a condition where a complex transition is followed
 // by a static section. For example in slide shows where there is a fade
 // between slides. This is to help with more optimal kf and gf positioning.
-static int detect_transition_to_still(struct twopass_rc *twopass,
+static int detect_transition_to_still(TWO_PASS *twopass,
                                       int frame_interval, int still_interval,
                                       double loop_decay_rate,
                                       double last_decay_rate) {
@@ -1092,7 +1095,7 @@
 // This function detects a flash through the high relative pcnt_second_ref
 // score in the frame following a flash frame. The offset passed in should
 // reflect this.
-static int detect_flash(const struct twopass_rc *twopass, int offset) {
+static int detect_flash(const TWO_PASS *twopass, int offset) {
   FIRSTPASS_STATS next_frame;
 
   int flash_detected = 0;
@@ -1178,7 +1181,7 @@
                           int f_frames, int b_frames,
                           int *f_boost, int *b_boost) {
   FIRSTPASS_STATS this_frame;
-  struct twopass_rc *const twopass = &cpi->twopass;
+  TWO_PASS *const twopass = &cpi->twopass;
   int i;
   double boost_score = 0.0;
   double mv_ratio_accumulator = 0.0;
@@ -1400,7 +1403,7 @@
 #endif
 
 // Calculate a section intra ratio used in setting max loop filter.
-static void calculate_section_intra_ratio(struct twopass_rc *twopass,
+static void calculate_section_intra_ratio(TWO_PASS *twopass,
                                           const FIRSTPASS_STATS *start_pos,
                                           int section_length) {
   FIRSTPASS_STATS next_frame;
@@ -1430,7 +1433,7 @@
 static int64_t calculate_total_gf_group_bits(VP9_COMP *cpi,
                                              double gf_group_err) {
   const RATE_CONTROL *const rc = &cpi->rc;
-  const struct twopass_rc *const twopass = &cpi->twopass;
+  const TWO_PASS *const twopass = &cpi->twopass;
   const int max_bits = frame_max_bits(rc, &cpi->oxcf);
   int64_t total_group_bits;
 
@@ -1481,7 +1484,7 @@
 static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   RATE_CONTROL *const rc = &cpi->rc;
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
-  struct twopass_rc *const twopass = &cpi->twopass;
+  TWO_PASS *const twopass = &cpi->twopass;
   FIRSTPASS_STATS next_frame;
   const FIRSTPASS_STATS *start_pos;
   int i;
@@ -1771,7 +1774,7 @@
 
 // Allocate bits to a normal frame that is neither a gf an arf or a key frame.
 static void assign_std_frame_bits(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
-  struct twopass_rc *twopass = &cpi->twopass;
+  TWO_PASS *twopass = &cpi->twopass;
   // For a single frame.
   const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf);
   // Calculate modified prediction error used in bit allocation.
@@ -1800,7 +1803,7 @@
   vp9_rc_set_frame_target(cpi, target_frame_size);
 }
 
-static int test_candidate_kf(struct twopass_rc *twopass,
+static int test_candidate_kf(TWO_PASS *twopass,
                              const FIRSTPASS_STATS *last_frame,
                              const FIRSTPASS_STATS *this_frame,
                              const FIRSTPASS_STATS *next_frame) {
@@ -1880,7 +1883,7 @@
 static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   int i, j;
   RATE_CONTROL *const rc = &cpi->rc;
-  struct twopass_rc *const twopass = &cpi->twopass;
+  TWO_PASS *const twopass = &cpi->twopass;
   const FIRSTPASS_STATS first_frame = *this_frame;
   const FIRSTPASS_STATS *start_position = twopass->stats_in;
   FIRSTPASS_STATS next_frame;
@@ -2125,7 +2128,7 @@
 void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   RATE_CONTROL *const rc = &cpi->rc;
-  struct twopass_rc *const twopass = &cpi->twopass;
+  TWO_PASS *const twopass = &cpi->twopass;
   int frames_left;
   FIRSTPASS_STATS this_frame;
   FIRSTPASS_STATS this_frame_copy;
@@ -2189,18 +2192,21 @@
     // Define next KF group and assign bits to it.
     this_frame_copy = this_frame;
     find_next_key_frame(cpi, &this_frame_copy);
-    // Don't place key frame in any enhancement layers in spatial svc
-    if (is_spatial_svc) {
-      lc->is_key_frame = 1;
-      if (cpi->svc.spatial_layer_id > 0) {
-        cm->frame_type = INTER_FRAME;
+  } else {
+    cm->frame_type = INTER_FRAME;
+  }
+
+  if (is_spatial_svc) {
+    if (cpi->svc.spatial_layer_id == 0) {
+      lc->is_key_frame = (cm->frame_type == KEY_FRAME);
+    } else {
+      cm->frame_type = INTER_FRAME;
+      lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame;
+
+      if (lc->is_key_frame) {
+        cpi->ref_frame_flags &= (~VP9_LAST_FLAG);
       }
     }
-  } else {
-    if (is_spatial_svc) {
-      lc->is_key_frame = 0;
-    }
-    cm->frame_type = INTER_FRAME;
   }
 
   // Is this frame a GF / ARF? (Note: a key frame is always also a GF).
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index 02a3d1f..d84793e 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -38,7 +38,7 @@
   int64_t spatial_layer_id;
 } FIRSTPASS_STATS;
 
-struct twopass_rc {
+typedef struct {
   unsigned int section_intra_rating;
   unsigned int next_iiratio;
   FIRSTPASS_STATS total_stats;
@@ -76,7 +76,7 @@
   int gf_zeromotion_pct;
 
   int active_worst_quality;
-};
+} TWO_PASS;
 
 struct VP9_COMP;
 
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 5e87d28..041e583 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -235,7 +235,7 @@
 
   int mb_col, mb_row, offset = 0;
   int mb_y_offset = 0, arf_y_offset = 0, gld_y_offset = 0;
-  MV arf_top_mv = {0, 0}, gld_top_mv = {0, 0};
+  MV gld_top_mv = {0, 0};
   MODE_INFO mi_local;
 
   vp9_zero(mi_local);
@@ -253,7 +253,7 @@
   mi_local.mbmi.ref_frame[1] = NONE;
 
   for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
-    MV arf_left_mv = arf_top_mv, gld_left_mv = gld_top_mv;
+    MV gld_left_mv = gld_top_mv;
     int mb_y_in_offset  = mb_y_offset;
     int arf_y_in_offset = arf_y_offset;
     int gld_y_in_offset = gld_y_offset;
@@ -270,10 +270,8 @@
       update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset,
                               golden_ref, &gld_left_mv, alt_ref,
                               mb_row, mb_col);
-      arf_left_mv = mb_stats->ref[ALTREF_FRAME].m.mv.as_mv;
       gld_left_mv = mb_stats->ref[GOLDEN_FRAME].m.mv.as_mv;
       if (mb_col == 0) {
-        arf_top_mv = arf_left_mv;
         gld_top_mv = gld_left_mv;
       }
       xd->left_available = 1;
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index d5379f6..3877e66 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -148,7 +148,8 @@
 
 static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
                               MACROBLOCK *x, MACROBLOCKD *xd,
-                              int *out_rate_sum, int64_t *out_dist_sum) {
+                              int *out_rate_sum, int64_t *out_dist_sum,
+                              unsigned int *var_y, unsigned int *sse_y) {
   // Note our transform coeffs are 8 times an orthogonal transform.
   // Hence quantizer step is also 8 times. To get effective quantizer
   // we need to divide by 8 before sending to modeling function.
@@ -162,6 +163,9 @@
   unsigned int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride,
                                            pd->dst.buf, pd->dst.stride, &sse);
 
+  *var_y = var;
+  *sse_y = sse;
+
   // TODO(jingning) This is a temporary solution to account for frames with
   // light changes. Need to customize the rate-distortion modeling for non-RD
   // mode decision.
@@ -198,6 +202,9 @@
 
   int rate = INT_MAX;
   int64_t dist = INT64_MAX;
+  // var_y and sse_y are saved to be used in skipping checking
+  unsigned int var_y = UINT_MAX;
+  unsigned int sse_y = UINT_MAX;
 
   VP9_COMMON *cm = &cpi->common;
   int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
@@ -219,8 +226,7 @@
   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
 
   x->skip = 0;
-  if (!x->in_active_map)
-    x->skip = 1;
+
   // initialize mode decisions
   *returnrate = INT_MAX;
   *returndistortion = INT64_MAX;
@@ -320,6 +326,8 @@
            (mbmi->mv[0].as_mv.col & 0x07) != 0)) {
         int pf_rate[3];
         int64_t pf_dist[3];
+        unsigned int pf_var[3];
+        unsigned int pf_sse[3];
         int64_t best_cost = INT64_MAX;
         INTERP_FILTER best_filter = SWITCHABLE, filter;
 
@@ -328,7 +336,7 @@
           mbmi->interp_filter = filter;
           vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
           model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter],
-                            &pf_dist[filter]);
+                            &pf_dist[filter], &pf_var[filter], &pf_sse[filter]);
           cost = RDCOST(x->rdmult, x->rddiv,
                         vp9_get_switchable_rate(cpi) + pf_rate[filter],
                         pf_dist[filter]);
@@ -341,10 +349,12 @@
         mbmi->interp_filter = best_filter;
         rate = pf_rate[mbmi->interp_filter];
         dist = pf_dist[mbmi->interp_filter];
+        var_y = pf_var[mbmi->interp_filter];
+        sse_y = pf_sse[mbmi->interp_filter];
       } else {
         mbmi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP: filter_ref;
         vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
-        model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist);
+        model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y);
       }
 
       rate += rate_mv;
@@ -352,7 +362,78 @@
                                 [INTER_OFFSET(this_mode)];
       this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
 
-      if (this_rd < best_rd) {
+      // Skipping checking: test to see if this block can be reconstructed by
+      // prediction only.
+      if (!x->in_active_map) {
+        x->skip = 1;
+      } else if (cpi->allow_encode_breakout && x->encode_breakout) {
+        const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
+        unsigned int var = var_y, sse = sse_y;
+        // Skipping threshold for ac.
+        unsigned int thresh_ac;
+        // Skipping threshold for dc.
+        unsigned int thresh_dc;
+        // Set a maximum for threshold to avoid big PSNR loss in low bit rate
+        // case. Use extreme low threshold for static frames to limit skipping.
+        const unsigned int max_thresh = 36000;
+        // The encode_breakout input
+        const unsigned int min_thresh =
+            MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
+
+        // Calculate threshold according to dequant value.
+        thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
+        thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
+
+        // Adjust ac threshold according to partition size.
+        thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
+            b_height_log2_lookup[bsize]);
+
+        thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
+
+        // Y skipping condition checking for ac and dc.
+        if (var <= thresh_ac && (sse - var) <= thresh_dc) {
+          unsigned int sse_u, sse_v;
+          unsigned int var_u, var_v;
+
+          // Skip u v prediction for less calculation, that won't affect
+          // result much.
+          var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
+                                          x->plane[1].src.stride,
+                                          xd->plane[1].dst.buf,
+                                          xd->plane[1].dst.stride, &sse_u);
+
+          // U skipping condition checking
+          if ((var_u * 4 <= thresh_ac) && (sse_u - var_u <= thresh_dc)) {
+            var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
+                                            x->plane[2].src.stride,
+                                            xd->plane[2].dst.buf,
+                                            xd->plane[2].dst.stride, &sse_v);
+
+            // V skipping condition checking
+            if ((var_v * 4 <= thresh_ac) && (sse_v - var_v <= thresh_dc)) {
+              x->skip = 1;
+
+              // The cost of skip bit needs to be added.
+              rate = rate_mv;
+              rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
+                                           [INTER_OFFSET(this_mode)];
+
+              // More on this part of rate
+              // rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
+
+              // Scaling factor for SSE from spatial domain to frequency
+              // domain is 16. Adjust distortion accordingly.
+              // TODO(yunqingwang): In this function, only y-plane dist is
+              // calculated.
+              dist = (sse << 4);  // + ((sse_u + sse_v) << 4);
+              this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
+              // *disable_skip = 1;
+            }
+          }
+        }
+      }
+
+      if (this_rd < best_rd || x->skip) {
         best_rd = this_rd;
         *returnrate = rate;
         *returndistortion = dist;
@@ -360,6 +441,9 @@
         best_pred_filter = mbmi->interp_filter;
         best_ref_frame = ref_frame;
       }
+
+      if (x->skip)
+        break;
     }
   }
 
@@ -371,14 +455,15 @@
 
   // Perform intra prediction search, if the best SAD is above a certain
   // threshold.
-  if (best_rd > inter_mode_thresh && bsize < cpi->sf.max_intra_bsize) {
+  if (!x->skip && best_rd > inter_mode_thresh &&
+      bsize < cpi->sf.max_intra_bsize) {
     for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) {
       vp9_predict_intra_block(xd, 0, b_width_log2(bsize),
                               mbmi->tx_size, this_mode,
                               &p->src.buf[0], p->src.stride,
                               &pd->dst.buf[0], pd->dst.stride, 0, 0, 0);
 
-      model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist);
+      model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y);
       rate += cpi->mbmode_cost[this_mode];
       rate += intra_cost_penalty;
       this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 0ceedf6..f701cf0 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2222,10 +2222,6 @@
   ctx->skip = x->skip;
   ctx->best_mode_index = mode_index;
   ctx->mic = *xd->mi[0];
-
-  ctx->best_ref_mv[0].as_int = ref_mv->as_int;
-  ctx->best_ref_mv[1].as_int = second_ref_mv->as_int;
-
   ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
   ctx->comp_pred_diff   = (int)comp_pred_diff[COMPOUND_REFERENCE];
   ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 93e23ee..7b2d1e2 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -146,7 +146,6 @@
                                  int speed) {
   sf->static_segmentation = 0;
   sf->adaptive_rd_thresh = 1;
-  sf->encode_breakout_thresh = 1;
   sf->use_fast_coef_costing = 1;
 
   if (speed == 1) {
@@ -169,7 +168,6 @@
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
     sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
     sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
-    sf->encode_breakout_thresh = 8;
   }
 
   if (speed >= 2) {
@@ -208,7 +206,6 @@
     sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
     sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
     sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
-    sf->encode_breakout_thresh = 200;
   }
 
   if (speed >= 3) {
@@ -226,7 +223,6 @@
     sf->optimize_coefficients = 0;
     sf->disable_split_mask = DISABLE_ALL_SPLIT;
     sf->lpf_pick = LPF_PICK_FROM_Q;
-    sf->encode_breakout_thresh = 700;
   }
 
   if (speed >= 4) {
@@ -245,7 +241,6 @@
     }
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_ONLY;
     sf->frame_parameter_update = 0;
-    sf->encode_breakout_thresh = 1000;
     sf->search_method = FAST_HEX;
     sf->disable_inter_mode_mask[BLOCK_32X32] = 1 << INTER_OFFSET(ZEROMV);
     sf->disable_inter_mode_mask[BLOCK_32X64] = ~(1 << INTER_OFFSET(NEARESTMV));
@@ -338,7 +333,6 @@
   sf->use_fast_coef_costing = 0;
   sf->mode_skip_start = MAX_MODES;  // Mode index at which mode skip mask set
   sf->use_nonrd_pick_mode = 0;
-  sf->encode_breakout_thresh = 0;
   for (i = 0; i < BLOCK_SIZES; ++i)
     sf->disable_inter_mode_mask[i] = 0;
   sf->max_intra_bsize = BLOCK_64X64;
@@ -384,10 +378,6 @@
 
   cpi->mb.optimize = sf->optimize_coefficients == 1 && cpi->pass != 1;
 
-  if (cpi->encode_breakout && oxcf->mode == REALTIME &&
-      sf->encode_breakout_thresh > cpi->encode_breakout)
-    cpi->encode_breakout = sf->encode_breakout_thresh;
-
   if (sf->disable_split_mask == DISABLE_ALL_SPLIT)
     sf->adaptive_pred_interp_filter = 0;
 
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index 46806c9..d8c1a8b 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -321,10 +321,6 @@
   // This flag controls the use of non-RD mode decision.
   int use_nonrd_pick_mode;
 
-  // This variable sets the encode_breakout threshold. Currently, it is only
-  // enabled in real time mode.
-  int encode_breakout_thresh;
-
   // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV
   // modes are disabled in order from LSB to MSB for each BLOCK_SIZE.
   int disable_inter_mode_mask[BLOCK_SIZES];
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index 2e98fa7..dd28496 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -203,7 +203,7 @@
   int i;
 
   for (i = 0; i < svc->number_spatial_layers; ++i) {
-    struct twopass_rc *const twopass = &svc->layer_context[i].twopass;
+    TWO_PASS *const twopass = &svc->layer_context[i].twopass;
 
     svc->spatial_layer_id = i;
     vp9_init_second_pass(cpi);
diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h
index 74d9c1c..6881ce1 100644
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -27,7 +27,7 @@
   int64_t maximum_buffer_size;
   double framerate;
   int avg_frame_size;
-  struct twopass_rc twopass;
+  TWO_PASS twopass;
   struct vpx_fixed_buf rc_twopass_stats_in;
   unsigned int current_video_frame_in_layer;
   int is_key_frame;
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 6eff200..f501971 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -34,7 +34,8 @@
                                             uint8_t *u_mb_ptr,
                                             uint8_t *v_mb_ptr,
                                             int stride,
-                                            int uv_block_size,
+                                            int uv_block_width,
+                                            int uv_block_height,
                                             int mv_row,
                                             int mv_col,
                                             uint8_t *pred,
@@ -47,7 +48,7 @@
 
   enum mv_precision mv_precision_uv;
   int uv_stride;
-  if (uv_block_size == 8) {
+  if (uv_block_width == 8) {
     uv_stride = (stride + 1) >> 1;
     mv_precision_uv = MV_PRECISION_Q4;
   } else {
@@ -64,18 +65,18 @@
                             kernel, MV_PRECISION_Q3, x, y);
 
   vp9_build_inter_predictor(u_mb_ptr, uv_stride,
-                            &pred[256], uv_block_size,
+                            &pred[256], uv_block_width,
                             &mv,
                             scale,
-                            uv_block_size, uv_block_size,
+                            uv_block_width, uv_block_height,
                             which_mv,
                             kernel, mv_precision_uv, x, y);
 
   vp9_build_inter_predictor(v_mb_ptr, uv_stride,
-                            &pred[512], uv_block_size,
+                            &pred[512], uv_block_width,
                             &mv,
                             scale,
-                            uv_block_size, uv_block_size,
+                            uv_block_width, uv_block_height,
                             which_mv,
                             kernel, mv_precision_uv, x, y);
 }
@@ -91,7 +92,8 @@
 void vp9_temporal_filter_apply_c(uint8_t *frame1,
                                  unsigned int stride,
                                  uint8_t *frame2,
-                                 unsigned int block_size,
+                                 unsigned int block_width,
+                                 unsigned int block_height,
                                  int strength,
                                  int filter_weight,
                                  unsigned int *accumulator,
@@ -101,8 +103,8 @@
   int byte = 0;
   const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
 
-  for (i = 0, k = 0; i < block_size; i++) {
-    for (j = 0; j < block_size; j++, k++) {
+  for (i = 0, k = 0; i < block_height; i++) {
+    for (j = 0; j < block_width; j++, k++) {
       int src_byte = frame1[byte];
       int pixel_value = *frame2++;
 
@@ -127,7 +129,7 @@
       byte++;
     }
 
-    byte += stride - block_size;
+    byte += stride - block_width;
   }
 }
 
@@ -204,14 +206,12 @@
   uint8_t *dst1, *dst2;
   DECLARE_ALIGNED_ARRAY(16, uint8_t,  predictor, 16 * 16 * 3);
   const int mb_uv_height = 16 >> mbd->plane[1].subsampling_y;
+  const int mb_uv_width  = 16 >> mbd->plane[1].subsampling_x;
 
   // Save input state
   uint8_t* input_buffer[MAX_MB_PLANE];
   int i;
 
-  // TODO(aconverse): Add 4:2:2 support
-  assert(mbd->plane[1].subsampling_x == mbd->plane[1].subsampling_y);
-
   for (i = 0; i < MAX_MB_PLANE; i++)
     input_buffer[i] = mbd->plane[i].pre[0].buf;
 
@@ -275,7 +275,7 @@
               cpi->frames[frame]->u_buffer + mb_uv_offset,
               cpi->frames[frame]->v_buffer + mb_uv_offset,
               cpi->frames[frame]->y_stride,
-              mb_uv_height,
+              mb_uv_width, mb_uv_height,
               mbd->mi[0]->bmi[0].as_mv[0].as_mv.row,
               mbd->mi[0]->bmi[0].as_mv[0].as_mv.col,
               predictor, scale,
@@ -283,16 +283,17 @@
 
           // Apply the filter (YUV)
           vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
-                                    predictor, 16, strength, filter_weight,
+                                    predictor, 16, 16,
+                                    strength, filter_weight,
                                     accumulator, count);
-
           vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
-                                    predictor + 256, mb_uv_height, strength,
+                                    predictor + 256,
+                                    mb_uv_width, mb_uv_height, strength,
                                     filter_weight, accumulator + 256,
                                     count + 256);
-
           vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
-                                    predictor + 512, mb_uv_height, strength,
+                                    predictor + 512,
+                                    mb_uv_width, mb_uv_height, strength,
                                     filter_weight, accumulator + 512,
                                     count + 512);
         }
@@ -321,7 +322,7 @@
       stride = cpi->alt_ref_buffer.uv_stride;
       byte = mb_uv_offset;
       for (i = 0, k = 256; i < mb_uv_height; i++) {
-        for (j = 0; j < mb_uv_height; j++, k++) {
+        for (j = 0; j < mb_uv_width; j++, k++) {
           int m = k + 256;
 
           // U
@@ -339,13 +340,13 @@
           // move to next pixel
           byte++;
         }
-        byte += stride - mb_uv_height;
+        byte += stride - mb_uv_width;
       }
       mb_y_offset += 16;
-      mb_uv_offset += mb_uv_height;
+      mb_uv_offset += mb_uv_width;
     }
     mb_y_offset += 16 * (f->y_stride - mb_cols);
-    mb_uv_offset += mb_uv_height * (f->uv_stride - mb_cols);
+    mb_uv_offset += mb_uv_height * f->uv_stride - mb_uv_width * mb_cols;
   }
 
   // Restore input state
diff --git a/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm b/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm
index d2d13b3..673e0b3 100644
--- a/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm
+++ b/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm
@@ -15,41 +15,45 @@
 ;  (unsigned char  *frame1,           |  0
 ;   unsigned int    stride,           |  1
 ;   unsigned char  *frame2,           |  2
-;   unsigned int    block_size,       |  3
-;   int             strength,         |  4
-;   int             filter_weight,    |  5
-;   unsigned int   *accumulator,      |  6
-;   unsigned short *count)            |  7
+;   unsigned int    block_width,      |  3
+;   unsigned int    block_height,     |  4
+;   int             strength,         |  5
+;   int             filter_weight,    |  6
+;   unsigned int   *accumulator,      |  7
+;   unsigned short *count)            |  8
 global sym(vp9_temporal_filter_apply_sse2) PRIVATE
 sym(vp9_temporal_filter_apply_sse2):
 
     push        rbp
     mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 8
+    SHADOW_ARGS_TO_STACK 9
     SAVE_XMM 7
     GET_GOT     rbx
     push        rsi
     push        rdi
     ALIGN_STACK 16, rax
-    %define block_size    0
-    %define strength      16
-    %define filter_weight 32
-    %define rounding_bit  48
-    %define rbp_backup    64
-    %define stack_size    80
+    %define block_width    0
+    %define block_height  16
+    %define strength      32
+    %define filter_weight 48
+    %define rounding_bit  64
+    %define rbp_backup    80
+    %define stack_size    96
     sub         rsp,           stack_size
     mov         [rsp + rbp_backup], rbp
     ; end prolog
 
         mov         rdx,            arg(3)
-        mov         [rsp + block_size], rdx
-        movd        xmm6,            arg(4)
+        mov         [rsp + block_width], rdx
+        mov         rdx,            arg(4)
+        mov         [rsp + block_height], rdx
+        movd        xmm6,           arg(5)
         movdqa      [rsp + strength], xmm6 ; where strength is used, all 16 bytes are read
 
         ; calculate the rounding bit outside the loop
         ; 0x8000 >> (16 - strength)
         mov         rdx,            16
-        sub         rdx,            arg(4) ; 16 - strength
+        sub         rdx,            arg(5) ; 16 - strength
         movq        xmm4,           rdx    ; can't use rdx w/ shift
         movdqa      xmm5,           [GLOBAL(_const_top_bit)]
         psrlw       xmm5,           xmm4
@@ -57,11 +61,11 @@
 
         mov         rsi,            arg(0) ; src/frame1
         mov         rdx,            arg(2) ; predictor frame
-        mov         rdi,            arg(6) ; accumulator
-        mov         rax,            arg(7) ; count
+        mov         rdi,            arg(7) ; accumulator
+        mov         rax,            arg(8) ; count
 
         ; dup the filter weight and store for later
-        movd        xmm0,           arg(5) ; filter_weight
+        movd        xmm0,           arg(6) ; filter_weight
         pshuflw     xmm0,           xmm0, 0
         punpcklwd   xmm0,           xmm0
         movdqa      [rsp + filter_weight], xmm0
@@ -69,10 +73,11 @@
         mov         rbp,            arg(1) ; stride
         pxor        xmm7,           xmm7   ; zero for extraction
 
-        lea         rcx,            [rdx + 16*16*1]
-        cmp         dword ptr [rsp + block_size], 8
+        mov         rcx,            [rsp + block_width]
+        imul        rcx,            [rsp + block_height]
+        add         rcx,            rdx
+        cmp         dword ptr [rsp + block_width], 8
         jne         .temporal_filter_apply_load_16
-        lea         rcx,            [rdx + 8*8*1]
 
 .temporal_filter_apply_load_8:
         movq        xmm0,           [rsi]  ; first row
@@ -178,7 +183,7 @@
         cmp         rdx,            rcx
         je          .temporal_filter_apply_epilog
         pxor        xmm7,           xmm7   ; zero for extraction
-        cmp         dword ptr [rsp + block_size], 16
+        cmp         dword ptr [rsp + block_width], 16
         je          .temporal_filter_apply_load_16
         jmp         .temporal_filter_apply_load_8
 
diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c
index b874be7..4009a8a 100644
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c
@@ -496,7 +496,6 @@
 vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
                              vpx_codec_iface_t *iface,
                              vpx_codec_enc_cfg_t *enc_cfg) {
-  int max_intra_size_pct;
   vpx_codec_err_t res;
   SvcInternal *const si = get_svc_internal(svc_ctx);
   if (svc_ctx == NULL || codec_ctx == NULL || iface == NULL ||
@@ -575,7 +574,6 @@
   // modify encoder configuration
   enc_cfg->ss_number_layers = si->layers;
   enc_cfg->ts_number_layers = 1;  // Temporal layers not used in this encoder.
-  enc_cfg->kf_mode = VPX_KF_DISABLED;
   // Lag in frames not currently supported
   enc_cfg->g_lag_in_frames = 0;
 
@@ -605,16 +603,8 @@
   }
 
   vpx_codec_control(codec_ctx, VP9E_SET_SVC, 1);
-  vpx_codec_control(codec_ctx, VP8E_SET_CPUUSED, 1);
-  vpx_codec_control(codec_ctx, VP8E_SET_STATIC_THRESHOLD, 1);
-  vpx_codec_control(codec_ctx, VP8E_SET_NOISE_SENSITIVITY, 1);
   vpx_codec_control(codec_ctx, VP8E_SET_TOKEN_PARTITIONS, 1);
 
-  max_intra_size_pct =
-      (int)(((double)enc_cfg->rc_buf_optimal_sz * 0.5) *
-            ((double)enc_cfg->g_timebase.den / enc_cfg->g_timebase.num) / 10.0);
-  vpx_codec_control(codec_ctx, VP8E_SET_MAX_INTRA_BITRATE_PCT,
-                    max_intra_size_pct);
   return VPX_CODEC_OK;
 }
 
@@ -869,8 +859,7 @@
   si->rc_stats_buf_used = 0;
 
   si->layers = svc_ctx->spatial_layers;
-  if (si->frame_within_gop >= si->kf_dist ||
-      si->encode_frame_count == 0) {
+  if (si->encode_frame_count == 0) {
     si->frame_within_gop = 0;
   }
   si->is_keyframe = (si->frame_within_gop == 0);
diff --git a/vpx/vpx_codec.mk b/vpx/vpx_codec.mk
index 98d1d56..a1ad3c5 100644
--- a/vpx/vpx_codec.mk
+++ b/vpx/vpx_codec.mk
@@ -15,8 +15,10 @@
 API_SRCS-$(CONFIG_VP8_ENCODER) += vp8cx.h
 API_DOC_SRCS-$(CONFIG_VP8_ENCODER) += vp8.h
 API_DOC_SRCS-$(CONFIG_VP8_ENCODER) += vp8cx.h
-API_SRCS-$(CONFIG_VP9_ENCODER) += src/svc_encodeframe.c
-API_SRCS-$(CONFIG_VP9_ENCODER) += svc_context.h
+ifeq ($(CONFIG_VP9_ENCODER),yes)
+  API_SRCS-$(CONFIG_SPATIAL_SVC) += src/svc_encodeframe.c
+  API_SRCS-$(CONFIG_SPATIAL_SVC) += svc_context.h
+endif
 
 API_SRCS-$(CONFIG_VP8_DECODER) += vp8.h
 API_SRCS-$(CONFIG_VP8_DECODER) += vp8dx.h
diff --git a/vpxenc.c b/vpxenc.c
index 28d43f2..4d96294 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -1264,6 +1264,10 @@
 
   /* Scale if necessary */
   if (img && (img->d_w != cfg->g_w || img->d_h != cfg->g_h)) {
+    if (img->fmt != VPX_IMG_FMT_I420 && img->fmt != VPX_IMG_FMT_YV12) {
+      fprintf(stderr, "%s can only scale 4:2:0 8bpp inputs\n", exec_name);
+      exit(EXIT_FAILURE);
+    }
     if (!stream->img)
       stream->img = vpx_img_alloc(NULL, VPX_IMG_FMT_I420,
                                   cfg->g_w, cfg->g_h, 16);