Merge "Fix C versions of DC calculation functions"

diff --git a/iosbuild.sh b/build/make/iosbuild.sh
similarity index 74%
rename from iosbuild.sh
rename to build/make/iosbuild.sh
index 0092bf2..bc3cc94 100755
--- a/iosbuild.sh
+++ b/build/make/iosbuild.sh

@@ -22,7 +22,7 @@
 FRAMEWORK_DIR="VPX.framework"
 HEADER_DIR="${FRAMEWORK_DIR}/Headers/vpx"
 MAKE_JOBS=1
-LIBVPX_SOURCE_DIR=$(dirname "$0")
+LIBVPX_SOURCE_DIR=$(dirname "$0" | sed -e s,/build/make,,)
 LIPO=$(xcrun -sdk iphoneos${SDK} -find lipo)
 ORIG_PWD="$(pwd)"
 TARGETS="armv6-darwin-gcc
@@ -31,14 +31,6 @@
          x86-iphonesimulator-gcc
          x86_64-iphonesimulator-gcc"
 
-# This variable is set to the last dist dir used with make dist, and reused when
-# populating the framework directory to get the path to the most recent
-# includes.
-TARGET_DIST_DIR=""
-
-# List of library files passed to lipo.
-LIBS=""
-
 build_target() {
   local target="$1"
   local old_pwd="$(pwd)"
@@ -57,8 +49,10 @@
 }
 
 build_targets() {
+  local lib_list=""
   local targets="$1"
-  local target
+  local target=""
+  local target_dist_dir=""
 
   # Clean up from previous build(s).
   rm -rf "${BUILD_ROOT}" "${FRAMEWORK_DIR}"
@@ -71,11 +65,28 @@
 
   for target in ${targets}; do
     build_target "${target}"
-    TARGET_DIST_DIR="${BUILD_ROOT}/${target}/${DIST_DIR}"
-    LIBS="${LIBS} ${TARGET_DIST_DIR}/lib/libvpx.a"
+    target_dist_dir="${BUILD_ROOT}/${target}/${DIST_DIR}"
+    lib_list="${lib_list} ${target_dist_dir}/lib/libvpx.a"
   done
 
   cd "${ORIG_PWD}"
+
+  # Includes are identical for all platforms, and according to dist target
+  # behavior vpx_config.h and vpx_version.h aren't actually necessary for user
+  # apps built with libvpx. So, just copy the includes from the last target
+  # built.
+  # TODO(tomfinegan): The above is a lame excuse. Build common config/version
+  # includes that use the preprocessor to include the correct file.
+  cp -p "${target_dist_dir}"/include/vpx/* "${HEADER_DIR}"
+  ${LIPO} -create ${lib_list} -output ${FRAMEWORK_DIR}/VPX
+
+  vlog "Created fat library ${FRAMEWORK_DIR}/VPX containing:"
+  for lib in ${lib_list}; do
+    vlog "  $(echo ${lib} | awk -F / '{print $2, $NF}')"
+  done
+
+  # TODO(tomfinegan): Verify that expected targets are included within
+  # VPX.framework/VPX via lipo -info.
 }
 
 cleanup() {
@@ -150,16 +161,3 @@
 fi
 
 build_targets "${TARGETS}"
-
-# Includes are identical for all platforms, and according to dist target
-# behavior vpx_config.h and vpx_version.h aren't actually necessary for user
-# apps built with libvpx. So, just copy the includes from the last target built.
-# TODO(tomfinegan): The above is a lame excuse. Build common config/version
-# includes that use the preprocessor to include the correct file.
-cp -p "${TARGET_DIST_DIR}"/include/vpx/* "${HEADER_DIR}"
-${LIPO} -create ${LIBS} -output ${FRAMEWORK_DIR}/VPX
-
-vlog "Created fat library ${FRAMEWORK_DIR}/VPX containing:"
-for lib in ${LIBS}; do
-  vlog "  $(echo ${lib} | awk -F / '{print $2, $NF}')"
-done

diff --git a/test/decode_to_md5.sh b/test/decode_to_md5.sh
index f64acc8..28e29c6 100755
--- a/test/decode_to_md5.sh
+++ b/test/decode_to_md5.sh

@@ -34,7 +34,10 @@
   local expected_md5="$3"
   local output_file="${VPX_TEST_OUTPUT_DIR}/decode_to_md5_${codec}"
 
-  [ -x "${decoder}" ] || return 1
+  if [ ! -x "${decoder}" ]; then
+    elog "${decoder} does not exist or is not executable."
+    return 1
+  fi
 
   eval "${decoder}" "${input_file}" "${output_file}" ${devnull}
 

diff --git a/test/decode_with_drops.sh b/test/decode_with_drops.sh
index 82e934d..12e17de 100755
--- a/test/decode_with_drops.sh
+++ b/test/decode_with_drops.sh

@@ -34,7 +34,10 @@
   local output_file="${VPX_TEST_OUTPUT_DIR}/decode_with_drops_${codec}"
   local drop_mode="$3"
 
-  [ -x "${decoder}" ] || return 1
+  if [ ! -x "${decoder}" ]; then
+    elog "${decoder} does not exist or is not executable."
+    return 1
+  fi
 
   eval "${decoder}" "${input_file}" "${output_file}" "${drop_mode}" ${devnull}
 

diff --git a/test/postproc.sh b/test/postproc.sh
index 050a368..c9c4e58 100755
--- a/test/postproc.sh
+++ b/test/postproc.sh

@@ -32,7 +32,10 @@
   local codec="$2"
   local output_file="${VPX_TEST_OUTPUT_DIR}/postproc_${codec}.raw"
 
-  [ -x "${decoder}" ] || return 1
+  if [ ! -x "${decoder}" ]; then
+    elog "${decoder} does not exist or is not executable."
+    return 1
+  fi
 
   eval "${decoder}" "${input_file}" "${output_file}" ${devnull}
 

diff --git a/test/resize_util.sh b/test/resize_util.sh
index 2a8e3fb..ab3dfd1 100755
--- a/test/resize_util.sh
+++ b/test/resize_util.sh

@@ -33,7 +33,10 @@
 
   # resize_util is available only when CONFIG_SHARED is disabled.
   if [ -z "$(vpx_config_option_enabled CONFIG_SHARED)" ]; then
-    [ -x "${resizer}" ] || return 1
+    if [ ! -x "${resizer}" ]; then
+      elog "${resizer} does not exist or is not executable."
+      return 1
+    fi
 
     eval "${resizer}" "${YUV_RAW_INPUT}" \
         "${YUV_RAW_INPUT_WIDTH}x${YUV_RAW_INPUT_HEIGHT}" \

diff --git a/test/simple_decoder.sh b/test/simple_decoder.sh
index 24b17c5..0be48e6 100755
--- a/test/simple_decoder.sh
+++ b/test/simple_decoder.sh

@@ -32,7 +32,10 @@
   local codec="$2"
   local output_file="${VPX_TEST_OUTPUT_DIR}/simple_decoder_${codec}.raw"
 
-  [ -x "${decoder}" ] || return 1
+  if [ ! -x "${decoder}" ]; then
+    elog "${decoder} does not exist or is not executable."
+    return 1
+  fi
 
   eval "${decoder}" "${input_file}" "${output_file}" ${devnull}
 

diff --git a/test/simple_encoder.sh b/test/simple_encoder.sh
index 6232093..a0b0e13 100755
--- a/test/simple_encoder.sh
+++ b/test/simple_encoder.sh

@@ -29,7 +29,10 @@
   local codec="$1"
   local output_file="${VPX_TEST_OUTPUT_DIR}/simple_encoder_${codec}.ivf"
 
-  [ -x "${encoder}" ] || return 1
+  if [ ! -x "${encoder}" ]; then
+    elog "${encoder} does not exist or is not executable."
+    return 1
+  fi
 
   eval "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
       "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 9999 \

diff --git a/test/test-data.sha1 b/test/test-data.sha1
index 56946b5..0def69d 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1

@@ -638,4 +638,5 @@
 e3ab35d4316c5e81325c50f5236ceca4bc0d35df  vp90-2-15-segkey.webm.md5
 9b7ca2cac09d34c4a5d296c1900f93b1e2f69d0d  vp90-2-15-segkey_adpq.webm
 8f46ba5f785d0c2170591a153e0d0d146a7c8090  vp90-2-15-segkey_adpq.webm.md5
-
+d78e2fceba5ac942246503ec8366f879c4775ca5  vp90-2-15-fuzz-flicker.webm
+bbd7dd15f43a703ff0a332fee4959e7b23bf77dc  vp90-2-15-fuzz-flicker.webm.md5

diff --git a/test/test.mk b/test/test.mk
index 56e467a..c59ae11 100644
--- a/test/test.mk
+++ b/test/test.mk

@@ -754,6 +754,8 @@
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-fuzz-flicker.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-fuzz-flicker.webm.md5
 
 ifeq ($(CONFIG_DECODE_PERF_TESTS),yes)
 # BBB VP9 streams

diff --git a/test/test_vectors.cc b/test/test_vectors.cc
index fd8c4c3..3873712 100644
--- a/test/test_vectors.cc
+++ b/test/test_vectors.cc

@@ -178,7 +178,8 @@
   "vp90-2-14-resize-fp-tiles-4-2.webm", "vp90-2-14-resize-fp-tiles-4-8.webm",
   "vp90-2-14-resize-fp-tiles-8-16.webm", "vp90-2-14-resize-fp-tiles-8-1.webm",
   "vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm",
-  "vp90-2-15-segkey.webm", "vp90-2-15-segkey_adpq.webm"
+  "vp90-2-15-segkey.webm", "vp90-2-15-segkey_adpq.webm",
+  "vp90-2-15-fuzz-flicker.webm"
 };
 const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors);
 #endif  // CONFIG_VP9_DECODER

diff --git a/test/tools_common.sh b/test/tools_common.sh
index bb02429..7f32905 100755
--- a/test/tools_common.sh
+++ b/test/tools_common.sh

@@ -17,6 +17,10 @@
 set -e
 devnull='> /dev/null 2>&1'
 
+elog() {
+  echo "$@" 1>&2
+}
+
 vlog() {
   if [ "${VPX_TEST_VERBOSE_OUTPUT}" = "yes" ]; then
     echo "$@"
@@ -456,10 +460,19 @@
   LIBVPX_BIN_PATH=${LIBVPX_BIN_PATH}
   LIBVPX_CONFIG_PATH=${LIBVPX_CONFIG_PATH}
   LIBVPX_TEST_DATA_PATH=${LIBVPX_TEST_DATA_PATH}
-  VPX_TEST_OUTPUT_DIR=${VPX_TEST_OUTPUT_DIR}
-  VPX_TEST_VERBOSE_OUTPUT=${VPX_TEST_VERBOSE_OUTPUT}
+  VP8_IVF_FILE=${VP8_IVF_FILE}
+  VP9_IVF_FILE=${VP9_IVF_FILE}
+  VP9_WEBM_FILE=${VP9_WEBM_FILE}
+  VPX_TEST_EXE_SUFFIX=${VPX_TEST_EXE_SUFFIX}
   VPX_TEST_FILTER=${VPX_TEST_FILTER}
+  VPX_TEST_OUTPUT_DIR=${VPX_TEST_OUTPUT_DIR}
+  VPX_TEST_RAND=${VPX_TEST_RAND}
   VPX_TEST_RUN_DISABLED_TESTS=${VPX_TEST_RUN_DISABLED_TESTS}
-  VPX_TEST_SHOW_PROGRAM_OUTPUT=${VPX_TEST_SHOW_PROGRAM_OUTPUT}"
+  VPX_TEST_SHOW_PROGRAM_OUTPUT=${VPX_TEST_SHOW_PROGRAM_OUTPUT}
+  VPX_TEST_TEMP_ROOT=${VPX_TEST_TEMP_ROOT}
+  VPX_TEST_VERBOSE_OUTPUT=${VPX_TEST_VERBOSE_OUTPUT}
+  YUV_RAW_INPUT=${YUV_RAW_INPUT}
+  YUV_RAW_INPUT_WIDTH=${YUV_RAW_INPUT_WIDTH}
+  YUV_RAW_INPUT_HEIGHT=${YUV_RAW_INPUT_HEIGHT}"
 
 fi  # End $VPX_TEST_TOOLS_COMMON_SH pseudo include guard.

diff --git a/test/twopass_encoder.sh b/test/twopass_encoder.sh
index fe3cbbb..95d49d6 100755
--- a/test/twopass_encoder.sh
+++ b/test/twopass_encoder.sh

@@ -29,7 +29,10 @@
   local codec="$1"
   local output_file="${VPX_TEST_OUTPUT_DIR}/twopass_encoder_${codec}.ivf"
 
-  [ -x "${encoder}" ] || return 1
+  if [ ! -x "${encoder}" ]; then
+    elog "${encoder} does not exist or is not executable."
+    return 1
+  fi
 
   eval "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
       "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \

diff --git a/test/vp8cx_set_ref.sh b/test/vp8cx_set_ref.sh
index ef9d0c0..ee10056 100755
--- a/test/vp8cx_set_ref.sh
+++ b/test/vp8cx_set_ref.sh

@@ -34,7 +34,10 @@
   local output_file="${VPX_TEST_OUTPUT_DIR}/vp8cx_set_ref_${codec}.ivf"
   local ref_frame_num=90
 
-  [ -x "${encoder}" ] || return 1
+  if [ ! -x "${encoder}" ]; then
+    elog "${encoder} does not exist or is not executable."
+    return 1
+  fi
 
   eval "${encoder}" "${YUV_RAW_INPUT_WIDTH}" "${YUV_RAW_INPUT_HEIGHT}" \
       "${YUV_RAW_INPUT}" "${output_file}" "${ref_frame_num}" \

diff --git a/test/vp9_spatial_svc_encoder.sh b/test/vp9_spatial_svc_encoder.sh
index 635cfa2..8c9d130 100755
--- a/test/vp9_spatial_svc_encoder.sh
+++ b/test/vp9_spatial_svc_encoder.sh

@@ -34,7 +34,10 @@
 
   shift
 
-  [ -x "${encoder}" ] || return 1
+  if [ ! -x "${encoder}" ]; then
+    elog "${encoder} does not exist or is not executable."
+    return 1
+  fi
 
   eval "${encoder}" -w "${YUV_RAW_INPUT_WIDTH}" -h "${YUV_RAW_INPUT_HEIGHT}" \
       -k "${max_kf}" -f "${frames_to_encode}" "$@" "${YUV_RAW_INPUT}" \

diff --git a/test/vpx_temporal_svc_encoder.sh b/test/vpx_temporal_svc_encoder.sh
index ff64740..b2e968f 100755
--- a/test/vpx_temporal_svc_encoder.sh
+++ b/test/vpx_temporal_svc_encoder.sh

@@ -39,7 +39,10 @@
 
   shift 2
 
-  [ -x "${encoder}" ] || return 1
+  if [ ! -x "${encoder}" ]; then
+    elog "${encoder} does not exist or is not executable."
+    return 1
+  fi
 
   eval "${encoder}" "${YUV_RAW_INPUT}" "${output_file}" "${codec}" \
       "${YUV_RAW_INPUT_WIDTH}" "${YUV_RAW_INPUT_HEIGHT}" \

diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c
index f26741f..5616bda 100644
--- a/vp8/encoder/denoising.c
+++ b/vp8/encoder/denoising.c

@@ -191,10 +191,12 @@
     return FILTER_BLOCK;
 }
 
-int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height)
+int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
+                          int num_mb_rows, int num_mb_cols)
 {
     int i;
     assert(denoiser);
+    denoiser->num_mb_cols = num_mb_cols;
 
     for (i = 0; i < MAX_REF_FRAMES; i++)
     {
@@ -222,6 +224,10 @@
 
     vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0,
                denoiser->yv12_mc_running_avg.frame_size);
+
+    denoiser->denoise_state = vpx_calloc((num_mb_rows * num_mb_cols), 1);
+    vpx_memset(denoiser->denoise_state, 0, (num_mb_rows * num_mb_cols));
+
     return 0;
 }
 
@@ -243,13 +249,20 @@
                              unsigned int best_sse,
                              unsigned int zero_mv_sse,
                              int recon_yoffset,
-                             int recon_uvoffset)
+                             int recon_uvoffset,
+                             loop_filter_info_n *lfi_n,
+                             int mb_row,
+                             int mb_col,
+                             int block_index)
 {
     int mv_row;
     int mv_col;
     unsigned int motion_magnitude2;
     unsigned int sse_thresh;
     int sse_diff_thresh = 0;
+    // Spatial loop filter: only applied selectively based on
+    // temporal filter state of block relative to top/left neighbors.
+    int apply_spatial_loop_filter = 1;
     MV_REFERENCE_FRAME frame = x->best_reference_frame;
     MV_REFERENCE_FRAME zero_frame = x->best_zeromv_reference_frame;
 
@@ -362,6 +375,8 @@
                                          running_avg_y, avg_y_stride,
                                          x->thismb, 16, motion_magnitude2,
                                          x->increase_denoising);
+        denoiser->denoise_state[block_index] = motion_magnitude2 > 0 ?
+            kFilterNonZeroMV : kFilterZeroMV;
     }
     if (decision == COPY_BLOCK)
     {
@@ -372,5 +387,59 @@
                 x->thismb, 16,
                 denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset,
                 denoiser->yv12_running_avg[INTRA_FRAME].y_stride);
+        denoiser->denoise_state[block_index] = kNoFilter;
+    }
+    // Option to selectively deblock the denoised signal.
+    if (apply_spatial_loop_filter) {
+      loop_filter_info lfi;
+      int apply_filter_col = 0;
+      int apply_filter_row = 0;
+      int apply_filter = 0;
+      int y_stride = denoiser->yv12_running_avg[INTRA_FRAME].y_stride;
+      int uv_stride =denoiser->yv12_running_avg[INTRA_FRAME].uv_stride;
+
+      // Fix filter level to some nominal value for now.
+      int filter_level = 32;
+
+      int hev_index = lfi_n->hev_thr_lut[INTER_FRAME][filter_level];
+      lfi.mblim = lfi_n->mblim[filter_level];
+      lfi.blim = lfi_n->blim[filter_level];
+      lfi.lim = lfi_n->lim[filter_level];
+      lfi.hev_thr = lfi_n->hev_thr[hev_index];
+
+      // Apply filter if there is a difference in the denoiser filter state
+      // between the current and left/top block, or if non-zero motion vector
+      // is used for the motion-compensated filtering.
+      if (mb_col > 0) {
+        apply_filter_col = !((denoiser->denoise_state[block_index] ==
+            denoiser->denoise_state[block_index - 1]) &&
+            denoiser->denoise_state[block_index] != kFilterNonZeroMV);
+        if (apply_filter_col) {
+          // Filter left vertical edge.
+          apply_filter = 1;
+          vp8_loop_filter_mbv(
+              denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset,
+              NULL, NULL, y_stride, uv_stride, &lfi);
+        }
+      }
+      if (mb_row > 0) {
+        apply_filter_row = !((denoiser->denoise_state[block_index] ==
+            denoiser->denoise_state[block_index - denoiser->num_mb_cols]) &&
+            denoiser->denoise_state[block_index] != kFilterNonZeroMV);
+        if (apply_filter_row) {
+          // Filter top horizontal edge.
+          apply_filter = 1;
+          vp8_loop_filter_mbh(
+              denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset,
+              NULL, NULL, y_stride, uv_stride, &lfi);
+        }
+      }
+      if (apply_filter) {
+        // Update the signal block |x|. Pixel changes are only to top and/or
+        // left boundary pixels: can we avoid full block copy here.
+        vp8_copy_mem16x16(
+            denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset,
+            y_stride, x->thismb, 16);
+      }
     }
 }

diff --git a/vp8/encoder/denoising.h b/vp8/encoder/denoising.h
index ae744d2..6db0785 100644
--- a/vp8/encoder/denoising.h
+++ b/vp8/encoder/denoising.h

@@ -12,6 +12,7 @@
 #define VP8_ENCODER_DENOISING_H_
 
 #include "block.h"
+#include "vp8/common/loopfilter.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -27,13 +28,22 @@
   FILTER_BLOCK
 };
 
+enum vp8_denoiser_filter_state {
+  kNoFilter,
+  kFilterZeroMV,
+  kFilterNonZeroMV
+};
+
 typedef struct vp8_denoiser
 {
     YV12_BUFFER_CONFIG yv12_running_avg[MAX_REF_FRAMES];
     YV12_BUFFER_CONFIG yv12_mc_running_avg;
+    unsigned char* denoise_state;
+    int num_mb_cols;
 } VP8_DENOISER;
 
-int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height);
+int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
+                          int num_mb_rows, int num_mb_cols);
 
 void vp8_denoiser_free(VP8_DENOISER *denoiser);
 
@@ -42,7 +52,11 @@
                              unsigned int best_sse,
                              unsigned int zero_mv_sse,
                              int recon_yoffset,
-                             int recon_uvoffset);
+                             int recon_uvoffset,
+                             loop_filter_info_n *lfi_n,
+                             int mb_row,
+                             int mb_col,
+                             int block_index);
 
 #ifdef __cplusplus
 }  // extern "C"

diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index b550f6b..e6b0f9b 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c

@@ -1246,7 +1246,7 @@
             x->zbin_mode_boost_enabled = 0;
         }
         vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,
-                               &distortion, &intra_error);
+                               &distortion, &intra_error, mb_row, mb_col);
 
         /* switch back to the regular quantizer for the encode */
         if (cpi->sf.improved_quant)

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 4d16a5f..09854a5 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c

@@ -1751,7 +1751,8 @@
       {
         int width = (cpi->oxcf.Width + 15) & ~15;
         int height = (cpi->oxcf.Height + 15) & ~15;
-        vp8_denoiser_allocate(&cpi->denoiser, width, height);
+        vp8_denoiser_allocate(&cpi->denoiser, width, height,
+                              cpi->common.mb_rows, cpi->common.mb_cols);
       }
     }
 #endif

diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 817c9ef..3a78ee9 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c

@@ -1168,6 +1168,7 @@
 #if CONFIG_TEMPORAL_DENOISING
     if (cpi->oxcf.noise_sensitivity)
     {
+        int block_index = mb_row * cpi->common.mb_cols + mb_col;
         if (x->best_sse_inter_mode == DC_PRED)
         {
             /* No best MV found. */
@@ -1179,7 +1180,9 @@
         }
         x->increase_denoising = 0;
         vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
-                                recon_yoffset, recon_uvoffset);
+                                recon_yoffset, recon_uvoffset,
+                                &cpi->common.lf_info, mb_row, mb_col,
+                                block_index);
 
 
         /* Reevaluate ZEROMV after denoising. */

diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index f145d09..4465b5e 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c

@@ -1935,7 +1935,8 @@
 
 void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
                             int recon_uvoffset, int *returnrate,
-                            int *returndistortion, int *returnintra)
+                            int *returndistortion, int *returnintra,
+                            int mb_row, int mb_col)
 {
     BLOCK *b = &x->block[0];
     BLOCKD *d = &x->e_mbd.block[0];
@@ -2510,6 +2511,7 @@
 #if CONFIG_TEMPORAL_DENOISING
     if (cpi->oxcf.noise_sensitivity)
     {
+        int block_index = mb_row * cpi->common.mb_cols + mb_col;
         if (x->best_sse_inter_mode == DC_PRED)
         {
             /* No best MV found. */
@@ -2520,7 +2522,9 @@
             best_sse = best_rd_sse;
         }
         vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
-                                recon_yoffset, recon_uvoffset);
+                                recon_yoffset, recon_uvoffset,
+                                &cpi->common.lf_info, mb_row, mb_col,
+                                block_index);
 
 
         /* Reevaluate ZEROMV after denoising. */

diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h
index fe21b8e..e0da35e 100644
--- a/vp8/encoder/rdopt.h
+++ b/vp8/encoder/rdopt.h

@@ -70,7 +70,10 @@
 }
 
 extern void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue);
-extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra);
+extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x,
+                                   int recon_yoffset, int recon_uvoffset,
+                                   int *returnrate, int *returndistortion,
+                                   int *returnintra, int mb_row, int mb_col);
 extern void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate);
 
 

diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 66a3956..68613ec 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl

@@ -402,25 +402,25 @@
 
 # variance
 add_proto qw/unsigned int vp9_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance32x16/, "$sse2_x86inc", "$avx2_x86inc";
+specialize qw/vp9_variance32x16 avx2/, "$sse2_x86inc";
 
 add_proto qw/unsigned int vp9_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_variance16x32/, "$sse2_x86inc";
 
 add_proto qw/unsigned int vp9_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance64x32/, "$sse2_x86inc", "$avx2_x86inc";
+specialize qw/vp9_variance64x32 avx2/, "$sse2_x86inc";
 
 add_proto qw/unsigned int vp9_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_variance32x64/, "$sse2_x86inc";
 
 add_proto qw/unsigned int vp9_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance32x32/, "$sse2_x86inc", "$avx2_x86inc";
+specialize qw/vp9_variance32x32 avx2/, "$sse2_x86inc";
 
 add_proto qw/unsigned int vp9_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance64x64/, "$sse2_x86inc", "$avx2_x86inc";
+specialize qw/vp9_variance64x64 avx2/, "$sse2_x86inc";
 
 add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";
+specialize qw/vp9_variance16x16 mmx avx2/, "$sse2_x86inc";
 
 add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc";
@@ -693,7 +693,7 @@
 specialize qw/vp9_sad4x4x4d sse/;
 
 add_proto qw/unsigned int vp9_mse16x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
-specialize qw/vp9_mse16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";
+specialize qw/vp9_mse16x16 mmx avx2/, "$sse2_x86inc";
 
 add_proto qw/unsigned int vp9_mse8x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
 specialize qw/vp9_mse8x16/;

diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 13d79ff4..5859859 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c

@@ -211,7 +211,10 @@
   }
 
   cm->frame_to_show = get_frame_new_buffer(cm);
-  cm->frame_bufs[cm->new_fb_idx].ref_count--;
+
+  if (!pbi->frame_parallel_decode || !cm->show_frame) {
+    --cm->frame_bufs[cm->new_fb_idx].ref_count;
+  }
 
   // Invalidate these references until the next frame starts.
   for (ref_index = 0; ref_index < 3; ref_index++)
@@ -240,7 +243,9 @@
   }
 
   // Check if the previous frame was a frame without any references to it.
-  if (cm->new_fb_idx >= 0 && cm->frame_bufs[cm->new_fb_idx].ref_count == 0)
+  // Release frame buffer if not decoding in frame parallel mode.
+  if (!pbi->frame_parallel_decode && cm->new_fb_idx >= 0 &&
+         cm->frame_bufs[cm->new_fb_idx].ref_count == 0)
     cm->release_fb_cb(cm->cb_priv,
                       &cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer);
   cm->new_fb_idx = get_free_fb(cm);

diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 1aaa1d5..525eccd 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c

@@ -538,7 +538,7 @@
   cpi->svc.number_temporal_layers = oxcf->ts_number_layers;
 
   if ((cpi->svc.number_temporal_layers > 1 &&
-      cpi->oxcf.rc_mode == RC_MODE_CBR) ||
+      cpi->oxcf.rc_mode == VPX_CBR) ||
       (cpi->svc.number_spatial_layers > 1 &&
       cpi->oxcf.mode == TWO_PASS_SECOND_BEST)) {
     vp9_init_layer_context(cpi);
@@ -609,7 +609,7 @@
   cpi->encode_breakout = cpi->oxcf.encode_breakout;
 
   // local file playback mode == really big buffer
-  if (cpi->oxcf.rc_mode == RC_MODE_VBR) {
+  if (cpi->oxcf.rc_mode == VPX_VBR) {
     cpi->oxcf.starting_buffer_level_ms = 60000;
     cpi->oxcf.optimal_buffer_level_ms = 60000;
     cpi->oxcf.maximum_buffer_size_ms = 240000;
@@ -657,7 +657,7 @@
   update_frame_size(cpi);
 
   if ((cpi->svc.number_temporal_layers > 1 &&
-      cpi->oxcf.rc_mode == RC_MODE_CBR) ||
+      cpi->oxcf.rc_mode == VPX_CBR) ||
       (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2)) {
     vp9_update_layer_context_change_config(cpi,
                                            (int)cpi->oxcf.target_bandwidth);
@@ -1487,7 +1487,7 @@
     if ((rc->projected_frame_size > high_limit && q < maxq) ||
         (rc->projected_frame_size < low_limit && q > minq)) {
       force_recode = 1;
-    } else if (cpi->oxcf.rc_mode == RC_MODE_CONSTRAINED_QUALITY) {
+    } else if (cpi->oxcf.rc_mode == VPX_CQ) {
       // Deal with frame undershoot and whether or not we are
       // below the automatically set cq level.
       if (q > oxcf->cq_level &&
@@ -1789,7 +1789,7 @@
         frame_over_shoot_limit = 1;
     }
 
-    if (cpi->oxcf.rc_mode == RC_MODE_CONSTANT_QUALITY) {
+    if (cpi->oxcf.rc_mode == VPX_Q) {
       loop = 0;
     } else {
       if ((cm->frame_type == KEY_FRAME) &&
@@ -1887,7 +1887,7 @@
             // This should only trigger where there is very substantial
             // undershoot on a frame and the auto cq level is above
             // the user passsed in value.
-            if (cpi->oxcf.rc_mode == RC_MODE_CONSTRAINED_QUALITY &&
+            if (cpi->oxcf.rc_mode == VPX_CQ &&
                 q < q_low) {
               q_low = q;
             }
@@ -2085,7 +2085,7 @@
   // For 1 pass CBR, check if we are dropping this frame.
   // Never drop on key frame.
   if (cpi->pass == 0 &&
-      cpi->oxcf.rc_mode == RC_MODE_CBR &&
+      cpi->oxcf.rc_mode == VPX_CBR &&
       cm->frame_type != KEY_FRAME) {
     if (vp9_rc_drop_frame(cpi)) {
       vp9_rc_postencode_update_drop_frame(cpi);
@@ -2281,7 +2281,7 @@
 
 static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
                         unsigned int *frame_flags) {
-  if (cpi->oxcf.rc_mode == RC_MODE_CBR) {
+  if (cpi->oxcf.rc_mode == VPX_CBR) {
     vp9_rc_get_one_pass_cbr_params(cpi);
   } else {
     vp9_rc_get_one_pass_vbr_params(cpi);
@@ -2561,7 +2561,7 @@
   }
 
   if (cpi->svc.number_temporal_layers > 1 &&
-      cpi->oxcf.rc_mode == RC_MODE_CBR) {
+      cpi->oxcf.rc_mode == VPX_CBR) {
     vp9_update_temporal_layer_framerate(cpi);
     vp9_restore_layer_context(cpi);
   }
@@ -2594,7 +2594,7 @@
   if (cpi->pass == 2 &&
       cm->current_video_frame == 0 &&
       cpi->oxcf.allow_spatial_resampling &&
-      cpi->oxcf.rc_mode == RC_MODE_VBR) {
+      cpi->oxcf.rc_mode == VPX_VBR) {
     // Internal scaling is triggered on the first frame.
     vp9_set_size_literal(cpi, cpi->oxcf.scaled_frame_width,
                          cpi->oxcf.scaled_frame_height);
@@ -2656,7 +2656,7 @@
 
   // Save layer specific state.
   if ((cpi->svc.number_temporal_layers > 1 &&
-      cpi->oxcf.rc_mode == RC_MODE_CBR) ||
+      cpi->oxcf.rc_mode == VPX_CBR) ||
       (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2)) {
     vp9_save_layer_context(cpi);
   }

diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 4364939..a27868a 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h

@@ -134,13 +134,6 @@
 } VPX_SCALING;
 
 typedef enum {
-  RC_MODE_VBR = 0,
-  RC_MODE_CBR = 1,
-  RC_MODE_CONSTRAINED_QUALITY = 2,
-  RC_MODE_CONSTANT_QUALITY    = 3,
-} RC_MODE;
-
-typedef enum {
   // Good Quality Fast Encoding. The encoder balances quality with the
   // amount of time it takes to encode the output. (speed setting
   // controls how fast)
@@ -212,7 +205,8 @@
   // ----------------------------------------------------------------
   // DATARATE CONTROL OPTIONS
 
-  RC_MODE rc_mode;  // vbr, cbr, constrained quality or constant quality
+  // vbr, cbr, constrained quality or constant quality
+  enum vpx_rc_mode rc_mode;
 
   // buffer targeting aggressiveness
   int under_shoot_pct;

diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index b430b56..7cadb36 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c

@@ -596,91 +596,73 @@
       if (cm->current_video_frame > 0) {
         int tmp_err, motion_error;
         int_mv mv, tmp_mv;
-        int raw_motion_error;
-        struct buf_2d unscaled_last_source_buf_2d;
 
         xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
         motion_error = get_prediction_error(bsize, &x->plane[0].src,
                                             &xd->plane[0].pre[0]);
+        // Assume 0,0 motion with no mv overhead.
+        mv.as_int = tmp_mv.as_int = 0;
 
-        // compute the motion error of the zero motion vector using the last
-        // source frame as the reference
-        // skip the further motion search on reconstructed frame
-        // if this error is small
-        unscaled_last_source_buf_2d.buf = cpi->unscaled_last_source->y_buffer
-                                  + recon_yoffset;
-        unscaled_last_source_buf_2d.stride =
-                                          cpi->unscaled_last_source->y_stride;
-        raw_motion_error = get_prediction_error(bsize, &x->plane[0].src,
-                                                &unscaled_last_source_buf_2d);
+        // Test last reference frame using the previous best mv as the
+        // starting point (best reference) for the search.
+        first_pass_motion_search(cpi, x, &best_ref_mv.as_mv, &mv.as_mv,
+                                 &motion_error);
+        if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
+          vp9_clear_system_state();
+          motion_error = (int)(motion_error * error_weight);
+        }
 
-        // TODO(pengchong): Replace the hard-coded threshold
-        if (raw_motion_error > 25) {
-          // Assume 0,0 motion with no mv overhead.
-          mv.as_int = tmp_mv.as_int = 0;
-
-          // Test last reference frame using the previous best mv as the
-          // starting point (best reference) for the search.
-          first_pass_motion_search(cpi, x, &best_ref_mv.as_mv, &mv.as_mv,
-                                   &motion_error);
+        // If the current best reference mv is not centered on 0,0 then do a 0,0
+        // based search as well.
+        if (best_ref_mv.as_int) {
+          tmp_err = INT_MAX;
+          first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
+                                   &tmp_err);
           if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
             vp9_clear_system_state();
-            motion_error = (int)(motion_error * error_weight);
+            tmp_err = (int)(tmp_err * error_weight);
           }
 
-          // If the current best reference mv is not centered on 0,0
-          // then do a 0,0
-          // based search as well.
-          if (best_ref_mv.as_int) {
-            tmp_err = INT_MAX;
-            first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
-                                     &tmp_err);
-            if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
-              vp9_clear_system_state();
-              tmp_err = (int)(tmp_err * error_weight);
-            }
+          if (tmp_err < motion_error) {
+            motion_error = tmp_err;
+            mv.as_int = tmp_mv.as_int;
+          }
+        }
 
-            if (tmp_err < motion_error) {
-              motion_error = tmp_err;
-              mv.as_int = tmp_mv.as_int;
-            }
+        // Search in an older reference frame.
+        if (cm->current_video_frame > 1 && gld_yv12 != NULL) {
+          // Assume 0,0 motion with no mv overhead.
+          int gf_motion_error;
+
+          xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
+          gf_motion_error = get_prediction_error(bsize, &x->plane[0].src,
+                                                 &xd->plane[0].pre[0]);
+
+          first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
+                                   &gf_motion_error);
+          if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
+            vp9_clear_system_state();
+            gf_motion_error = (int)(gf_motion_error * error_weight);
           }
 
-          // Search in an older reference frame.
-          if (cm->current_video_frame > 1 && gld_yv12 != NULL) {
-            // Assume 0,0 motion with no mv overhead.
-            int gf_motion_error;
+          if (gf_motion_error < motion_error && gf_motion_error < this_error)
+            ++second_ref_count;
 
-            xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
-            gf_motion_error = get_prediction_error(bsize, &x->plane[0].src,
-                                                   &xd->plane[0].pre[0]);
+          // Reset to last frame as reference buffer.
+          xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
+          xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset;
+          xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset;
 
-            first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
-                                     &gf_motion_error);
-            if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
-              vp9_clear_system_state();
-              gf_motion_error = (int)(gf_motion_error * error_weight);
-            }
-
-            if (gf_motion_error < motion_error && gf_motion_error < this_error)
-              ++second_ref_count;
-
-            // Reset to last frame as reference buffer.
-            xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
-            xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset;
-            xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset;
-
-            // In accumulating a score for the older reference frame take the
-            // best of the motion predicted score and the intra coded error
-            // (just as will be done for) accumulation of "coded_error" for
-            // the last frame.
-            if (gf_motion_error < this_error)
-              sr_coded_error += gf_motion_error;
-            else
-              sr_coded_error += this_error;
-          } else {
-            sr_coded_error += motion_error;
-          }
+          // In accumulating a score for the older reference frame take the
+          // best of the motion predicted score and the intra coded error
+          // (just as will be done for) accumulation of "coded_error" for
+          // the last frame.
+          if (gf_motion_error < this_error)
+            sr_coded_error += gf_motion_error;
+          else
+            sr_coded_error += this_error;
+        } else {
+          sr_coded_error += motion_error;
         }
         // Start by assuming that intra mode is best.
         best_ref_mv.as_int = 0;
@@ -922,7 +904,7 @@
     }
 
     // Restriction on active max q for constrained quality mode.
-    if (cpi->oxcf.rc_mode == RC_MODE_CONSTRAINED_QUALITY)
+    if (cpi->oxcf.rc_mode == VPX_CQ)
       q = MAX(q, oxcf->cq_level);
     return q;
   }
@@ -2144,7 +2126,7 @@
     rc->base_frame_target = target_rate;
 #ifdef LONG_TERM_VBR_CORRECTION
     // Correction to rate target based on prior over or under shoot.
-    if (cpi->oxcf.rc_mode == RC_MODE_VBR)
+    if (cpi->oxcf.rc_mode == VPX_VBR)
       vbr_rate_correction(&target_rate, rc->vbr_bits_off_target);
 #endif
     vp9_rc_set_frame_target(cpi, target_rate);
@@ -2159,7 +2141,7 @@
     twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs;
   }
 
-  if (cpi->oxcf.rc_mode == RC_MODE_CONSTANT_QUALITY) {
+  if (cpi->oxcf.rc_mode == VPX_Q) {
     twopass->active_worst_quality = cpi->oxcf.cq_level;
   } else if (cm->current_video_frame == 0 ||
              (is_spatial_svc && lc->current_video_frame_in_layer == 0)) {
@@ -2244,7 +2226,7 @@
   rc->base_frame_target = target_rate;
 #ifdef LONG_TERM_VBR_CORRECTION
   // Correction to rate target based on prior over or under shoot.
-  if (cpi->oxcf.rc_mode == RC_MODE_VBR)
+  if (cpi->oxcf.rc_mode == VPX_VBR)
     vbr_rate_correction(&target_rate, rc->vbr_bits_off_target);
 #endif
   vp9_rc_set_frame_target(cpi, target_rate);

diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 80966ad..3f4fcd1 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c

@@ -158,15 +158,16 @@
   int64_t dist;
   struct macroblock_plane *const p = &x->plane[0];
   struct macroblockd_plane *const pd = &xd->plane[0];
-  const int quant = pd->dequant[1];
+  const uint32_t dc_quant = pd->dequant[0];
+  const uint32_t ac_quant = pd->dequant[1];
   unsigned int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride,
                                            pd->dst.buf, pd->dst.stride, &sse);
   *var_y = var;
   *sse_y = sse;
 
-  if (sse < pd->dequant[0] * pd->dequant[0] >> 6)
+  if (sse < dc_quant * dc_quant >> 6)
     x->skip_txfm = 1;
-  else if (var < quant * quant >> 6)
+  else if (var < ac_quant * ac_quant >> 6)
     x->skip_txfm = 2;
   else
     x->skip_txfm = 0;
@@ -177,7 +178,7 @@
   if ((sse >> 3) > var)
     sse = var;
   vp9_model_rd_from_var_lapndz(var + sse, 1 << num_pels_log2_lookup[bsize],
-                               quant >> 3, &rate, &dist);
+                               ac_quant >> 3, &rate, &dist);
   *out_rate_sum = rate;
   *out_dist_sum = dist << 3;
 }

diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 143c23b..f775003 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c

@@ -180,13 +180,13 @@
   rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size);
   rc->buffer_level = rc->bits_off_target;
 
-  if (cpi->use_svc && cpi->oxcf.rc_mode == RC_MODE_CBR) {
+  if (cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR) {
     update_layer_buffer_level(&cpi->svc, encoded_frame_size);
   }
 }
 
 void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
-  if (pass == 0 && oxcf->rc_mode == RC_MODE_CBR) {
+  if (pass == 0 && oxcf->rc_mode == VPX_CBR) {
     rc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q;
     rc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q;
   } else {
@@ -276,7 +276,7 @@
   } else {
     if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
         !cpi->rc.is_src_frame_alt_ref &&
-        !(cpi->use_svc && cpi->oxcf.rc_mode == RC_MODE_CBR))
+        !(cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR))
       return cpi->rc.gf_rate_correction_factor;
     else
       return cpi->rc.rate_correction_factor;
@@ -289,7 +289,7 @@
   } else {
     if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
         !cpi->rc.is_src_frame_alt_ref &&
-        !(cpi->use_svc && cpi->oxcf.rc_mode == RC_MODE_CBR))
+        !(cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR))
       cpi->rc.gf_rate_correction_factor = factor;
     else
       cpi->rc.rate_correction_factor = factor;
@@ -605,7 +605,7 @@
                                const VP9EncoderConfig *const oxcf) {
   static const double cq_adjust_threshold = 0.5;
   int active_cq_level = oxcf->cq_level;
-  if (oxcf->rc_mode == RC_MODE_CONSTRAINED_QUALITY &&
+  if (oxcf->rc_mode == VPX_CQ &&
       rc->total_target_bits > 0) {
     const double x = (double)rc->total_actual_bits / rc->total_target_bits;
     if (x < cq_adjust_threshold) {
@@ -679,7 +679,7 @@
       q = rc->avg_frame_qindex[KEY_FRAME];
     }
     // For constrained quality dont allow Q less than the cq level
-    if (oxcf->rc_mode == RC_MODE_CONSTRAINED_QUALITY) {
+    if (oxcf->rc_mode == VPX_CQ) {
       if (q < cq_level)
         q = cq_level;
 
@@ -691,7 +691,7 @@
       // Constrained quality use slightly lower active best.
       active_best_quality = active_best_quality * 15 / 16;
 
-    } else if (oxcf->rc_mode == RC_MODE_CONSTANT_QUALITY) {
+    } else if (oxcf->rc_mode == VPX_Q) {
       if (!cpi->refresh_alt_ref_frame) {
         active_best_quality = cq_level;
       } else {
@@ -705,7 +705,7 @@
           arfgf_low_motion_minq, arfgf_high_motion_minq);
     }
   } else {
-    if (oxcf->rc_mode == RC_MODE_CONSTANT_QUALITY) {
+    if (oxcf->rc_mode == VPX_Q) {
       active_best_quality = cq_level;
     } else {
       // Use the lower of active_worst_quality and recent/average Q.
@@ -715,7 +715,7 @@
         active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
       // For the constrained quality mode we don't want
       // q to fall below the cq level.
-      if ((oxcf->rc_mode == RC_MODE_CONSTRAINED_QUALITY) &&
+      if ((oxcf->rc_mode == VPX_CQ) &&
           (active_best_quality < cq_level)) {
         active_best_quality = cq_level;
       }
@@ -752,7 +752,7 @@
   }
 #endif
 
-  if (oxcf->rc_mode == RC_MODE_CONSTANT_QUALITY) {
+  if (oxcf->rc_mode == VPX_Q) {
     q = active_best_quality;
   // Special case code to try and match quality with forced key frames
   } else if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced) {
@@ -771,7 +771,7 @@
 #if CONFIG_MULTIPLE_ARF
   // Force the quantizer determined by the coding order pattern.
   if (cpi->multi_arf_enabled && (cm->frame_type != KEY_FRAME) &&
-      cpi->oxcf.rc_mode != RC_MODE_CONSTANT_QUALITY) {
+      cpi->oxcf.rc_mode != VPX_Q) {
     double new_q;
     double current_q = vp9_convert_qindex_to_q(active_worst_quality);
     int level = cpi->this_frame_weight;
@@ -859,7 +859,7 @@
       q = active_worst_quality;
     }
     // For constrained quality dont allow Q less than the cq level
-    if (oxcf->rc_mode == RC_MODE_CONSTRAINED_QUALITY) {
+    if (oxcf->rc_mode == VPX_CQ) {
       if (q < cq_level)
         q = cq_level;
 
@@ -871,7 +871,7 @@
       // Constrained quality use slightly lower active best.
       active_best_quality = active_best_quality * 15 / 16;
 
-    } else if (oxcf->rc_mode == RC_MODE_CONSTANT_QUALITY) {
+    } else if (oxcf->rc_mode == VPX_Q) {
       if (!cpi->refresh_alt_ref_frame) {
         active_best_quality = cq_level;
       } else {
@@ -885,14 +885,14 @@
           arfgf_low_motion_minq, arfgf_high_motion_minq);
     }
   } else {
-    if (oxcf->rc_mode == RC_MODE_CONSTANT_QUALITY) {
+    if (oxcf->rc_mode == VPX_Q) {
       active_best_quality = cq_level;
     } else {
       active_best_quality = inter_minq[active_worst_quality];
 
       // For the constrained quality mode we don't want
       // q to fall below the cq level.
-      if ((oxcf->rc_mode == RC_MODE_CONSTRAINED_QUALITY) &&
+      if ((oxcf->rc_mode == VPX_CQ) &&
           (active_best_quality < cq_level)) {
         active_best_quality = cq_level;
       }
@@ -919,7 +919,7 @@
       qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
                                           active_worst_quality, 2.0);
     } else if (!rc->is_src_frame_alt_ref &&
-               (oxcf->rc_mode != RC_MODE_CBR) &&
+               (oxcf->rc_mode != VPX_CBR) &&
                (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
       qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
                                           active_worst_quality, 1.75);
@@ -929,7 +929,7 @@
   }
 #endif
 
-  if (oxcf->rc_mode == RC_MODE_CONSTANT_QUALITY) {
+  if (oxcf->rc_mode == VPX_Q) {
     q = active_best_quality;
   // Special case code to try and match quality with forced key frames.
   } else if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced) {
@@ -948,7 +948,7 @@
 #if CONFIG_MULTIPLE_ARF
   // Force the quantizer determined by the coding order pattern.
   if (cpi->multi_arf_enabled && (cm->frame_type != KEY_FRAME) &&
-      cpi->oxcf.rc_mode != RC_MODE_CONSTANT_QUALITY) {
+      cpi->oxcf.rc_mode != VPX_Q) {
     double new_q;
     double current_q = vp9_convert_qindex_to_q(active_worst_quality);
     int level = cpi->this_frame_weight;
@@ -974,7 +974,7 @@
                              int *bottom_index, int *top_index) {
   int q;
   if (cpi->pass == 0) {
-    if (cpi->oxcf.rc_mode == RC_MODE_CBR)
+    if (cpi->oxcf.rc_mode == VPX_CBR)
       q = rc_pick_q_and_bounds_one_pass_cbr(cpi, bottom_index, top_index);
     else
       q = rc_pick_q_and_bounds_one_pass_vbr(cpi, bottom_index, top_index);
@@ -997,7 +997,7 @@
                                       int frame_target,
                                       int *frame_under_shoot_limit,
                                       int *frame_over_shoot_limit) {
-  if (cpi->oxcf.rc_mode == RC_MODE_CONSTANT_QUALITY) {
+  if (cpi->oxcf.rc_mode == VPX_Q) {
     *frame_under_shoot_limit = 0;
     *frame_over_shoot_limit  = INT_MAX;
   } else {
@@ -1072,7 +1072,7 @@
   // Post encode loop adjustment of Q prediction.
   vp9_rc_update_rate_correction_factors(
       cpi, (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF ||
-            oxcf->rc_mode == RC_MODE_CBR) ? 2 : 0);
+            oxcf->rc_mode == VPX_CBR) ? 2 : 0);
 
   // Keep a record of last Q and ambient average Q.
   if (cm->frame_type == KEY_FRAME) {
@@ -1082,7 +1082,7 @@
   } else {
     if (rc->is_src_frame_alt_ref ||
         !(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) ||
-        (cpi->use_svc && oxcf->rc_mode == RC_MODE_CBR)) {
+        (cpi->use_svc && oxcf->rc_mode == VPX_CBR)) {
       rc->last_q[INTER_FRAME] = qindex;
       rc->avg_frame_qindex[INTER_FRAME] =
         ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[INTER_FRAME] + qindex, 2);
@@ -1225,7 +1225,7 @@
   int min_frame_target = MAX(rc->avg_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS);
   int target = rc->avg_frame_bandwidth;
   if (svc->number_temporal_layers > 1 &&
-      oxcf->rc_mode == RC_MODE_CBR) {
+      oxcf->rc_mode == VPX_CBR) {
     // Note that for layers, avg_frame_bandwidth is the cumulative
     // per-frame-bandwidth. For the target size of this frame, use the
     // layer average frame size (i.e., non-cumulative per-frame-bw).
@@ -1258,7 +1258,7 @@
     int kf_boost = 32;
     double framerate = oxcf->framerate;
     if (svc->number_temporal_layers > 1 &&
-        oxcf->rc_mode == RC_MODE_CBR) {
+        oxcf->rc_mode == VPX_CBR) {
       // Use the layer framerate for temporal layers CBR mode.
       const LAYER_CONTEXT *lc = &svc->layer_context[svc->temporal_layer_id];
       framerate = lc->framerate;
@@ -1288,7 +1288,7 @@
       cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame = 1;
     }
 
-    if (cpi->pass == 0 && cpi->oxcf.rc_mode == RC_MODE_CBR) {
+    if (cpi->pass == 0 && cpi->oxcf.rc_mode == VPX_CBR) {
       target = calc_iframe_target_size_one_pass_cbr(cpi);
     }
   } else {
@@ -1303,7 +1303,7 @@
       }
     }
 
-    if (cpi->pass == 0 && cpi->oxcf.rc_mode == RC_MODE_CBR) {
+    if (cpi->pass == 0 && cpi->oxcf.rc_mode == VPX_CBR) {
       target = calc_pframe_target_size_one_pass_cbr(cpi);
     }
   }

diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 72768e1..edd59ab 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c

@@ -318,14 +318,7 @@
 
   oxcf->lag_in_frames = cfg->g_pass == VPX_RC_FIRST_PASS ? 0
                                                          : cfg->g_lag_in_frames;
-
-  oxcf->rc_mode = RC_MODE_VBR;
-  if (cfg->rc_end_usage == VPX_CQ)
-    oxcf->rc_mode = RC_MODE_CONSTRAINED_QUALITY;
-  else if (cfg->rc_end_usage == VPX_Q)
-    oxcf->rc_mode = RC_MODE_CONSTANT_QUALITY;
-  else if (cfg->rc_end_usage == VPX_CBR)
-    oxcf->rc_mode = RC_MODE_CBR;
+  oxcf->rc_mode = cfg->rc_end_usage;
 
   // Convert target bandwidth from Kbit/s to Bit/s
   oxcf->target_bandwidth = 1000 * cfg->rc_target_bitrate;

diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 98faa7f..3b5d4bf 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c

@@ -40,6 +40,7 @@
   vpx_image_t             img;
   int                     invert_tile_order;
   int                     frame_parallel_decode;  // frame-based threading.
+  int                     last_show_frame;  // Index of last output frame.
 
   // External frame buffer info to save for VP9 common.
   void *ext_priv;  // Private data associated with the external frame buffers.
@@ -238,6 +239,7 @@
   ctx->pbi->max_threads = ctx->cfg.threads;
   ctx->pbi->inv_tile_order = ctx->invert_tile_order;
   ctx->pbi->frame_parallel_decode = ctx->frame_parallel_decode;
+  ctx->last_show_frame = -1;
 
   // If postprocessing was enabled by the application and a
   // configuration has not been provided, default it.
@@ -319,31 +321,33 @@
     const uint32_t mag = ((marker >> 3) & 0x3) + 1;
     const size_t index_sz = 2 + mag * frames;
 
-    uint8_t marker2 = read_marker(decrypt_cb, decrypt_state,
-                                  data + data_sz - index_sz);
+    if (data_sz >= index_sz) {
+      uint8_t marker2 = read_marker(decrypt_cb, decrypt_state,
+                                    data + data_sz - index_sz);
 
-    if (data_sz >= index_sz && marker2 == marker) {
-      // found a valid superframe index
-      uint32_t i, j;
-      const uint8_t *x = &data[data_sz - index_sz + 1];
+      if (marker == marker2) {
+        // Found a valid superframe index.
+        uint32_t i, j;
+        const uint8_t *x = &data[data_sz - index_sz + 1];
 
-      // frames has a maximum of 8 and mag has a maximum of 4.
-      uint8_t clear_buffer[32];
-      assert(sizeof(clear_buffer) >= frames * mag);
-      if (decrypt_cb) {
-        decrypt_cb(decrypt_state, x, clear_buffer, frames * mag);
-        x = clear_buffer;
+        // Frames has a maximum of 8 and mag has a maximum of 4.
+        uint8_t clear_buffer[32];
+        assert(sizeof(clear_buffer) >= frames * mag);
+        if (decrypt_cb) {
+          decrypt_cb(decrypt_state, x, clear_buffer, frames * mag);
+          x = clear_buffer;
+        }
+
+        for (i = 0; i < frames; ++i) {
+          uint32_t this_sz = 0;
+
+          for (j = 0; j < mag; ++j)
+            this_sz |= (*x++) << (j * 8);
+          sizes[i] = this_sz;
+        }
+
+        *count = frames;
       }
-
-      for (i = 0; i < frames; i++) {
-        uint32_t this_sz = 0;
-
-        for (j = 0; j < mag; j++)
-          this_sz |= (*x++) << (j * 8);
-        sizes[i] = this_sz;
-      }
-
-      *count = frames;
     }
   }
 }
@@ -430,6 +434,15 @@
       ctx->img.fb_priv = cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv;
       img = &ctx->img;
       *iter = img;
+      // Decrease reference count of last output frame in frame parallel mode.
+      if (ctx->frame_parallel_decode && ctx->last_show_frame >= 0) {
+        --cm->frame_bufs[ctx->last_show_frame].ref_count;
+        if (cm->frame_bufs[ctx->last_show_frame].ref_count == 0) {
+          cm->release_fb_cb(cm->cb_priv,
+              &cm->frame_bufs[ctx->last_show_frame].raw_frame_buffer);
+        }
+      }
+      ctx->last_show_frame = ctx->pbi->common.new_fb_idx;
     }
   }
 

diff --git a/vpx_scale/generic/yv12extend.c b/vpx_scale/generic/yv12extend.c
index 7b43eec..614602a 100644
--- a/vpx_scale/generic/yv12extend.c
+++ b/vpx_scale/generic/yv12extend.c

@@ -84,11 +84,12 @@
 static void extend_frame(YV12_BUFFER_CONFIG *const ybf, int ext_size) {
   const int c_w = ybf->uv_crop_width;
   const int c_h = ybf->uv_crop_height;
-  const int c_ext_size = ext_size >> 1;
-  const int c_et = c_ext_size;
-  const int c_el = c_ext_size;
-  const int c_eb = c_ext_size + ybf->uv_height - ybf->uv_crop_height;
-  const int c_er = c_ext_size + ybf->uv_width - ybf->uv_crop_width;
+  const int ss_x = ybf->uv_width < ybf->y_width;
+  const int ss_y = ybf->uv_height < ybf->y_height;
+  const int c_et = ext_size >> ss_y;
+  const int c_el = ext_size >> ss_x;
+  const int c_eb = c_et + ybf->uv_height - ybf->uv_crop_height;
+  const int c_er = c_el + ybf->uv_width - ybf->uv_crop_width;
 
   assert(ybf->y_height - ybf->y_crop_height < 16);
   assert(ybf->y_width - ybf->y_crop_width < 16);

diff --git a/vpx_scale/mips/dspr2/yv12extend_dspr2.c b/vpx_scale/mips/dspr2/yv12extend_dspr2.c
index 26558b0..0dfc47c 100644
--- a/vpx_scale/mips/dspr2/yv12extend_dspr2.c
+++ b/vpx_scale/mips/dspr2/yv12extend_dspr2.c

@@ -104,16 +104,15 @@
   }
 }
 
-static void extend_frame(YV12_BUFFER_CONFIG *const ybf,
-                         int subsampling_x, int subsampling_y,
-                         int ext_size) {
+static void extend_frame(YV12_BUFFER_CONFIG *const ybf, int ext_size) {
   const int c_w = ybf->uv_crop_width;
   const int c_h = ybf->uv_crop_height;
-  const int c_ext_size = ext_size >> 1;
-  const int c_et = c_ext_size;
-  const int c_el = c_ext_size;
-  const int c_eb = c_ext_size + ybf->uv_height - ybf->uv_crop_height;
-  const int c_er = c_ext_size + ybf->uv_width - ybf->uv_crop_width;
+  const int ss_x = ybf->uv_width < ybf->y_width;
+  const int ss_y = ybf->uv_height < ybf->y_height;
+  const int c_et = ext_size >> ss_y;
+  const int c_el = ext_size >> ss_x;
+  const int c_eb = c_et + ybf->uv_height - ybf->uv_crop_height;
+  const int c_er = c_el + ybf->uv_width - ybf->uv_crop_width;
 
   assert(ybf->y_height - ybf->y_crop_height < 16);
   assert(ybf->y_width - ybf->y_crop_width < 16);
@@ -133,16 +132,13 @@
                c_w, c_h, c_et, c_el, c_eb, c_er);
 }
 
-void vp9_extend_frame_borders_dspr2(YV12_BUFFER_CONFIG *ybf,
-                                int subsampling_x, int subsampling_y) {
-  extend_frame(ybf, subsampling_x, subsampling_y, ybf->border);
+void vp9_extend_frame_borders_dspr2(YV12_BUFFER_CONFIG *ybf) {
+  extend_frame(ybf, ybf->border);
 }
 
-void vp9_extend_frame_inner_borders_dspr2(YV12_BUFFER_CONFIG *ybf,
-                                          int subsampling_x,
-                                          int subsampling_y) {
+void vp9_extend_frame_inner_borders_dspr2(YV12_BUFFER_CONFIG *ybf) {
   const int inner_bw = (ybf->border > VP9INNERBORDERINPIXELS) ?
                        VP9INNERBORDERINPIXELS : ybf->border;
-  extend_frame(ybf, subsampling_x, subsampling_y, inner_bw);
+  extend_frame(ybf, inner_bw);
 }
 #endif