Merge "VPX: x86 asm version of vpx_idct32x32_34_add()"
diff --git a/.mailmap b/.mailmap
index 0bfda12..42f3617 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1,14 +1,21 @@
 Adrian Grange <agrange@google.com>
-Alex Converse <aconverse@google.com> <alex.converse@gmail.com>
+Adrian Grange <agrange@google.com> <agrange@agrange-macbookpro.roam.corp.google.com>
+Aℓex Converse <aconverse@google.com>
+Aℓex Converse <aconverse@google.com> <alex.converse@gmail.com>
 Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com>
 Alpha Lam <hclam@google.com> <hclam@chromium.org>
 Deb Mukherjee <debargha@google.com>
 Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com>
 Guillaume Martres <gmartres@google.com> <smarter3@gmail.com>
 Hangyu Kuang <hkuang@google.com>
+Hangyu Kuang <hkuang@google.com> <hkuang@hkuang-macbookpro.roam.corp.google.com>
+Hui Su <huisu@google.com>
+Jacky Chen <jackychen@google.com>
 Jim Bankoski <jimbankoski@google.com>
 Johann Koenig <johannkoenig@google.com>
 Johann Koenig <johannkoenig@google.com> <johann.koenig@duck.com>
+Johann Koenig <johannkoenig@google.com> <johannkoenig@dhcp-172-19-7-52.mtv.corp.google.com>
+Johann Koenig <johannkoenig@google.com> <johann.koenig@gmail.com>
 John Koleszar <jkoleszar@google.com>
 Joshua Litt <joshualitt@google.com> <joshualitt@chromium.org>
 Marco Paniconi <marpan@google.com>
@@ -17,10 +24,13 @@
 Paul Wilkins <paulwilkins@google.com>
 Ralph Giles <giles@xiph.org> <giles@entropywave.com>
 Ralph Giles <giles@xiph.org> <giles@mozilla.com>
+Ronald S. Bultje <rsbultje@gmail.com> <rbultje@google.com>
 Sami Pietilä <samipietila@google.com>
 Tamar Levy <tamar.levy@intel.com>
 Tamar Levy <tamar.levy@intel.com> <levytamar82@gmail.com>
 Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
 Timothy B. Terriberry <tterribe@xiph.org> Tim Terriberry <tterriberry@mozilla.com>
 Tom Finegan <tomfinegan@google.com>
+Tom Finegan <tomfinegan@google.com> <tomfinegan@chromium.org>
 Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com>
+Yaowu Xu <yaowu@google.com> <yaowu@YAOWU2-W.ad.corp.google.com>
diff --git a/AUTHORS b/AUTHORS
index 2f63d7c..f89b677 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -5,9 +5,9 @@
 Abo Talib Mahfoodh <ab.mahfoodh@gmail.com>
 Adam Xu <adam@xuyaowu.com>
 Adrian Grange <agrange@google.com>
+Aℓex Converse <aconverse@google.com>
 Ahmad Sharif <asharif@google.com>
 Alexander Voronov <avoronov@graphics.cs.msu.ru>
-Alex Converse <aconverse@google.com>
 Alexis Ballier <aballier@gentoo.org>
 Alok Ahuja <waveletcoeff@gmail.com>
 Alpha Lam <hclam@google.com>
@@ -16,8 +16,10 @@
 Andoni Morales Alastruey <ylatuya@gmail.com>
 Andres Mejia <mcitadel@gmail.com>
 Andrew Russell <anrussell@google.com>
+Angie Chiang <angiebird@google.com>
 Aron Rosenberg <arosenberg@logitech.com>
 Attila Nagy <attilanagy@google.com>
+Brion Vibber <bvibber@wikimedia.org>
 changjun.yang <changjun.yang@intel.com>
 Charles 'Buck' Krasic <ckrasic@google.com>
 chm <chm@rock-chips.com>
@@ -27,6 +29,7 @@
 Dim Temp <dimtemp0@gmail.com>
 Dmitry Kovalev <dkovalev@google.com>
 Dragan Mrdjan <dmrdjan@mips.com>
+Ed Baker <edward.baker@intel.com>
 Ehsan Akhgari <ehsan.akhgari@gmail.com>
 Erik Niemeyer <erik.a.niemeyer@intel.com>
 Fabio Pedretti <fabio.ped@libero.it>
@@ -34,6 +37,8 @@
 Fredrik Söderquist <fs@opera.com>
 Fritz Koenig <frkoenig@google.com>
 Gaute Strokkenes <gaute.strokkenes@broadcom.com>
+Geza Lore <gezalore@gmail.com>
+Ghislain MARY <ghislainmary2@gmail.com>
 Giuseppe Scrivano <gscrivano@gnu.org>
 Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com>
 Guillaume Martres <gmartres@google.com>
@@ -44,7 +49,7 @@
 Hui Su <huisu@google.com>
 Ivan Maltz <ivanmaltz@google.com>
 Jacek Caban <cjacek@gmail.com>
-JackyChen <jackychen@google.com>
+Jacky Chen <jackychen@google.com>
 James Berry <jamesberry@google.com>
 James Yu <james.yu@linaro.org>
 James Zern <jzern@google.com>
@@ -60,9 +65,11 @@
 Joey Parrish <joeyparrish@google.com>
 Johann Koenig <johannkoenig@google.com>
 John Koleszar <jkoleszar@google.com>
+Johnny Klonaris <google@jawknee.com>
 John Stark <jhnstrk@gmail.com>
 Joshua Bleecher Snyder <josh@treelinelabs.com>
 Joshua Litt <joshualitt@google.com>
+Julia Robson <juliamrobson@gmail.com>
 Justin Clift <justin@salasaga.org>
 Justin Lebar <justin.lebar@gmail.com>
 KO Myung-Hun <komh@chollian.net>
@@ -82,6 +89,7 @@
 Mikhal Shemer <mikhal@google.com>
 Minghai Shang <minghai@google.com>
 Morton Jonuschat <yabawock@gmail.com>
+Nico Weber <thakis@chromium.org>
 Parag Salasakar <img.mips1@gmail.com>
 Pascal Massimino <pascal.massimino@gmail.com>
 Patrik Westin <patrik.westin@gmail.com>
@@ -96,7 +104,7 @@
 Rafaël Carré <funman@videolan.org>
 Ralph Giles <giles@xiph.org>
 Rob Bradford <rob@linux.intel.com>
-Ronald S. Bultje <rbultje@google.com>
+Ronald S. Bultje <rsbultje@gmail.com>
 Rui Ueyama <ruiu@google.com>
 Sami Pietilä <samipietila@google.com>
 Scott Graham <scottmg@chromium.org>
@@ -104,6 +112,7 @@
 Sean McGovern <gseanmcg@gmail.com>
 Sergey Ulanov <sergeyu@chromium.org>
 Shimon Doodkin <helpmepro1@gmail.com>
+Shunyao Li <shunyaoli@google.com>
 Stefan Holmer <holmer@google.com>
 Suman Sunkara <sunkaras@google.com>
 Taekhyun Kim <takim@nvidia.com>
diff --git a/CHANGELOG b/CHANGELOG
index b0d3064..7746cc6 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,7 +1,19 @@
-xxxx-yy-zz v1.4.0 "Changes for next release"
-  vpxenc is changed to use VP9 by default.
-  Encoder controls added for 1 pass SVC.
-  Decoder control to toggle on/off loopfilter.
+2015-11-09 v1.5.0 "Javan Whistling Duck"
+  This release improves upon the VP9 encoder and speeds up the encoding and
+  decoding processes.
+
+  - Upgrading:
+    This release is ABI incompatible with 1.4.0. It drops deprecated VP8
+    controls and adds a variety of VP9 controls for testing.
+
+    The vpxenc utility now prefers VP9 by default.
+
+  - Enhancements:
+    Faster VP9 encoding and decoding
+    Smaller library size by combining functions used by VP8 and VP9
+
+  - Bug Fixes:
+    A variety of fuzzing issues
 
 2015-04-03 v1.4.0 "Indian Runner Duck"
   This release includes significant improvements to the VP9 codec.
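
Note on the ABI change: the "Upgrading" entry above and the SO_VERSION_MAJOR bump in libs.mk further down (2 -> 3) go together; binaries linked against the 1.4.0 library will not bind to the new SONAME without a rebuild. Below is a minimal sketch of a runtime sanity check, not part of this patch, using only public libvpx API (vpx_codec_version_str() and vpx_codec_build_config() from vpx/vpx_codec.h); the program itself and its output format are illustrative:

    #include <stdio.h>
    #include "vpx/vpx_codec.h"

    int main(void) {
      /* Reports which libvpx the binary actually resolved at load time;
       * useful after an ABI-incompatible upgrade such as 1.4.0 -> 1.5.0. */
      printf("libvpx version: %s\n", vpx_codec_version_str());
      printf("build config:   %s\n", vpx_codec_build_config());
      return 0;
    }
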
diff --git a/build/make/configure.sh b/build/make/configure.sh
index c592b63..37ed86f 100644
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -688,6 +688,10 @@
         tgt_isa=x86_64
         tgt_os=darwin14
         ;;
+      *darwin15*)
+        tgt_isa=x86_64
+        tgt_os=darwin15
+        ;;
       x86_64*mingw32*)
         tgt_os=win64
         ;;
@@ -795,6 +799,10 @@
       add_cflags  "-mmacosx-version-min=10.10"
       add_ldflags "-mmacosx-version-min=10.10"
       ;;
+    *-darwin15-*)
+      add_cflags  "-mmacosx-version-min=10.11"
+      add_ldflags "-mmacosx-version-min=10.11"
+      ;;
     *-iphonesimulator-*)
       add_cflags  "-miphoneos-version-min=${IOS_VERSION_MIN}"
       add_ldflags "-miphoneos-version-min=${IOS_VERSION_MIN}"
@@ -979,8 +987,10 @@
                 awk '{ print $1 }' | tail -1`
           fi
 
-          add_cflags "--sysroot=${alt_libc}"
-          add_ldflags "--sysroot=${alt_libc}"
+          if [ -d "${alt_libc}" ]; then
+            add_cflags "--sysroot=${alt_libc}"
+            add_ldflags "--sysroot=${alt_libc}"
+          fi
 
           # linker flag that routes around a CPU bug in some
           # Cortex-A8 implementations (NDK Dev Guide)
@@ -1207,24 +1217,28 @@
       check_gcc_machine_option avx
       check_gcc_machine_option avx2
 
-      case "${AS}" in
-        auto|"")
-          which nasm >/dev/null 2>&1 && AS=nasm
-          which yasm >/dev/null 2>&1 && AS=yasm
-          if [ "${AS}" = nasm ] ; then
-            # Apple ships version 0.98 of nasm through at least Xcode 6. Revisit
-            # this check if they start shipping a compatible version.
-            apple=`nasm -v | grep "Apple"`
-            [ -n "${apple}" ] \
-              && echo "Unsupported version of nasm: ${apple}" \
-              && AS=""
-          fi
-          [ "${AS}" = auto ] || [ -z "${AS}" ] \
-            && die "Neither yasm nor nasm have been found." \
-                   "See the prerequisites section in the README for more info."
-          ;;
-      esac
-      log_echo "  using $AS"
+      if enabled external_build; then
+        log_echo "  skipping assembler detection"
+      else
+        case "${AS}" in
+          auto|"")
+            which nasm >/dev/null 2>&1 && AS=nasm
+            which yasm >/dev/null 2>&1 && AS=yasm
+            if [ "${AS}" = nasm ] ; then
+              # Apple ships version 0.98 of nasm through at least Xcode 6. Revisit
+              # this check if they start shipping a compatible version.
+              apple=`nasm -v | grep "Apple"`
+              [ -n "${apple}" ] \
+                && echo "Unsupported version of nasm: ${apple}" \
+                && AS=""
+            fi
+            [ "${AS}" = auto ] || [ -z "${AS}" ] \
+              && die "Neither yasm nor nasm have been found." \
+                     "See the prerequisites section in the README for more info."
+            ;;
+        esac
+        log_echo "  using $AS"
+      fi
       [ "${AS##*/}" = nasm ] && add_asflags -Ox
       AS_SFX=.asm
       case  ${tgt_os} in
diff --git a/configure b/configure
index beedbdd..24992c4 100755
--- a/configure
+++ b/configure
@@ -122,6 +122,7 @@
 all_platforms="${all_platforms} x86-darwin12-gcc"
 all_platforms="${all_platforms} x86-darwin13-gcc"
 all_platforms="${all_platforms} x86-darwin14-gcc"
+all_platforms="${all_platforms} x86-darwin15-gcc"
 all_platforms="${all_platforms} x86-iphonesimulator-gcc"
 all_platforms="${all_platforms} x86-linux-gcc"
 all_platforms="${all_platforms} x86-linux-icc"
@@ -142,6 +143,7 @@
 all_platforms="${all_platforms} x86_64-darwin12-gcc"
 all_platforms="${all_platforms} x86_64-darwin13-gcc"
 all_platforms="${all_platforms} x86_64-darwin14-gcc"
+all_platforms="${all_platforms} x86_64-darwin15-gcc"
 all_platforms="${all_platforms} x86_64-iphonesimulator-gcc"
 all_platforms="${all_platforms} x86_64-linux-gcc"
 all_platforms="${all_platforms} x86_64-linux-icc"
@@ -265,7 +267,6 @@
     fp_mb_stats
     emulate_hardware
     misc_fixes
-    universal_hp
 "
 CONFIG_LIST="
     dependency_tracking
diff --git a/examples/vp9_spatial_svc_encoder.c b/examples/vp9_spatial_svc_encoder.c
index b26e987..d2b3688 100644
--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -408,7 +408,10 @@
     for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
       const int layer = sl * cfg->ts_number_layers + tl;
       const int tlayer0 = sl * cfg->ts_number_layers;
-      rc->layer_framerate[layer] =
+      if (cfg->ts_number_layers == 1)
+        rc->layer_framerate[layer] = framerate;
+      else
+        rc->layer_framerate[layer] =
           framerate / cfg->ts_rate_decimator[tl];
       if (tl > 0) {
         rc->layer_pfb[layer] = 1000.0 *
@@ -714,6 +717,7 @@
     // TODO(marpan): Should rename the "VP9E_TEMPORAL_LAYERING_MODE_BYPASS"
     // mode to "VP9E_LAYERING_MODE_BYPASS".
     if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+      layer_id.spatial_layer_id = 0;
       // Example for 2 temporal layers.
       if (frame_cnt % 2 == 0)
         layer_id.temporal_layer_id = 0;
@@ -729,6 +733,12 @@
                                   &ref_frame_config);
       vpx_codec_control(&codec, VP9E_SET_SVC_REF_FRAME_CONFIG,
                         &ref_frame_config);
+      // Keep track of input frames, to account for frame drops in rate control
+      // stats/metrics.
+      for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+        ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
+                                layer_id.temporal_layer_id];
+      }
     }
 
     vpx_usec_timer_start(&timer);
@@ -761,9 +771,16 @@
               vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id);
               parse_superframe_index(cx_pkt->data.frame.buf,
                                      cx_pkt->data.frame.sz, sizes, &count);
-              for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
-                ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
-                                        layer_id.temporal_layer_id];
+              // Note computing input_layer_frames here won't account for frame
+              // drops in rate control stats.
+              // TODO(marpan): Fix this for non-bypass mode so we can get stats
+              // for dropped frames.
+              if (svc_ctx.temporal_layering_mode !=
+                  VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+                for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+                  ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
+                                         layer_id.temporal_layer_id];
+                }
               }
               for (tl = layer_id.temporal_layer_id;
                   tl < enc_cfg.ts_number_layers; ++tl) {
@@ -854,6 +871,16 @@
       pts += frame_duration;
     }
   }
+
+  // Compensate for the extra frame count for the bypass mode.
+  if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+    for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+      const int layer = sl * enc_cfg.ts_number_layers +
+          layer_id.temporal_layer_id;
+      --rc.layer_input_frames[layer];
+    }
+  }
+
   printf("Processed %d frames\n", frame_cnt);
   fclose(infile);
 #if OUTPUT_RC_STATS
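
Note on the layer bookkeeping above: the bypass-mode counting, the non-bypass counting, and the end-of-loop compensation all rely on the same flat indexing, layer = sl * ts_number_layers + tl (spatial-major order). A minimal standalone sketch of that accounting follows, not part of this patch; the 2x3 layer configuration and the printf output are illustrative:

    #include <stdio.h>

    int main(void) {
      const int ss_number_layers = 2;  /* spatial layers */
      const int ts_number_layers = 3;  /* temporal layers */
      int layer_input_frames[2 * 3] = { 0 };
      const int temporal_layer_id = 1; /* layer assigned to this input frame */
      int sl, tl;
      /* One input frame is counted once per spatial layer at its temporal
       * layer id, mirroring the bypass-mode loop in the hunk above. */
      for (sl = 0; sl < ss_number_layers; ++sl)
        ++layer_input_frames[sl * ts_number_layers + temporal_layer_id];
      for (sl = 0; sl < ss_number_layers; ++sl)
        for (tl = 0; tl < ts_number_layers; ++tl)
          printf("sl=%d tl=%d frames=%d\n", sl, tl,
                 layer_input_frames[sl * ts_number_layers + tl]);
      return 0;
    }
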
diff --git a/examples/vpx_temporal_svc_encoder.c b/examples/vpx_temporal_svc_encoder.c
index ee7de6b..5adda9e 100644
--- a/examples/vpx_temporal_svc_encoder.c
+++ b/examples/vpx_temporal_svc_encoder.c
@@ -684,14 +684,14 @@
   if (strncmp(encoder->name, "vp8", 3) == 0) {
     vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed);
     vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff);
-    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0);
+    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
   } else if (strncmp(encoder->name, "vp9", 3) == 0) {
     vpx_svc_extra_cfg_t svc_params;
     vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
     vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
     vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
     vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);
-    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0);
+    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
     vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0);
     vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
     if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1: 0))
diff --git a/libs.mk b/libs.mk
index c65df6b..f28d84a 100644
--- a/libs.mk
+++ b/libs.mk
@@ -260,7 +260,7 @@
 LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
 $(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)
 
-SO_VERSION_MAJOR := 2
+SO_VERSION_MAJOR := 3
 SO_VERSION_MINOR := 0
 SO_VERSION_PATCH := 0
 ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
diff --git a/test/datarate_test.cc b/test/datarate_test.cc
index b6cae79..7d8a0ab 100644
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -538,7 +538,7 @@
         << " The first dropped frame for drop_thresh " << i
         << " > first dropped frame for drop_thresh "
         << i - kDropFrameThreshTestStep;
-    ASSERT_GE(num_drops_, last_num_drops * 0.90)
+    ASSERT_GE(num_drops_, last_num_drops * 0.85)
         << " The number of dropped frames for drop_thresh " << i
         << " < number of dropped frames for drop_thresh "
         << i - kDropFrameThreshTestStep;
diff --git a/test/resize_test.cc b/test/resize_test.cc
index 98b6f87..dd184d1 100644
--- a/test/resize_test.cc
+++ b/test/resize_test.cc
@@ -286,11 +286,11 @@
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
 }
 
-class ResizeInternalRealtimeTest : public ::libvpx_test::EncoderTest,
+class ResizeRealtimeTest : public ::libvpx_test::EncoderTest,
   public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
  protected:
-  ResizeInternalRealtimeTest() : EncoderTest(GET_PARAM(0)) {}
-  virtual ~ResizeInternalRealtimeTest() {}
+  ResizeRealtimeTest() : EncoderTest(GET_PARAM(0)) {}
+  virtual ~ResizeRealtimeTest() {}
 
   virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
                                   libvpx_test::Encoder *encoder) {
@@ -318,8 +318,6 @@
   }
 
   void DefaultConfig() {
-    cfg_.g_w = 352;
-    cfg_.g_h = 288;
     cfg_.rc_buf_initial_sz = 500;
     cfg_.rc_buf_optimal_sz = 600;
     cfg_.rc_buf_sz = 1000;
@@ -346,13 +344,34 @@
   bool change_bitrate_;
 };
 
+TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) {
+  ResizingVideoSource video;
+  DefaultConfig();
+  change_bitrate_ = false;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+       info != frame_info_list_.end(); ++info) {
+    const unsigned int frame = static_cast<unsigned>(info->pts);
+    const unsigned int expected_w = ScaleForFrameNumber(frame, kInitialWidth);
+    const unsigned int expected_h = ScaleForFrameNumber(frame, kInitialHeight);
+
+    EXPECT_EQ(expected_w, info->w)
+        << "Frame " << frame << " had unexpected width";
+    EXPECT_EQ(expected_h, info->h)
+        << "Frame " << frame << " had unexpected height";
+  }
+}
+
 // Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
 // Run at low bitrate, with resize_allowed = 1, and verify that we get
 // one resize down event.
-TEST_P(ResizeInternalRealtimeTest, TestInternalResizeDown) {
+TEST_P(ResizeRealtimeTest, TestInternalResizeDown) {
   ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                        30, 1, 0, 299);
   DefaultConfig();
+  cfg_.g_w = 352;
+  cfg_.g_h = 288;
   change_bitrate_ = false;
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
 
@@ -378,10 +397,12 @@
 // Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
 // Start at low target bitrate, raise the bitrate in the middle of the clip,
 // scaling-up should occur after bitrate changed.
-TEST_P(ResizeInternalRealtimeTest, TestInternalResizeDownUpChangeBitRate) {
+TEST_P(ResizeRealtimeTest, TestInternalResizeDownUpChangeBitRate) {
   ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 299);
+                                       30, 1, 0, 359);
   DefaultConfig();
+  cfg_.g_w = 352;
+  cfg_.g_h = 288;
   change_bitrate_ = true;
   // Disable dropped frames.
   cfg_.rc_dropframe_thresh = 0;
@@ -524,7 +545,7 @@
                           ::testing::Values(::libvpx_test::kRealTime));
 VP9_INSTANTIATE_TEST_CASE(ResizeInternalTest,
                           ::testing::Values(::libvpx_test::kOnePassBest));
-VP9_INSTANTIATE_TEST_CASE(ResizeInternalRealtimeTest,
+VP9_INSTANTIATE_TEST_CASE(ResizeRealtimeTest,
                           ::testing::Values(::libvpx_test::kRealTime),
                           ::testing::Range(5, 9));
 VP9_INSTANTIATE_TEST_CASE(ResizeCspTest,
diff --git a/test/sad_test.cc b/test/sad_test.cc
index e6a5e0b..9dec3cb 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -484,260 +484,176 @@
 
 //------------------------------------------------------------------------------
 // C functions
-const SadMxNFunc sad64x64_c = vpx_sad64x64_c;
-const SadMxNFunc sad64x32_c = vpx_sad64x32_c;
-const SadMxNFunc sad32x64_c = vpx_sad32x64_c;
-const SadMxNFunc sad32x32_c = vpx_sad32x32_c;
-const SadMxNFunc sad32x16_c = vpx_sad32x16_c;
-const SadMxNFunc sad16x32_c = vpx_sad16x32_c;
-const SadMxNFunc sad16x16_c = vpx_sad16x16_c;
-const SadMxNFunc sad16x8_c = vpx_sad16x8_c;
-const SadMxNFunc sad8x16_c = vpx_sad8x16_c;
-const SadMxNFunc sad8x8_c = vpx_sad8x8_c;
-const SadMxNFunc sad8x4_c = vpx_sad8x4_c;
-const SadMxNFunc sad4x8_c = vpx_sad4x8_c;
-const SadMxNFunc sad4x4_c = vpx_sad4x4_c;
-#if CONFIG_VP9_HIGHBITDEPTH
-const SadMxNFunc highbd_sad64x64_c = vpx_highbd_sad64x64_c;
-const SadMxNFunc highbd_sad64x32_c = vpx_highbd_sad64x32_c;
-const SadMxNFunc highbd_sad32x64_c = vpx_highbd_sad32x64_c;
-const SadMxNFunc highbd_sad32x32_c = vpx_highbd_sad32x32_c;
-const SadMxNFunc highbd_sad32x16_c = vpx_highbd_sad32x16_c;
-const SadMxNFunc highbd_sad16x32_c = vpx_highbd_sad16x32_c;
-const SadMxNFunc highbd_sad16x16_c = vpx_highbd_sad16x16_c;
-const SadMxNFunc highbd_sad16x8_c = vpx_highbd_sad16x8_c;
-const SadMxNFunc highbd_sad8x16_c = vpx_highbd_sad8x16_c;
-const SadMxNFunc highbd_sad8x8_c = vpx_highbd_sad8x8_c;
-const SadMxNFunc highbd_sad8x4_c = vpx_highbd_sad8x4_c;
-const SadMxNFunc highbd_sad4x8_c = vpx_highbd_sad4x8_c;
-const SadMxNFunc highbd_sad4x4_c = vpx_highbd_sad4x4_c;
-#endif  // CONFIG_VP9_HIGHBITDEPTH
 const SadMxNParam c_tests[] = {
-  make_tuple(64, 64, sad64x64_c, -1),
-  make_tuple(64, 32, sad64x32_c, -1),
-  make_tuple(32, 64, sad32x64_c, -1),
-  make_tuple(32, 32, sad32x32_c, -1),
-  make_tuple(32, 16, sad32x16_c, -1),
-  make_tuple(16, 32, sad16x32_c, -1),
-  make_tuple(16, 16, sad16x16_c, -1),
-  make_tuple(16, 8, sad16x8_c, -1),
-  make_tuple(8, 16, sad8x16_c, -1),
-  make_tuple(8, 8, sad8x8_c, -1),
-  make_tuple(8, 4, sad8x4_c, -1),
-  make_tuple(4, 8, sad4x8_c, -1),
-  make_tuple(4, 4, sad4x4_c, -1),
+  make_tuple(64, 64, &vpx_sad64x64_c, -1),
+  make_tuple(64, 32, &vpx_sad64x32_c, -1),
+  make_tuple(32, 64, &vpx_sad32x64_c, -1),
+  make_tuple(32, 32, &vpx_sad32x32_c, -1),
+  make_tuple(32, 16, &vpx_sad32x16_c, -1),
+  make_tuple(16, 32, &vpx_sad16x32_c, -1),
+  make_tuple(16, 16, &vpx_sad16x16_c, -1),
+  make_tuple(16, 8, &vpx_sad16x8_c, -1),
+  make_tuple(8, 16, &vpx_sad8x16_c, -1),
+  make_tuple(8, 8, &vpx_sad8x8_c, -1),
+  make_tuple(8, 4, &vpx_sad8x4_c, -1),
+  make_tuple(4, 8, &vpx_sad4x8_c, -1),
+  make_tuple(4, 4, &vpx_sad4x4_c, -1),
 #if CONFIG_VP9_HIGHBITDEPTH
-  make_tuple(64, 64, highbd_sad64x64_c, 8),
-  make_tuple(64, 32, highbd_sad64x32_c, 8),
-  make_tuple(32, 64, highbd_sad32x64_c, 8),
-  make_tuple(32, 32, highbd_sad32x32_c, 8),
-  make_tuple(32, 16, highbd_sad32x16_c, 8),
-  make_tuple(16, 32, highbd_sad16x32_c, 8),
-  make_tuple(16, 16, highbd_sad16x16_c, 8),
-  make_tuple(16, 8, highbd_sad16x8_c, 8),
-  make_tuple(8, 16, highbd_sad8x16_c, 8),
-  make_tuple(8, 8, highbd_sad8x8_c, 8),
-  make_tuple(8, 4, highbd_sad8x4_c, 8),
-  make_tuple(4, 8, highbd_sad4x8_c, 8),
-  make_tuple(4, 4, highbd_sad4x4_c, 8),
-  make_tuple(64, 64, highbd_sad64x64_c, 10),
-  make_tuple(64, 32, highbd_sad64x32_c, 10),
-  make_tuple(32, 64, highbd_sad32x64_c, 10),
-  make_tuple(32, 32, highbd_sad32x32_c, 10),
-  make_tuple(32, 16, highbd_sad32x16_c, 10),
-  make_tuple(16, 32, highbd_sad16x32_c, 10),
-  make_tuple(16, 16, highbd_sad16x16_c, 10),
-  make_tuple(16, 8, highbd_sad16x8_c, 10),
-  make_tuple(8, 16, highbd_sad8x16_c, 10),
-  make_tuple(8, 8, highbd_sad8x8_c, 10),
-  make_tuple(8, 4, highbd_sad8x4_c, 10),
-  make_tuple(4, 8, highbd_sad4x8_c, 10),
-  make_tuple(4, 4, highbd_sad4x4_c, 10),
-  make_tuple(64, 64, highbd_sad64x64_c, 12),
-  make_tuple(64, 32, highbd_sad64x32_c, 12),
-  make_tuple(32, 64, highbd_sad32x64_c, 12),
-  make_tuple(32, 32, highbd_sad32x32_c, 12),
-  make_tuple(32, 16, highbd_sad32x16_c, 12),
-  make_tuple(16, 32, highbd_sad16x32_c, 12),
-  make_tuple(16, 16, highbd_sad16x16_c, 12),
-  make_tuple(16, 8, highbd_sad16x8_c, 12),
-  make_tuple(8, 16, highbd_sad8x16_c, 12),
-  make_tuple(8, 8, highbd_sad8x8_c, 12),
-  make_tuple(8, 4, highbd_sad8x4_c, 12),
-  make_tuple(4, 8, highbd_sad4x8_c, 12),
-  make_tuple(4, 4, highbd_sad4x4_c, 12),
+  make_tuple(64, 64, &vpx_highbd_sad64x64_c, 8),
+  make_tuple(64, 32, &vpx_highbd_sad64x32_c, 8),
+  make_tuple(32, 64, &vpx_highbd_sad32x64_c, 8),
+  make_tuple(32, 32, &vpx_highbd_sad32x32_c, 8),
+  make_tuple(32, 16, &vpx_highbd_sad32x16_c, 8),
+  make_tuple(16, 32, &vpx_highbd_sad16x32_c, 8),
+  make_tuple(16, 16, &vpx_highbd_sad16x16_c, 8),
+  make_tuple(16, 8, &vpx_highbd_sad16x8_c, 8),
+  make_tuple(8, 16, &vpx_highbd_sad8x16_c, 8),
+  make_tuple(8, 8, &vpx_highbd_sad8x8_c, 8),
+  make_tuple(8, 4, &vpx_highbd_sad8x4_c, 8),
+  make_tuple(4, 8, &vpx_highbd_sad4x8_c, 8),
+  make_tuple(4, 4, &vpx_highbd_sad4x4_c, 8),
+  make_tuple(64, 64, &vpx_highbd_sad64x64_c, 10),
+  make_tuple(64, 32, &vpx_highbd_sad64x32_c, 10),
+  make_tuple(32, 64, &vpx_highbd_sad32x64_c, 10),
+  make_tuple(32, 32, &vpx_highbd_sad32x32_c, 10),
+  make_tuple(32, 16, &vpx_highbd_sad32x16_c, 10),
+  make_tuple(16, 32, &vpx_highbd_sad16x32_c, 10),
+  make_tuple(16, 16, &vpx_highbd_sad16x16_c, 10),
+  make_tuple(16, 8, &vpx_highbd_sad16x8_c, 10),
+  make_tuple(8, 16, &vpx_highbd_sad8x16_c, 10),
+  make_tuple(8, 8, &vpx_highbd_sad8x8_c, 10),
+  make_tuple(8, 4, &vpx_highbd_sad8x4_c, 10),
+  make_tuple(4, 8, &vpx_highbd_sad4x8_c, 10),
+  make_tuple(4, 4, &vpx_highbd_sad4x4_c, 10),
+  make_tuple(64, 64, &vpx_highbd_sad64x64_c, 12),
+  make_tuple(64, 32, &vpx_highbd_sad64x32_c, 12),
+  make_tuple(32, 64, &vpx_highbd_sad32x64_c, 12),
+  make_tuple(32, 32, &vpx_highbd_sad32x32_c, 12),
+  make_tuple(32, 16, &vpx_highbd_sad32x16_c, 12),
+  make_tuple(16, 32, &vpx_highbd_sad16x32_c, 12),
+  make_tuple(16, 16, &vpx_highbd_sad16x16_c, 12),
+  make_tuple(16, 8, &vpx_highbd_sad16x8_c, 12),
+  make_tuple(8, 16, &vpx_highbd_sad8x16_c, 12),
+  make_tuple(8, 8, &vpx_highbd_sad8x8_c, 12),
+  make_tuple(8, 4, &vpx_highbd_sad8x4_c, 12),
+  make_tuple(4, 8, &vpx_highbd_sad4x8_c, 12),
+  make_tuple(4, 4, &vpx_highbd_sad4x4_c, 12),
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 };
 INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::ValuesIn(c_tests));
 
-const SadMxNAvgFunc sad64x64_avg_c = vpx_sad64x64_avg_c;
-const SadMxNAvgFunc sad64x32_avg_c = vpx_sad64x32_avg_c;
-const SadMxNAvgFunc sad32x64_avg_c = vpx_sad32x64_avg_c;
-const SadMxNAvgFunc sad32x32_avg_c = vpx_sad32x32_avg_c;
-const SadMxNAvgFunc sad32x16_avg_c = vpx_sad32x16_avg_c;
-const SadMxNAvgFunc sad16x32_avg_c = vpx_sad16x32_avg_c;
-const SadMxNAvgFunc sad16x16_avg_c = vpx_sad16x16_avg_c;
-const SadMxNAvgFunc sad16x8_avg_c = vpx_sad16x8_avg_c;
-const SadMxNAvgFunc sad8x16_avg_c = vpx_sad8x16_avg_c;
-const SadMxNAvgFunc sad8x8_avg_c = vpx_sad8x8_avg_c;
-const SadMxNAvgFunc sad8x4_avg_c = vpx_sad8x4_avg_c;
-const SadMxNAvgFunc sad4x8_avg_c = vpx_sad4x8_avg_c;
-const SadMxNAvgFunc sad4x4_avg_c = vpx_sad4x4_avg_c;
-#if CONFIG_VP9_HIGHBITDEPTH
-const SadMxNAvgFunc highbd_sad64x64_avg_c = vpx_highbd_sad64x64_avg_c;
-const SadMxNAvgFunc highbd_sad64x32_avg_c = vpx_highbd_sad64x32_avg_c;
-const SadMxNAvgFunc highbd_sad32x64_avg_c = vpx_highbd_sad32x64_avg_c;
-const SadMxNAvgFunc highbd_sad32x32_avg_c = vpx_highbd_sad32x32_avg_c;
-const SadMxNAvgFunc highbd_sad32x16_avg_c = vpx_highbd_sad32x16_avg_c;
-const SadMxNAvgFunc highbd_sad16x32_avg_c = vpx_highbd_sad16x32_avg_c;
-const SadMxNAvgFunc highbd_sad16x16_avg_c = vpx_highbd_sad16x16_avg_c;
-const SadMxNAvgFunc highbd_sad16x8_avg_c = vpx_highbd_sad16x8_avg_c;
-const SadMxNAvgFunc highbd_sad8x16_avg_c = vpx_highbd_sad8x16_avg_c;
-const SadMxNAvgFunc highbd_sad8x8_avg_c = vpx_highbd_sad8x8_avg_c;
-const SadMxNAvgFunc highbd_sad8x4_avg_c = vpx_highbd_sad8x4_avg_c;
-const SadMxNAvgFunc highbd_sad4x8_avg_c = vpx_highbd_sad4x8_avg_c;
-const SadMxNAvgFunc highbd_sad4x4_avg_c = vpx_highbd_sad4x4_avg_c;
-#endif  // CONFIG_VP9_HIGHBITDEPTH
 const SadMxNAvgParam avg_c_tests[] = {
-  make_tuple(64, 64, sad64x64_avg_c, -1),
-  make_tuple(64, 32, sad64x32_avg_c, -1),
-  make_tuple(32, 64, sad32x64_avg_c, -1),
-  make_tuple(32, 32, sad32x32_avg_c, -1),
-  make_tuple(32, 16, sad32x16_avg_c, -1),
-  make_tuple(16, 32, sad16x32_avg_c, -1),
-  make_tuple(16, 16, sad16x16_avg_c, -1),
-  make_tuple(16, 8, sad16x8_avg_c, -1),
-  make_tuple(8, 16, sad8x16_avg_c, -1),
-  make_tuple(8, 8, sad8x8_avg_c, -1),
-  make_tuple(8, 4, sad8x4_avg_c, -1),
-  make_tuple(4, 8, sad4x8_avg_c, -1),
-  make_tuple(4, 4, sad4x4_avg_c, -1),
+  make_tuple(64, 64, &vpx_sad64x64_avg_c, -1),
+  make_tuple(64, 32, &vpx_sad64x32_avg_c, -1),
+  make_tuple(32, 64, &vpx_sad32x64_avg_c, -1),
+  make_tuple(32, 32, &vpx_sad32x32_avg_c, -1),
+  make_tuple(32, 16, &vpx_sad32x16_avg_c, -1),
+  make_tuple(16, 32, &vpx_sad16x32_avg_c, -1),
+  make_tuple(16, 16, &vpx_sad16x16_avg_c, -1),
+  make_tuple(16, 8, &vpx_sad16x8_avg_c, -1),
+  make_tuple(8, 16, &vpx_sad8x16_avg_c, -1),
+  make_tuple(8, 8, &vpx_sad8x8_avg_c, -1),
+  make_tuple(8, 4, &vpx_sad8x4_avg_c, -1),
+  make_tuple(4, 8, &vpx_sad4x8_avg_c, -1),
+  make_tuple(4, 4, &vpx_sad4x4_avg_c, -1),
 #if CONFIG_VP9_HIGHBITDEPTH
-  make_tuple(64, 64, highbd_sad64x64_avg_c, 8),
-  make_tuple(64, 32, highbd_sad64x32_avg_c, 8),
-  make_tuple(32, 64, highbd_sad32x64_avg_c, 8),
-  make_tuple(32, 32, highbd_sad32x32_avg_c, 8),
-  make_tuple(32, 16, highbd_sad32x16_avg_c, 8),
-  make_tuple(16, 32, highbd_sad16x32_avg_c, 8),
-  make_tuple(16, 16, highbd_sad16x16_avg_c, 8),
-  make_tuple(16, 8, highbd_sad16x8_avg_c, 8),
-  make_tuple(8, 16, highbd_sad8x16_avg_c, 8),
-  make_tuple(8, 8, highbd_sad8x8_avg_c, 8),
-  make_tuple(8, 4, highbd_sad8x4_avg_c, 8),
-  make_tuple(4, 8, highbd_sad4x8_avg_c, 8),
-  make_tuple(4, 4, highbd_sad4x4_avg_c, 8),
-  make_tuple(64, 64, highbd_sad64x64_avg_c, 10),
-  make_tuple(64, 32, highbd_sad64x32_avg_c, 10),
-  make_tuple(32, 64, highbd_sad32x64_avg_c, 10),
-  make_tuple(32, 32, highbd_sad32x32_avg_c, 10),
-  make_tuple(32, 16, highbd_sad32x16_avg_c, 10),
-  make_tuple(16, 32, highbd_sad16x32_avg_c, 10),
-  make_tuple(16, 16, highbd_sad16x16_avg_c, 10),
-  make_tuple(16, 8, highbd_sad16x8_avg_c, 10),
-  make_tuple(8, 16, highbd_sad8x16_avg_c, 10),
-  make_tuple(8, 8, highbd_sad8x8_avg_c, 10),
-  make_tuple(8, 4, highbd_sad8x4_avg_c, 10),
-  make_tuple(4, 8, highbd_sad4x8_avg_c, 10),
-  make_tuple(4, 4, highbd_sad4x4_avg_c, 10),
-  make_tuple(64, 64, highbd_sad64x64_avg_c, 12),
-  make_tuple(64, 32, highbd_sad64x32_avg_c, 12),
-  make_tuple(32, 64, highbd_sad32x64_avg_c, 12),
-  make_tuple(32, 32, highbd_sad32x32_avg_c, 12),
-  make_tuple(32, 16, highbd_sad32x16_avg_c, 12),
-  make_tuple(16, 32, highbd_sad16x32_avg_c, 12),
-  make_tuple(16, 16, highbd_sad16x16_avg_c, 12),
-  make_tuple(16, 8, highbd_sad16x8_avg_c, 12),
-  make_tuple(8, 16, highbd_sad8x16_avg_c, 12),
-  make_tuple(8, 8, highbd_sad8x8_avg_c, 12),
-  make_tuple(8, 4, highbd_sad8x4_avg_c, 12),
-  make_tuple(4, 8, highbd_sad4x8_avg_c, 12),
-  make_tuple(4, 4, highbd_sad4x4_avg_c, 12),
+  make_tuple(64, 64, &vpx_highbd_sad64x64_avg_c, 8),
+  make_tuple(64, 32, &vpx_highbd_sad64x32_avg_c, 8),
+  make_tuple(32, 64, &vpx_highbd_sad32x64_avg_c, 8),
+  make_tuple(32, 32, &vpx_highbd_sad32x32_avg_c, 8),
+  make_tuple(32, 16, &vpx_highbd_sad32x16_avg_c, 8),
+  make_tuple(16, 32, &vpx_highbd_sad16x32_avg_c, 8),
+  make_tuple(16, 16, &vpx_highbd_sad16x16_avg_c, 8),
+  make_tuple(16, 8, &vpx_highbd_sad16x8_avg_c, 8),
+  make_tuple(8, 16, &vpx_highbd_sad8x16_avg_c, 8),
+  make_tuple(8, 8, &vpx_highbd_sad8x8_avg_c, 8),
+  make_tuple(8, 4, &vpx_highbd_sad8x4_avg_c, 8),
+  make_tuple(4, 8, &vpx_highbd_sad4x8_avg_c, 8),
+  make_tuple(4, 4, &vpx_highbd_sad4x4_avg_c, 8),
+  make_tuple(64, 64, &vpx_highbd_sad64x64_avg_c, 10),
+  make_tuple(64, 32, &vpx_highbd_sad64x32_avg_c, 10),
+  make_tuple(32, 64, &vpx_highbd_sad32x64_avg_c, 10),
+  make_tuple(32, 32, &vpx_highbd_sad32x32_avg_c, 10),
+  make_tuple(32, 16, &vpx_highbd_sad32x16_avg_c, 10),
+  make_tuple(16, 32, &vpx_highbd_sad16x32_avg_c, 10),
+  make_tuple(16, 16, &vpx_highbd_sad16x16_avg_c, 10),
+  make_tuple(16, 8, &vpx_highbd_sad16x8_avg_c, 10),
+  make_tuple(8, 16, &vpx_highbd_sad8x16_avg_c, 10),
+  make_tuple(8, 8, &vpx_highbd_sad8x8_avg_c, 10),
+  make_tuple(8, 4, &vpx_highbd_sad8x4_avg_c, 10),
+  make_tuple(4, 8, &vpx_highbd_sad4x8_avg_c, 10),
+  make_tuple(4, 4, &vpx_highbd_sad4x4_avg_c, 10),
+  make_tuple(64, 64, &vpx_highbd_sad64x64_avg_c, 12),
+  make_tuple(64, 32, &vpx_highbd_sad64x32_avg_c, 12),
+  make_tuple(32, 64, &vpx_highbd_sad32x64_avg_c, 12),
+  make_tuple(32, 32, &vpx_highbd_sad32x32_avg_c, 12),
+  make_tuple(32, 16, &vpx_highbd_sad32x16_avg_c, 12),
+  make_tuple(16, 32, &vpx_highbd_sad16x32_avg_c, 12),
+  make_tuple(16, 16, &vpx_highbd_sad16x16_avg_c, 12),
+  make_tuple(16, 8, &vpx_highbd_sad16x8_avg_c, 12),
+  make_tuple(8, 16, &vpx_highbd_sad8x16_avg_c, 12),
+  make_tuple(8, 8, &vpx_highbd_sad8x8_avg_c, 12),
+  make_tuple(8, 4, &vpx_highbd_sad8x4_avg_c, 12),
+  make_tuple(4, 8, &vpx_highbd_sad4x8_avg_c, 12),
+  make_tuple(4, 4, &vpx_highbd_sad4x4_avg_c, 12),
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 };
 INSTANTIATE_TEST_CASE_P(C, SADavgTest, ::testing::ValuesIn(avg_c_tests));
 
-const SadMxNx4Func sad64x64x4d_c = vpx_sad64x64x4d_c;
-const SadMxNx4Func sad64x32x4d_c = vpx_sad64x32x4d_c;
-const SadMxNx4Func sad32x64x4d_c = vpx_sad32x64x4d_c;
-const SadMxNx4Func sad32x32x4d_c = vpx_sad32x32x4d_c;
-const SadMxNx4Func sad32x16x4d_c = vpx_sad32x16x4d_c;
-const SadMxNx4Func sad16x32x4d_c = vpx_sad16x32x4d_c;
-const SadMxNx4Func sad16x16x4d_c = vpx_sad16x16x4d_c;
-const SadMxNx4Func sad16x8x4d_c = vpx_sad16x8x4d_c;
-const SadMxNx4Func sad8x16x4d_c = vpx_sad8x16x4d_c;
-const SadMxNx4Func sad8x8x4d_c = vpx_sad8x8x4d_c;
-const SadMxNx4Func sad8x4x4d_c = vpx_sad8x4x4d_c;
-const SadMxNx4Func sad4x8x4d_c = vpx_sad4x8x4d_c;
-const SadMxNx4Func sad4x4x4d_c = vpx_sad4x4x4d_c;
-#if CONFIG_VP9_HIGHBITDEPTH
-const SadMxNx4Func highbd_sad64x64x4d_c = vpx_highbd_sad64x64x4d_c;
-const SadMxNx4Func highbd_sad64x32x4d_c = vpx_highbd_sad64x32x4d_c;
-const SadMxNx4Func highbd_sad32x64x4d_c = vpx_highbd_sad32x64x4d_c;
-const SadMxNx4Func highbd_sad32x32x4d_c = vpx_highbd_sad32x32x4d_c;
-const SadMxNx4Func highbd_sad32x16x4d_c = vpx_highbd_sad32x16x4d_c;
-const SadMxNx4Func highbd_sad16x32x4d_c = vpx_highbd_sad16x32x4d_c;
-const SadMxNx4Func highbd_sad16x16x4d_c = vpx_highbd_sad16x16x4d_c;
-const SadMxNx4Func highbd_sad16x8x4d_c = vpx_highbd_sad16x8x4d_c;
-const SadMxNx4Func highbd_sad8x16x4d_c = vpx_highbd_sad8x16x4d_c;
-const SadMxNx4Func highbd_sad8x8x4d_c = vpx_highbd_sad8x8x4d_c;
-const SadMxNx4Func highbd_sad8x4x4d_c = vpx_highbd_sad8x4x4d_c;
-const SadMxNx4Func highbd_sad4x8x4d_c = vpx_highbd_sad4x8x4d_c;
-const SadMxNx4Func highbd_sad4x4x4d_c = vpx_highbd_sad4x4x4d_c;
-#endif  // CONFIG_VP9_HIGHBITDEPTH
 const SadMxNx4Param x4d_c_tests[] = {
-  make_tuple(64, 64, sad64x64x4d_c, -1),
-  make_tuple(64, 32, sad64x32x4d_c, -1),
-  make_tuple(32, 64, sad32x64x4d_c, -1),
-  make_tuple(32, 32, sad32x32x4d_c, -1),
-  make_tuple(32, 16, sad32x16x4d_c, -1),
-  make_tuple(16, 32, sad16x32x4d_c, -1),
-  make_tuple(16, 16, sad16x16x4d_c, -1),
-  make_tuple(16, 8, sad16x8x4d_c, -1),
-  make_tuple(8, 16, sad8x16x4d_c, -1),
-  make_tuple(8, 8, sad8x8x4d_c, -1),
-  make_tuple(8, 4, sad8x4x4d_c, -1),
-  make_tuple(4, 8, sad4x8x4d_c, -1),
-  make_tuple(4, 4, sad4x4x4d_c, -1),
+  make_tuple(64, 64, &vpx_sad64x64x4d_c, -1),
+  make_tuple(64, 32, &vpx_sad64x32x4d_c, -1),
+  make_tuple(32, 64, &vpx_sad32x64x4d_c, -1),
+  make_tuple(32, 32, &vpx_sad32x32x4d_c, -1),
+  make_tuple(32, 16, &vpx_sad32x16x4d_c, -1),
+  make_tuple(16, 32, &vpx_sad16x32x4d_c, -1),
+  make_tuple(16, 16, &vpx_sad16x16x4d_c, -1),
+  make_tuple(16, 8, &vpx_sad16x8x4d_c, -1),
+  make_tuple(8, 16, &vpx_sad8x16x4d_c, -1),
+  make_tuple(8, 8, &vpx_sad8x8x4d_c, -1),
+  make_tuple(8, 4, &vpx_sad8x4x4d_c, -1),
+  make_tuple(4, 8, &vpx_sad4x8x4d_c, -1),
+  make_tuple(4, 4, &vpx_sad4x4x4d_c, -1),
 #if CONFIG_VP9_HIGHBITDEPTH
-  make_tuple(64, 64, highbd_sad64x64x4d_c, 8),
-  make_tuple(64, 32, highbd_sad64x32x4d_c, 8),
-  make_tuple(32, 64, highbd_sad32x64x4d_c, 8),
-  make_tuple(32, 32, highbd_sad32x32x4d_c, 8),
-  make_tuple(32, 16, highbd_sad32x16x4d_c, 8),
-  make_tuple(16, 32, highbd_sad16x32x4d_c, 8),
-  make_tuple(16, 16, highbd_sad16x16x4d_c, 8),
-  make_tuple(16, 8, highbd_sad16x8x4d_c, 8),
-  make_tuple(8, 16, highbd_sad8x16x4d_c, 8),
-  make_tuple(8, 8, highbd_sad8x8x4d_c, 8),
-  make_tuple(8, 4, highbd_sad8x4x4d_c, 8),
-  make_tuple(4, 8, highbd_sad4x8x4d_c, 8),
-  make_tuple(4, 4, highbd_sad4x4x4d_c, 8),
-  make_tuple(64, 64, highbd_sad64x64x4d_c, 10),
-  make_tuple(64, 32, highbd_sad64x32x4d_c, 10),
-  make_tuple(32, 64, highbd_sad32x64x4d_c, 10),
-  make_tuple(32, 32, highbd_sad32x32x4d_c, 10),
-  make_tuple(32, 16, highbd_sad32x16x4d_c, 10),
-  make_tuple(16, 32, highbd_sad16x32x4d_c, 10),
-  make_tuple(16, 16, highbd_sad16x16x4d_c, 10),
-  make_tuple(16, 8, highbd_sad16x8x4d_c, 10),
-  make_tuple(8, 16, highbd_sad8x16x4d_c, 10),
-  make_tuple(8, 8, highbd_sad8x8x4d_c, 10),
-  make_tuple(8, 4, highbd_sad8x4x4d_c, 10),
-  make_tuple(4, 8, highbd_sad4x8x4d_c, 10),
-  make_tuple(4, 4, highbd_sad4x4x4d_c, 10),
-  make_tuple(64, 64, highbd_sad64x64x4d_c, 12),
-  make_tuple(64, 32, highbd_sad64x32x4d_c, 12),
-  make_tuple(32, 64, highbd_sad32x64x4d_c, 12),
-  make_tuple(32, 32, highbd_sad32x32x4d_c, 12),
-  make_tuple(32, 16, highbd_sad32x16x4d_c, 12),
-  make_tuple(16, 32, highbd_sad16x32x4d_c, 12),
-  make_tuple(16, 16, highbd_sad16x16x4d_c, 12),
-  make_tuple(16, 8, highbd_sad16x8x4d_c, 12),
-  make_tuple(8, 16, highbd_sad8x16x4d_c, 12),
-  make_tuple(8, 8, highbd_sad8x8x4d_c, 12),
-  make_tuple(8, 4, highbd_sad8x4x4d_c, 12),
-  make_tuple(4, 8, highbd_sad4x8x4d_c, 12),
-  make_tuple(4, 4, highbd_sad4x4x4d_c, 12),
+  make_tuple(64, 64, &vpx_highbd_sad64x64x4d_c, 8),
+  make_tuple(64, 32, &vpx_highbd_sad64x32x4d_c, 8),
+  make_tuple(32, 64, &vpx_highbd_sad32x64x4d_c, 8),
+  make_tuple(32, 32, &vpx_highbd_sad32x32x4d_c, 8),
+  make_tuple(32, 16, &vpx_highbd_sad32x16x4d_c, 8),
+  make_tuple(16, 32, &vpx_highbd_sad16x32x4d_c, 8),
+  make_tuple(16, 16, &vpx_highbd_sad16x16x4d_c, 8),
+  make_tuple(16, 8, &vpx_highbd_sad16x8x4d_c, 8),
+  make_tuple(8, 16, &vpx_highbd_sad8x16x4d_c, 8),
+  make_tuple(8, 8, &vpx_highbd_sad8x8x4d_c, 8),
+  make_tuple(8, 4, &vpx_highbd_sad8x4x4d_c, 8),
+  make_tuple(4, 8, &vpx_highbd_sad4x8x4d_c, 8),
+  make_tuple(4, 4, &vpx_highbd_sad4x4x4d_c, 8),
+  make_tuple(64, 64, &vpx_highbd_sad64x64x4d_c, 10),
+  make_tuple(64, 32, &vpx_highbd_sad64x32x4d_c, 10),
+  make_tuple(32, 64, &vpx_highbd_sad32x64x4d_c, 10),
+  make_tuple(32, 32, &vpx_highbd_sad32x32x4d_c, 10),
+  make_tuple(32, 16, &vpx_highbd_sad32x16x4d_c, 10),
+  make_tuple(16, 32, &vpx_highbd_sad16x32x4d_c, 10),
+  make_tuple(16, 16, &vpx_highbd_sad16x16x4d_c, 10),
+  make_tuple(16, 8, &vpx_highbd_sad16x8x4d_c, 10),
+  make_tuple(8, 16, &vpx_highbd_sad8x16x4d_c, 10),
+  make_tuple(8, 8, &vpx_highbd_sad8x8x4d_c, 10),
+  make_tuple(8, 4, &vpx_highbd_sad8x4x4d_c, 10),
+  make_tuple(4, 8, &vpx_highbd_sad4x8x4d_c, 10),
+  make_tuple(4, 4, &vpx_highbd_sad4x4x4d_c, 10),
+  make_tuple(64, 64, &vpx_highbd_sad64x64x4d_c, 12),
+  make_tuple(64, 32, &vpx_highbd_sad64x32x4d_c, 12),
+  make_tuple(32, 64, &vpx_highbd_sad32x64x4d_c, 12),
+  make_tuple(32, 32, &vpx_highbd_sad32x32x4d_c, 12),
+  make_tuple(32, 16, &vpx_highbd_sad32x16x4d_c, 12),
+  make_tuple(16, 32, &vpx_highbd_sad16x32x4d_c, 12),
+  make_tuple(16, 16, &vpx_highbd_sad16x16x4d_c, 12),
+  make_tuple(16, 8, &vpx_highbd_sad16x8x4d_c, 12),
+  make_tuple(8, 16, &vpx_highbd_sad8x16x4d_c, 12),
+  make_tuple(8, 8, &vpx_highbd_sad8x8x4d_c, 12),
+  make_tuple(8, 4, &vpx_highbd_sad8x4x4d_c, 12),
+  make_tuple(4, 8, &vpx_highbd_sad4x8x4d_c, 12),
+  make_tuple(4, 4, &vpx_highbd_sad4x4x4d_c, 12),
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 };
 INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::ValuesIn(x4d_c_tests));
@@ -745,40 +661,28 @@
 //------------------------------------------------------------------------------
 // ARM functions
 #if HAVE_MEDIA
-const SadMxNFunc sad16x16_media = vpx_sad16x16_media;
 const SadMxNParam media_tests[] = {
-  make_tuple(16, 16, sad16x16_media, -1),
+  make_tuple(16, 16, &vpx_sad16x16_media, -1),
 };
 INSTANTIATE_TEST_CASE_P(MEDIA, SADTest, ::testing::ValuesIn(media_tests));
 #endif  // HAVE_MEDIA
 
 #if HAVE_NEON
-const SadMxNFunc sad64x64_neon = vpx_sad64x64_neon;
-const SadMxNFunc sad32x32_neon = vpx_sad32x32_neon;
-const SadMxNFunc sad16x16_neon = vpx_sad16x16_neon;
-const SadMxNFunc sad16x8_neon = vpx_sad16x8_neon;
-const SadMxNFunc sad8x16_neon = vpx_sad8x16_neon;
-const SadMxNFunc sad8x8_neon = vpx_sad8x8_neon;
-const SadMxNFunc sad4x4_neon = vpx_sad4x4_neon;
-
 const SadMxNParam neon_tests[] = {
-  make_tuple(64, 64, sad64x64_neon, -1),
-  make_tuple(32, 32, sad32x32_neon, -1),
-  make_tuple(16, 16, sad16x16_neon, -1),
-  make_tuple(16, 8, sad16x8_neon, -1),
-  make_tuple(8, 16, sad8x16_neon, -1),
-  make_tuple(8, 8, sad8x8_neon, -1),
-  make_tuple(4, 4, sad4x4_neon, -1),
+  make_tuple(64, 64, &vpx_sad64x64_neon, -1),
+  make_tuple(32, 32, &vpx_sad32x32_neon, -1),
+  make_tuple(16, 16, &vpx_sad16x16_neon, -1),
+  make_tuple(16, 8, &vpx_sad16x8_neon, -1),
+  make_tuple(8, 16, &vpx_sad8x16_neon, -1),
+  make_tuple(8, 8, &vpx_sad8x8_neon, -1),
+  make_tuple(4, 4, &vpx_sad4x4_neon, -1),
 };
 INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::ValuesIn(neon_tests));
 
-const SadMxNx4Func sad64x64x4d_neon = vpx_sad64x64x4d_neon;
-const SadMxNx4Func sad32x32x4d_neon = vpx_sad32x32x4d_neon;
-const SadMxNx4Func sad16x16x4d_neon = vpx_sad16x16x4d_neon;
 const SadMxNx4Param x4d_neon_tests[] = {
-  make_tuple(64, 64, sad64x64x4d_neon, -1),
-  make_tuple(32, 32, sad32x32x4d_neon, -1),
-  make_tuple(16, 16, sad16x16x4d_neon, -1),
+  make_tuple(64, 64, &vpx_sad64x64x4d_neon, -1),
+  make_tuple(32, 32, &vpx_sad32x32x4d_neon, -1),
+  make_tuple(16, 16, &vpx_sad16x16x4d_neon, -1),
 };
 INSTANTIATE_TEST_CASE_P(NEON, SADx4Test, ::testing::ValuesIn(x4d_neon_tests));
 #endif  // HAVE_NEON
@@ -786,44 +690,33 @@
 //------------------------------------------------------------------------------
 // x86 functions
 #if HAVE_MMX
-const SadMxNFunc sad16x16_mmx = vpx_sad16x16_mmx;
-const SadMxNFunc sad16x8_mmx = vpx_sad16x8_mmx;
-const SadMxNFunc sad8x16_mmx = vpx_sad8x16_mmx;
-const SadMxNFunc sad8x8_mmx = vpx_sad8x8_mmx;
-const SadMxNFunc sad4x4_mmx = vpx_sad4x4_mmx;
 const SadMxNParam mmx_tests[] = {
-  make_tuple(16, 16, sad16x16_mmx, -1),
-  make_tuple(16, 8, sad16x8_mmx, -1),
-  make_tuple(8, 16, sad8x16_mmx, -1),
-  make_tuple(8, 8, sad8x8_mmx, -1),
-  make_tuple(4, 4, sad4x4_mmx, -1),
+  make_tuple(16, 16, &vpx_sad16x16_mmx, -1),
+  make_tuple(16, 8, &vpx_sad16x8_mmx, -1),
+  make_tuple(8, 16, &vpx_sad8x16_mmx, -1),
+  make_tuple(8, 8, &vpx_sad8x8_mmx, -1),
+  make_tuple(4, 4, &vpx_sad4x4_mmx, -1),
 };
 INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests));
 #endif  // HAVE_MMX
 
 #if HAVE_SSE
 #if CONFIG_USE_X86INC
-const SadMxNFunc sad4x8_sse = vpx_sad4x8_sse;
-const SadMxNFunc sad4x4_sse = vpx_sad4x4_sse;
 const SadMxNParam sse_tests[] = {
-  make_tuple(4, 8, sad4x8_sse, -1),
-  make_tuple(4, 4, sad4x4_sse, -1),
+  make_tuple(4, 8, &vpx_sad4x8_sse, -1),
+  make_tuple(4, 4, &vpx_sad4x4_sse, -1),
 };
 INSTANTIATE_TEST_CASE_P(SSE, SADTest, ::testing::ValuesIn(sse_tests));
 
-const SadMxNAvgFunc sad4x8_avg_sse = vpx_sad4x8_avg_sse;
-const SadMxNAvgFunc sad4x4_avg_sse = vpx_sad4x4_avg_sse;
 const SadMxNAvgParam avg_sse_tests[] = {
-  make_tuple(4, 8, sad4x8_avg_sse, -1),
-  make_tuple(4, 4, sad4x4_avg_sse, -1),
+  make_tuple(4, 8, &vpx_sad4x8_avg_sse, -1),
+  make_tuple(4, 4, &vpx_sad4x4_avg_sse, -1),
 };
 INSTANTIATE_TEST_CASE_P(SSE, SADavgTest, ::testing::ValuesIn(avg_sse_tests));
 
-const SadMxNx4Func sad4x8x4d_sse = vpx_sad4x8x4d_sse;
-const SadMxNx4Func sad4x4x4d_sse = vpx_sad4x4x4d_sse;
 const SadMxNx4Param x4d_sse_tests[] = {
-  make_tuple(4, 8, sad4x8x4d_sse, -1),
-  make_tuple(4, 4, sad4x4x4d_sse, -1),
+  make_tuple(4, 8, &vpx_sad4x8x4d_sse, -1),
+  make_tuple(4, 4, &vpx_sad4x4x4d_sse, -1),
 };
 INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::ValuesIn(x4d_sse_tests));
 #endif  // CONFIG_USE_X86INC
@@ -831,232 +724,158 @@
 
 #if HAVE_SSE2
 #if CONFIG_USE_X86INC
-const SadMxNFunc sad64x64_sse2 = vpx_sad64x64_sse2;
-const SadMxNFunc sad64x32_sse2 = vpx_sad64x32_sse2;
-const SadMxNFunc sad32x64_sse2 = vpx_sad32x64_sse2;
-const SadMxNFunc sad32x32_sse2 = vpx_sad32x32_sse2;
-const SadMxNFunc sad32x16_sse2 = vpx_sad32x16_sse2;
-const SadMxNFunc sad16x32_sse2 = vpx_sad16x32_sse2;
-const SadMxNFunc sad16x16_sse2 = vpx_sad16x16_sse2;
-const SadMxNFunc sad16x8_sse2 = vpx_sad16x8_sse2;
-const SadMxNFunc sad8x16_sse2 = vpx_sad8x16_sse2;
-const SadMxNFunc sad8x8_sse2 = vpx_sad8x8_sse2;
-const SadMxNFunc sad8x4_sse2 = vpx_sad8x4_sse2;
-#if CONFIG_VP9_HIGHBITDEPTH
-const SadMxNFunc highbd_sad64x64_sse2 = vpx_highbd_sad64x64_sse2;
-const SadMxNFunc highbd_sad64x32_sse2 = vpx_highbd_sad64x32_sse2;
-const SadMxNFunc highbd_sad32x64_sse2 = vpx_highbd_sad32x64_sse2;
-const SadMxNFunc highbd_sad32x32_sse2 = vpx_highbd_sad32x32_sse2;
-const SadMxNFunc highbd_sad32x16_sse2 = vpx_highbd_sad32x16_sse2;
-const SadMxNFunc highbd_sad16x32_sse2 = vpx_highbd_sad16x32_sse2;
-const SadMxNFunc highbd_sad16x16_sse2 = vpx_highbd_sad16x16_sse2;
-const SadMxNFunc highbd_sad16x8_sse2 = vpx_highbd_sad16x8_sse2;
-const SadMxNFunc highbd_sad8x16_sse2 = vpx_highbd_sad8x16_sse2;
-const SadMxNFunc highbd_sad8x8_sse2 = vpx_highbd_sad8x8_sse2;
-const SadMxNFunc highbd_sad8x4_sse2 = vpx_highbd_sad8x4_sse2;
-#endif  // CONFIG_VP9_HIGHBITDEPTH
 const SadMxNParam sse2_tests[] = {
-  make_tuple(64, 64, sad64x64_sse2, -1),
-  make_tuple(64, 32, sad64x32_sse2, -1),
-  make_tuple(32, 64, sad32x64_sse2, -1),
-  make_tuple(32, 32, sad32x32_sse2, -1),
-  make_tuple(32, 16, sad32x16_sse2, -1),
-  make_tuple(16, 32, sad16x32_sse2, -1),
-  make_tuple(16, 16, sad16x16_sse2, -1),
-  make_tuple(16, 8, sad16x8_sse2, -1),
-  make_tuple(8, 16, sad8x16_sse2, -1),
-  make_tuple(8, 8, sad8x8_sse2, -1),
-  make_tuple(8, 4, sad8x4_sse2, -1),
+  make_tuple(64, 64, &vpx_sad64x64_sse2, -1),
+  make_tuple(64, 32, &vpx_sad64x32_sse2, -1),
+  make_tuple(32, 64, &vpx_sad32x64_sse2, -1),
+  make_tuple(32, 32, &vpx_sad32x32_sse2, -1),
+  make_tuple(32, 16, &vpx_sad32x16_sse2, -1),
+  make_tuple(16, 32, &vpx_sad16x32_sse2, -1),
+  make_tuple(16, 16, &vpx_sad16x16_sse2, -1),
+  make_tuple(16, 8, &vpx_sad16x8_sse2, -1),
+  make_tuple(8, 16, &vpx_sad8x16_sse2, -1),
+  make_tuple(8, 8, &vpx_sad8x8_sse2, -1),
+  make_tuple(8, 4, &vpx_sad8x4_sse2, -1),
 #if CONFIG_VP9_HIGHBITDEPTH
-  make_tuple(64, 64, highbd_sad64x64_sse2, 8),
-  make_tuple(64, 32, highbd_sad64x32_sse2, 8),
-  make_tuple(32, 64, highbd_sad32x64_sse2, 8),
-  make_tuple(32, 32, highbd_sad32x32_sse2, 8),
-  make_tuple(32, 16, highbd_sad32x16_sse2, 8),
-  make_tuple(16, 32, highbd_sad16x32_sse2, 8),
-  make_tuple(16, 16, highbd_sad16x16_sse2, 8),
-  make_tuple(16, 8, highbd_sad16x8_sse2, 8),
-  make_tuple(8, 16, highbd_sad8x16_sse2, 8),
-  make_tuple(8, 8, highbd_sad8x8_sse2, 8),
-  make_tuple(8, 4, highbd_sad8x4_sse2, 8),
-  make_tuple(64, 64, highbd_sad64x64_sse2, 10),
-  make_tuple(64, 32, highbd_sad64x32_sse2, 10),
-  make_tuple(32, 64, highbd_sad32x64_sse2, 10),
-  make_tuple(32, 32, highbd_sad32x32_sse2, 10),
-  make_tuple(32, 16, highbd_sad32x16_sse2, 10),
-  make_tuple(16, 32, highbd_sad16x32_sse2, 10),
-  make_tuple(16, 16, highbd_sad16x16_sse2, 10),
-  make_tuple(16, 8, highbd_sad16x8_sse2, 10),
-  make_tuple(8, 16, highbd_sad8x16_sse2, 10),
-  make_tuple(8, 8, highbd_sad8x8_sse2, 10),
-  make_tuple(8, 4, highbd_sad8x4_sse2, 10),
-  make_tuple(64, 64, highbd_sad64x64_sse2, 12),
-  make_tuple(64, 32, highbd_sad64x32_sse2, 12),
-  make_tuple(32, 64, highbd_sad32x64_sse2, 12),
-  make_tuple(32, 32, highbd_sad32x32_sse2, 12),
-  make_tuple(32, 16, highbd_sad32x16_sse2, 12),
-  make_tuple(16, 32, highbd_sad16x32_sse2, 12),
-  make_tuple(16, 16, highbd_sad16x16_sse2, 12),
-  make_tuple(16, 8, highbd_sad16x8_sse2, 12),
-  make_tuple(8, 16, highbd_sad8x16_sse2, 12),
-  make_tuple(8, 8, highbd_sad8x8_sse2, 12),
-  make_tuple(8, 4, highbd_sad8x4_sse2, 12),
+  make_tuple(64, 64, &vpx_highbd_sad64x64_sse2, 8),
+  make_tuple(64, 32, &vpx_highbd_sad64x32_sse2, 8),
+  make_tuple(32, 64, &vpx_highbd_sad32x64_sse2, 8),
+  make_tuple(32, 32, &vpx_highbd_sad32x32_sse2, 8),
+  make_tuple(32, 16, &vpx_highbd_sad32x16_sse2, 8),
+  make_tuple(16, 32, &vpx_highbd_sad16x32_sse2, 8),
+  make_tuple(16, 16, &vpx_highbd_sad16x16_sse2, 8),
+  make_tuple(16, 8, &vpx_highbd_sad16x8_sse2, 8),
+  make_tuple(8, 16, &vpx_highbd_sad8x16_sse2, 8),
+  make_tuple(8, 8, &vpx_highbd_sad8x8_sse2, 8),
+  make_tuple(8, 4, &vpx_highbd_sad8x4_sse2, 8),
+  make_tuple(64, 64, &vpx_highbd_sad64x64_sse2, 10),
+  make_tuple(64, 32, &vpx_highbd_sad64x32_sse2, 10),
+  make_tuple(32, 64, &vpx_highbd_sad32x64_sse2, 10),
+  make_tuple(32, 32, &vpx_highbd_sad32x32_sse2, 10),
+  make_tuple(32, 16, &vpx_highbd_sad32x16_sse2, 10),
+  make_tuple(16, 32, &vpx_highbd_sad16x32_sse2, 10),
+  make_tuple(16, 16, &vpx_highbd_sad16x16_sse2, 10),
+  make_tuple(16, 8, &vpx_highbd_sad16x8_sse2, 10),
+  make_tuple(8, 16, &vpx_highbd_sad8x16_sse2, 10),
+  make_tuple(8, 8, &vpx_highbd_sad8x8_sse2, 10),
+  make_tuple(8, 4, &vpx_highbd_sad8x4_sse2, 10),
+  make_tuple(64, 64, &vpx_highbd_sad64x64_sse2, 12),
+  make_tuple(64, 32, &vpx_highbd_sad64x32_sse2, 12),
+  make_tuple(32, 64, &vpx_highbd_sad32x64_sse2, 12),
+  make_tuple(32, 32, &vpx_highbd_sad32x32_sse2, 12),
+  make_tuple(32, 16, &vpx_highbd_sad32x16_sse2, 12),
+  make_tuple(16, 32, &vpx_highbd_sad16x32_sse2, 12),
+  make_tuple(16, 16, &vpx_highbd_sad16x16_sse2, 12),
+  make_tuple(16, 8, &vpx_highbd_sad16x8_sse2, 12),
+  make_tuple(8, 16, &vpx_highbd_sad8x16_sse2, 12),
+  make_tuple(8, 8, &vpx_highbd_sad8x8_sse2, 12),
+  make_tuple(8, 4, &vpx_highbd_sad8x4_sse2, 12),
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 };
 INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));
 
-const SadMxNAvgFunc sad64x64_avg_sse2 = vpx_sad64x64_avg_sse2;
-const SadMxNAvgFunc sad64x32_avg_sse2 = vpx_sad64x32_avg_sse2;
-const SadMxNAvgFunc sad32x64_avg_sse2 = vpx_sad32x64_avg_sse2;
-const SadMxNAvgFunc sad32x32_avg_sse2 = vpx_sad32x32_avg_sse2;
-const SadMxNAvgFunc sad32x16_avg_sse2 = vpx_sad32x16_avg_sse2;
-const SadMxNAvgFunc sad16x32_avg_sse2 = vpx_sad16x32_avg_sse2;
-const SadMxNAvgFunc sad16x16_avg_sse2 = vpx_sad16x16_avg_sse2;
-const SadMxNAvgFunc sad16x8_avg_sse2 = vpx_sad16x8_avg_sse2;
-const SadMxNAvgFunc sad8x16_avg_sse2 = vpx_sad8x16_avg_sse2;
-const SadMxNAvgFunc sad8x8_avg_sse2 = vpx_sad8x8_avg_sse2;
-const SadMxNAvgFunc sad8x4_avg_sse2 = vpx_sad8x4_avg_sse2;
-#if CONFIG_VP9_HIGHBITDEPTH
-const SadMxNAvgFunc highbd_sad64x64_avg_sse2 = vpx_highbd_sad64x64_avg_sse2;
-const SadMxNAvgFunc highbd_sad64x32_avg_sse2 = vpx_highbd_sad64x32_avg_sse2;
-const SadMxNAvgFunc highbd_sad32x64_avg_sse2 = vpx_highbd_sad32x64_avg_sse2;
-const SadMxNAvgFunc highbd_sad32x32_avg_sse2 = vpx_highbd_sad32x32_avg_sse2;
-const SadMxNAvgFunc highbd_sad32x16_avg_sse2 = vpx_highbd_sad32x16_avg_sse2;
-const SadMxNAvgFunc highbd_sad16x32_avg_sse2 = vpx_highbd_sad16x32_avg_sse2;
-const SadMxNAvgFunc highbd_sad16x16_avg_sse2 = vpx_highbd_sad16x16_avg_sse2;
-const SadMxNAvgFunc highbd_sad16x8_avg_sse2 = vpx_highbd_sad16x8_avg_sse2;
-const SadMxNAvgFunc highbd_sad8x16_avg_sse2 = vpx_highbd_sad8x16_avg_sse2;
-const SadMxNAvgFunc highbd_sad8x8_avg_sse2 = vpx_highbd_sad8x8_avg_sse2;
-const SadMxNAvgFunc highbd_sad8x4_avg_sse2 = vpx_highbd_sad8x4_avg_sse2;
-#endif  // CONFIG_VP9_HIGHBITDEPTH
 const SadMxNAvgParam avg_sse2_tests[] = {
-  make_tuple(64, 64, sad64x64_avg_sse2, -1),
-  make_tuple(64, 32, sad64x32_avg_sse2, -1),
-  make_tuple(32, 64, sad32x64_avg_sse2, -1),
-  make_tuple(32, 32, sad32x32_avg_sse2, -1),
-  make_tuple(32, 16, sad32x16_avg_sse2, -1),
-  make_tuple(16, 32, sad16x32_avg_sse2, -1),
-  make_tuple(16, 16, sad16x16_avg_sse2, -1),
-  make_tuple(16, 8, sad16x8_avg_sse2, -1),
-  make_tuple(8, 16, sad8x16_avg_sse2, -1),
-  make_tuple(8, 8, sad8x8_avg_sse2, -1),
-  make_tuple(8, 4, sad8x4_avg_sse2, -1),
+  make_tuple(64, 64, &vpx_sad64x64_avg_sse2, -1),
+  make_tuple(64, 32, &vpx_sad64x32_avg_sse2, -1),
+  make_tuple(32, 64, &vpx_sad32x64_avg_sse2, -1),
+  make_tuple(32, 32, &vpx_sad32x32_avg_sse2, -1),
+  make_tuple(32, 16, &vpx_sad32x16_avg_sse2, -1),
+  make_tuple(16, 32, &vpx_sad16x32_avg_sse2, -1),
+  make_tuple(16, 16, &vpx_sad16x16_avg_sse2, -1),
+  make_tuple(16, 8, &vpx_sad16x8_avg_sse2, -1),
+  make_tuple(8, 16, &vpx_sad8x16_avg_sse2, -1),
+  make_tuple(8, 8, &vpx_sad8x8_avg_sse2, -1),
+  make_tuple(8, 4, &vpx_sad8x4_avg_sse2, -1),
 #if CONFIG_VP9_HIGHBITDEPTH
-  make_tuple(64, 64, highbd_sad64x64_avg_sse2, 8),
-  make_tuple(64, 32, highbd_sad64x32_avg_sse2, 8),
-  make_tuple(32, 64, highbd_sad32x64_avg_sse2, 8),
-  make_tuple(32, 32, highbd_sad32x32_avg_sse2, 8),
-  make_tuple(32, 16, highbd_sad32x16_avg_sse2, 8),
-  make_tuple(16, 32, highbd_sad16x32_avg_sse2, 8),
-  make_tuple(16, 16, highbd_sad16x16_avg_sse2, 8),
-  make_tuple(16, 8, highbd_sad16x8_avg_sse2, 8),
-  make_tuple(8, 16, highbd_sad8x16_avg_sse2, 8),
-  make_tuple(8, 8, highbd_sad8x8_avg_sse2, 8),
-  make_tuple(8, 4, highbd_sad8x4_avg_sse2, 8),
-  make_tuple(64, 64, highbd_sad64x64_avg_sse2, 10),
-  make_tuple(64, 32, highbd_sad64x32_avg_sse2, 10),
-  make_tuple(32, 64, highbd_sad32x64_avg_sse2, 10),
-  make_tuple(32, 32, highbd_sad32x32_avg_sse2, 10),
-  make_tuple(32, 16, highbd_sad32x16_avg_sse2, 10),
-  make_tuple(16, 32, highbd_sad16x32_avg_sse2, 10),
-  make_tuple(16, 16, highbd_sad16x16_avg_sse2, 10),
-  make_tuple(16, 8, highbd_sad16x8_avg_sse2, 10),
-  make_tuple(8, 16, highbd_sad8x16_avg_sse2, 10),
-  make_tuple(8, 8, highbd_sad8x8_avg_sse2, 10),
-  make_tuple(8, 4, highbd_sad8x4_avg_sse2, 10),
-  make_tuple(64, 64, highbd_sad64x64_avg_sse2, 12),
-  make_tuple(64, 32, highbd_sad64x32_avg_sse2, 12),
-  make_tuple(32, 64, highbd_sad32x64_avg_sse2, 12),
-  make_tuple(32, 32, highbd_sad32x32_avg_sse2, 12),
-  make_tuple(32, 16, highbd_sad32x16_avg_sse2, 12),
-  make_tuple(16, 32, highbd_sad16x32_avg_sse2, 12),
-  make_tuple(16, 16, highbd_sad16x16_avg_sse2, 12),
-  make_tuple(16, 8, highbd_sad16x8_avg_sse2, 12),
-  make_tuple(8, 16, highbd_sad8x16_avg_sse2, 12),
-  make_tuple(8, 8, highbd_sad8x8_avg_sse2, 12),
-  make_tuple(8, 4, highbd_sad8x4_avg_sse2, 12),
+  make_tuple(64, 64, &vpx_highbd_sad64x64_avg_sse2, 8),
+  make_tuple(64, 32, &vpx_highbd_sad64x32_avg_sse2, 8),
+  make_tuple(32, 64, &vpx_highbd_sad32x64_avg_sse2, 8),
+  make_tuple(32, 32, &vpx_highbd_sad32x32_avg_sse2, 8),
+  make_tuple(32, 16, &vpx_highbd_sad32x16_avg_sse2, 8),
+  make_tuple(16, 32, &vpx_highbd_sad16x32_avg_sse2, 8),
+  make_tuple(16, 16, &vpx_highbd_sad16x16_avg_sse2, 8),
+  make_tuple(16, 8, &vpx_highbd_sad16x8_avg_sse2, 8),
+  make_tuple(8, 16, &vpx_highbd_sad8x16_avg_sse2, 8),
+  make_tuple(8, 8, &vpx_highbd_sad8x8_avg_sse2, 8),
+  make_tuple(8, 4, &vpx_highbd_sad8x4_avg_sse2, 8),
+  make_tuple(64, 64, &vpx_highbd_sad64x64_avg_sse2, 10),
+  make_tuple(64, 32, &vpx_highbd_sad64x32_avg_sse2, 10),
+  make_tuple(32, 64, &vpx_highbd_sad32x64_avg_sse2, 10),
+  make_tuple(32, 32, &vpx_highbd_sad32x32_avg_sse2, 10),
+  make_tuple(32, 16, &vpx_highbd_sad32x16_avg_sse2, 10),
+  make_tuple(16, 32, &vpx_highbd_sad16x32_avg_sse2, 10),
+  make_tuple(16, 16, &vpx_highbd_sad16x16_avg_sse2, 10),
+  make_tuple(16, 8, &vpx_highbd_sad16x8_avg_sse2, 10),
+  make_tuple(8, 16, &vpx_highbd_sad8x16_avg_sse2, 10),
+  make_tuple(8, 8, &vpx_highbd_sad8x8_avg_sse2, 10),
+  make_tuple(8, 4, &vpx_highbd_sad8x4_avg_sse2, 10),
+  make_tuple(64, 64, &vpx_highbd_sad64x64_avg_sse2, 12),
+  make_tuple(64, 32, &vpx_highbd_sad64x32_avg_sse2, 12),
+  make_tuple(32, 64, &vpx_highbd_sad32x64_avg_sse2, 12),
+  make_tuple(32, 32, &vpx_highbd_sad32x32_avg_sse2, 12),
+  make_tuple(32, 16, &vpx_highbd_sad32x16_avg_sse2, 12),
+  make_tuple(16, 32, &vpx_highbd_sad16x32_avg_sse2, 12),
+  make_tuple(16, 16, &vpx_highbd_sad16x16_avg_sse2, 12),
+  make_tuple(16, 8, &vpx_highbd_sad16x8_avg_sse2, 12),
+  make_tuple(8, 16, &vpx_highbd_sad8x16_avg_sse2, 12),
+  make_tuple(8, 8, &vpx_highbd_sad8x8_avg_sse2, 12),
+  make_tuple(8, 4, &vpx_highbd_sad8x4_avg_sse2, 12),
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 };
 INSTANTIATE_TEST_CASE_P(SSE2, SADavgTest, ::testing::ValuesIn(avg_sse2_tests));
 
-const SadMxNx4Func sad64x64x4d_sse2 = vpx_sad64x64x4d_sse2;
-const SadMxNx4Func sad64x32x4d_sse2 = vpx_sad64x32x4d_sse2;
-const SadMxNx4Func sad32x64x4d_sse2 = vpx_sad32x64x4d_sse2;
-const SadMxNx4Func sad32x32x4d_sse2 = vpx_sad32x32x4d_sse2;
-const SadMxNx4Func sad32x16x4d_sse2 = vpx_sad32x16x4d_sse2;
-const SadMxNx4Func sad16x32x4d_sse2 = vpx_sad16x32x4d_sse2;
-const SadMxNx4Func sad16x16x4d_sse2 = vpx_sad16x16x4d_sse2;
-const SadMxNx4Func sad16x8x4d_sse2 = vpx_sad16x8x4d_sse2;
-const SadMxNx4Func sad8x16x4d_sse2 = vpx_sad8x16x4d_sse2;
-const SadMxNx4Func sad8x8x4d_sse2 = vpx_sad8x8x4d_sse2;
-const SadMxNx4Func sad8x4x4d_sse2 = vpx_sad8x4x4d_sse2;
-#if CONFIG_VP9_HIGHBITDEPTH
-const SadMxNx4Func highbd_sad64x64x4d_sse2 = vpx_highbd_sad64x64x4d_sse2;
-const SadMxNx4Func highbd_sad64x32x4d_sse2 = vpx_highbd_sad64x32x4d_sse2;
-const SadMxNx4Func highbd_sad32x64x4d_sse2 = vpx_highbd_sad32x64x4d_sse2;
-const SadMxNx4Func highbd_sad32x32x4d_sse2 = vpx_highbd_sad32x32x4d_sse2;
-const SadMxNx4Func highbd_sad32x16x4d_sse2 = vpx_highbd_sad32x16x4d_sse2;
-const SadMxNx4Func highbd_sad16x32x4d_sse2 = vpx_highbd_sad16x32x4d_sse2;
-const SadMxNx4Func highbd_sad16x16x4d_sse2 = vpx_highbd_sad16x16x4d_sse2;
-const SadMxNx4Func highbd_sad16x8x4d_sse2 = vpx_highbd_sad16x8x4d_sse2;
-const SadMxNx4Func highbd_sad8x16x4d_sse2 = vpx_highbd_sad8x16x4d_sse2;
-const SadMxNx4Func highbd_sad8x8x4d_sse2 = vpx_highbd_sad8x8x4d_sse2;
-const SadMxNx4Func highbd_sad8x4x4d_sse2 = vpx_highbd_sad8x4x4d_sse2;
-const SadMxNx4Func highbd_sad4x8x4d_sse2 = vpx_highbd_sad4x8x4d_sse2;
-const SadMxNx4Func highbd_sad4x4x4d_sse2 = vpx_highbd_sad4x4x4d_sse2;
-#endif  // CONFIG_VP9_HIGHBITDEPTH
 const SadMxNx4Param x4d_sse2_tests[] = {
-  make_tuple(64, 64, sad64x64x4d_sse2, -1),
-  make_tuple(64, 32, sad64x32x4d_sse2, -1),
-  make_tuple(32, 64, sad32x64x4d_sse2, -1),
-  make_tuple(32, 32, sad32x32x4d_sse2, -1),
-  make_tuple(32, 16, sad32x16x4d_sse2, -1),
-  make_tuple(16, 32, sad16x32x4d_sse2, -1),
-  make_tuple(16, 16, sad16x16x4d_sse2, -1),
-  make_tuple(16, 8, sad16x8x4d_sse2, -1),
-  make_tuple(8, 16, sad8x16x4d_sse2, -1),
-  make_tuple(8, 8, sad8x8x4d_sse2, -1),
-  make_tuple(8, 4, sad8x4x4d_sse2, -1),
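+  // Entries are width, height, function, bit depth (-1 for the 8-bit path).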
+  make_tuple(64, 64, &vpx_sad64x64x4d_sse2, -1),
+  make_tuple(64, 32, &vpx_sad64x32x4d_sse2, -1),
+  make_tuple(32, 64, &vpx_sad32x64x4d_sse2, -1),
+  make_tuple(32, 32, &vpx_sad32x32x4d_sse2, -1),
+  make_tuple(32, 16, &vpx_sad32x16x4d_sse2, -1),
+  make_tuple(16, 32, &vpx_sad16x32x4d_sse2, -1),
+  make_tuple(16, 16, &vpx_sad16x16x4d_sse2, -1),
+  make_tuple(16, 8, &vpx_sad16x8x4d_sse2, -1),
+  make_tuple(8, 16, &vpx_sad8x16x4d_sse2, -1),
+  make_tuple(8, 8, &vpx_sad8x8x4d_sse2, -1),
+  make_tuple(8, 4, &vpx_sad8x4x4d_sse2, -1),
 #if CONFIG_VP9_HIGHBITDEPTH
-  make_tuple(64, 64, highbd_sad64x64x4d_sse2, 8),
-  make_tuple(64, 32, highbd_sad64x32x4d_sse2, 8),
-  make_tuple(32, 64, highbd_sad32x64x4d_sse2, 8),
-  make_tuple(32, 32, highbd_sad32x32x4d_sse2, 8),
-  make_tuple(32, 16, highbd_sad32x16x4d_sse2, 8),
-  make_tuple(16, 32, highbd_sad16x32x4d_sse2, 8),
-  make_tuple(16, 16, highbd_sad16x16x4d_sse2, 8),
-  make_tuple(16, 8, highbd_sad16x8x4d_sse2, 8),
-  make_tuple(8, 16, highbd_sad8x16x4d_sse2, 8),
-  make_tuple(8, 8, highbd_sad8x8x4d_sse2, 8),
-  make_tuple(8, 4, highbd_sad8x4x4d_sse2, 8),
-  make_tuple(4, 8, highbd_sad4x8x4d_sse2, 8),
-  make_tuple(4, 4, highbd_sad4x4x4d_sse2, 8),
-  make_tuple(64, 64, highbd_sad64x64x4d_sse2, 10),
-  make_tuple(64, 32, highbd_sad64x32x4d_sse2, 10),
-  make_tuple(32, 64, highbd_sad32x64x4d_sse2, 10),
-  make_tuple(32, 32, highbd_sad32x32x4d_sse2, 10),
-  make_tuple(32, 16, highbd_sad32x16x4d_sse2, 10),
-  make_tuple(16, 32, highbd_sad16x32x4d_sse2, 10),
-  make_tuple(16, 16, highbd_sad16x16x4d_sse2, 10),
-  make_tuple(16, 8, highbd_sad16x8x4d_sse2, 10),
-  make_tuple(8, 16, highbd_sad8x16x4d_sse2, 10),
-  make_tuple(8, 8, highbd_sad8x8x4d_sse2, 10),
-  make_tuple(8, 4, highbd_sad8x4x4d_sse2, 10),
-  make_tuple(4, 8, highbd_sad4x8x4d_sse2, 10),
-  make_tuple(4, 4, highbd_sad4x4x4d_sse2, 10),
-  make_tuple(64, 64, highbd_sad64x64x4d_sse2, 12),
-  make_tuple(64, 32, highbd_sad64x32x4d_sse2, 12),
-  make_tuple(32, 64, highbd_sad32x64x4d_sse2, 12),
-  make_tuple(32, 32, highbd_sad32x32x4d_sse2, 12),
-  make_tuple(32, 16, highbd_sad32x16x4d_sse2, 12),
-  make_tuple(16, 32, highbd_sad16x32x4d_sse2, 12),
-  make_tuple(16, 16, highbd_sad16x16x4d_sse2, 12),
-  make_tuple(16, 8, highbd_sad16x8x4d_sse2, 12),
-  make_tuple(8, 16, highbd_sad8x16x4d_sse2, 12),
-  make_tuple(8, 8, highbd_sad8x8x4d_sse2, 12),
-  make_tuple(8, 4, highbd_sad8x4x4d_sse2, 12),
-  make_tuple(4, 8, highbd_sad4x8x4d_sse2, 12),
-  make_tuple(4, 4, highbd_sad4x4x4d_sse2, 12),
+  make_tuple(64, 64, &vpx_highbd_sad64x64x4d_sse2, 8),
+  make_tuple(64, 32, &vpx_highbd_sad64x32x4d_sse2, 8),
+  make_tuple(32, 64, &vpx_highbd_sad32x64x4d_sse2, 8),
+  make_tuple(32, 32, &vpx_highbd_sad32x32x4d_sse2, 8),
+  make_tuple(32, 16, &vpx_highbd_sad32x16x4d_sse2, 8),
+  make_tuple(16, 32, &vpx_highbd_sad16x32x4d_sse2, 8),
+  make_tuple(16, 16, &vpx_highbd_sad16x16x4d_sse2, 8),
+  make_tuple(16, 8, &vpx_highbd_sad16x8x4d_sse2, 8),
+  make_tuple(8, 16, &vpx_highbd_sad8x16x4d_sse2, 8),
+  make_tuple(8, 8, &vpx_highbd_sad8x8x4d_sse2, 8),
+  make_tuple(8, 4, &vpx_highbd_sad8x4x4d_sse2, 8),
+  make_tuple(4, 8, &vpx_highbd_sad4x8x4d_sse2, 8),
+  make_tuple(4, 4, &vpx_highbd_sad4x4x4d_sse2, 8),
+  make_tuple(64, 64, &vpx_highbd_sad64x64x4d_sse2, 10),
+  make_tuple(64, 32, &vpx_highbd_sad64x32x4d_sse2, 10),
+  make_tuple(32, 64, &vpx_highbd_sad32x64x4d_sse2, 10),
+  make_tuple(32, 32, &vpx_highbd_sad32x32x4d_sse2, 10),
+  make_tuple(32, 16, &vpx_highbd_sad32x16x4d_sse2, 10),
+  make_tuple(16, 32, &vpx_highbd_sad16x32x4d_sse2, 10),
+  make_tuple(16, 16, &vpx_highbd_sad16x16x4d_sse2, 10),
+  make_tuple(16, 8, &vpx_highbd_sad16x8x4d_sse2, 10),
+  make_tuple(8, 16, &vpx_highbd_sad8x16x4d_sse2, 10),
+  make_tuple(8, 8, &vpx_highbd_sad8x8x4d_sse2, 10),
+  make_tuple(8, 4, &vpx_highbd_sad8x4x4d_sse2, 10),
+  make_tuple(4, 8, &vpx_highbd_sad4x8x4d_sse2, 10),
+  make_tuple(4, 4, &vpx_highbd_sad4x4x4d_sse2, 10),
+  make_tuple(64, 64, &vpx_highbd_sad64x64x4d_sse2, 12),
+  make_tuple(64, 32, &vpx_highbd_sad64x32x4d_sse2, 12),
+  make_tuple(32, 64, &vpx_highbd_sad32x64x4d_sse2, 12),
+  make_tuple(32, 32, &vpx_highbd_sad32x32x4d_sse2, 12),
+  make_tuple(32, 16, &vpx_highbd_sad32x16x4d_sse2, 12),
+  make_tuple(16, 32, &vpx_highbd_sad16x32x4d_sse2, 12),
+  make_tuple(16, 16, &vpx_highbd_sad16x16x4d_sse2, 12),
+  make_tuple(16, 8, &vpx_highbd_sad16x8x4d_sse2, 12),
+  make_tuple(8, 16, &vpx_highbd_sad8x16x4d_sse2, 12),
+  make_tuple(8, 8, &vpx_highbd_sad8x8x4d_sse2, 12),
+  make_tuple(8, 4, &vpx_highbd_sad8x4x4d_sse2, 12),
+  make_tuple(4, 8, &vpx_highbd_sad4x8x4d_sse2, 12),
+  make_tuple(4, 4, &vpx_highbd_sad4x4x4d_sse2, 12),
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 };
 INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::ValuesIn(x4d_sse2_tests));
@@ -1076,39 +895,27 @@
 #endif  // HAVE_SSE4_1
 
 #if HAVE_AVX2
-const SadMxNFunc sad64x64_avx2 = vpx_sad64x64_avx2;
-const SadMxNFunc sad64x32_avx2 = vpx_sad64x32_avx2;
-const SadMxNFunc sad32x64_avx2 = vpx_sad32x64_avx2;
-const SadMxNFunc sad32x32_avx2 = vpx_sad32x32_avx2;
-const SadMxNFunc sad32x16_avx2 = vpx_sad32x16_avx2;
 const SadMxNParam avx2_tests[] = {
-  make_tuple(64, 64, sad64x64_avx2, -1),
-  make_tuple(64, 32, sad64x32_avx2, -1),
-  make_tuple(32, 64, sad32x64_avx2, -1),
-  make_tuple(32, 32, sad32x32_avx2, -1),
-  make_tuple(32, 16, sad32x16_avx2, -1),
+  make_tuple(64, 64, &vpx_sad64x64_avx2, -1),
+  make_tuple(64, 32, &vpx_sad64x32_avx2, -1),
+  make_tuple(32, 64, &vpx_sad32x64_avx2, -1),
+  make_tuple(32, 32, &vpx_sad32x32_avx2, -1),
+  make_tuple(32, 16, &vpx_sad32x16_avx2, -1),
 };
 INSTANTIATE_TEST_CASE_P(AVX2, SADTest, ::testing::ValuesIn(avx2_tests));
 
-const SadMxNAvgFunc sad64x64_avg_avx2 = vpx_sad64x64_avg_avx2;
-const SadMxNAvgFunc sad64x32_avg_avx2 = vpx_sad64x32_avg_avx2;
-const SadMxNAvgFunc sad32x64_avg_avx2 = vpx_sad32x64_avg_avx2;
-const SadMxNAvgFunc sad32x32_avg_avx2 = vpx_sad32x32_avg_avx2;
-const SadMxNAvgFunc sad32x16_avg_avx2 = vpx_sad32x16_avg_avx2;
 const SadMxNAvgParam avg_avx2_tests[] = {
-  make_tuple(64, 64, sad64x64_avg_avx2, -1),
-  make_tuple(64, 32, sad64x32_avg_avx2, -1),
-  make_tuple(32, 64, sad32x64_avg_avx2, -1),
-  make_tuple(32, 32, sad32x32_avg_avx2, -1),
-  make_tuple(32, 16, sad32x16_avg_avx2, -1),
+  make_tuple(64, 64, &vpx_sad64x64_avg_avx2, -1),
+  make_tuple(64, 32, &vpx_sad64x32_avg_avx2, -1),
+  make_tuple(32, 64, &vpx_sad32x64_avg_avx2, -1),
+  make_tuple(32, 32, &vpx_sad32x32_avg_avx2, -1),
+  make_tuple(32, 16, &vpx_sad32x16_avg_avx2, -1),
 };
 INSTANTIATE_TEST_CASE_P(AVX2, SADavgTest, ::testing::ValuesIn(avg_avx2_tests));
 
-const SadMxNx4Func sad64x64x4d_avx2 = vpx_sad64x64x4d_avx2;
-const SadMxNx4Func sad32x32x4d_avx2 = vpx_sad32x32x4d_avx2;
 const SadMxNx4Param x4d_avx2_tests[] = {
-  make_tuple(64, 64, sad64x64x4d_avx2, -1),
-  make_tuple(32, 32, sad32x32x4d_avx2, -1),
+  make_tuple(64, 64, &vpx_sad64x64x4d_avx2, -1),
+  make_tuple(32, 32, &vpx_sad32x32x4d_avx2, -1),
 };
 INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::ValuesIn(x4d_avx2_tests));
 #endif  // HAVE_AVX2
@@ -1116,93 +923,54 @@
 //------------------------------------------------------------------------------
 // MIPS functions
 #if HAVE_MSA
-const SadMxNFunc sad64x64_msa = vpx_sad64x64_msa;
-const SadMxNFunc sad64x32_msa = vpx_sad64x32_msa;
-const SadMxNFunc sad32x64_msa = vpx_sad32x64_msa;
-const SadMxNFunc sad32x32_msa = vpx_sad32x32_msa;
-const SadMxNFunc sad32x16_msa = vpx_sad32x16_msa;
-const SadMxNFunc sad16x32_msa = vpx_sad16x32_msa;
-const SadMxNFunc sad16x16_msa = vpx_sad16x16_msa;
-const SadMxNFunc sad16x8_msa = vpx_sad16x8_msa;
-const SadMxNFunc sad8x16_msa = vpx_sad8x16_msa;
-const SadMxNFunc sad8x8_msa = vpx_sad8x8_msa;
-const SadMxNFunc sad8x4_msa = vpx_sad8x4_msa;
-const SadMxNFunc sad4x8_msa = vpx_sad4x8_msa;
-const SadMxNFunc sad4x4_msa = vpx_sad4x4_msa;
 const SadMxNParam msa_tests[] = {
-  make_tuple(64, 64, sad64x64_msa, -1),
-  make_tuple(64, 32, sad64x32_msa, -1),
-  make_tuple(32, 64, sad32x64_msa, -1),
-  make_tuple(32, 32, sad32x32_msa, -1),
-  make_tuple(32, 16, sad32x16_msa, -1),
-  make_tuple(16, 32, sad16x32_msa, -1),
-  make_tuple(16, 16, sad16x16_msa, -1),
-  make_tuple(16, 8, sad16x8_msa, -1),
-  make_tuple(8, 16, sad8x16_msa, -1),
-  make_tuple(8, 8, sad8x8_msa, -1),
-  make_tuple(8, 4, sad8x4_msa, -1),
-  make_tuple(4, 8, sad4x8_msa, -1),
-  make_tuple(4, 4, sad4x4_msa, -1),
+  make_tuple(64, 64, &vpx_sad64x64_msa, -1),
+  make_tuple(64, 32, &vpx_sad64x32_msa, -1),
+  make_tuple(32, 64, &vpx_sad32x64_msa, -1),
+  make_tuple(32, 32, &vpx_sad32x32_msa, -1),
+  make_tuple(32, 16, &vpx_sad32x16_msa, -1),
+  make_tuple(16, 32, &vpx_sad16x32_msa, -1),
+  make_tuple(16, 16, &vpx_sad16x16_msa, -1),
+  make_tuple(16, 8, &vpx_sad16x8_msa, -1),
+  make_tuple(8, 16, &vpx_sad8x16_msa, -1),
+  make_tuple(8, 8, &vpx_sad8x8_msa, -1),
+  make_tuple(8, 4, &vpx_sad8x4_msa, -1),
+  make_tuple(4, 8, &vpx_sad4x8_msa, -1),
+  make_tuple(4, 4, &vpx_sad4x4_msa, -1),
 };
 INSTANTIATE_TEST_CASE_P(MSA, SADTest, ::testing::ValuesIn(msa_tests));
 
-const SadMxNAvgFunc sad64x64_avg_msa = vpx_sad64x64_avg_msa;
-const SadMxNAvgFunc sad64x32_avg_msa = vpx_sad64x32_avg_msa;
-const SadMxNAvgFunc sad32x64_avg_msa = vpx_sad32x64_avg_msa;
-const SadMxNAvgFunc sad32x32_avg_msa = vpx_sad32x32_avg_msa;
-const SadMxNAvgFunc sad32x16_avg_msa = vpx_sad32x16_avg_msa;
-const SadMxNAvgFunc sad16x32_avg_msa = vpx_sad16x32_avg_msa;
-const SadMxNAvgFunc sad16x16_avg_msa = vpx_sad16x16_avg_msa;
-const SadMxNAvgFunc sad16x8_avg_msa = vpx_sad16x8_avg_msa;
-const SadMxNAvgFunc sad8x16_avg_msa = vpx_sad8x16_avg_msa;
-const SadMxNAvgFunc sad8x8_avg_msa = vpx_sad8x8_avg_msa;
-const SadMxNAvgFunc sad8x4_avg_msa = vpx_sad8x4_avg_msa;
-const SadMxNAvgFunc sad4x8_avg_msa = vpx_sad4x8_avg_msa;
-const SadMxNAvgFunc sad4x4_avg_msa = vpx_sad4x4_avg_msa;
 const SadMxNAvgParam avg_msa_tests[] = {
-  make_tuple(64, 64, sad64x64_avg_msa, -1),
-  make_tuple(64, 32, sad64x32_avg_msa, -1),
-  make_tuple(32, 64, sad32x64_avg_msa, -1),
-  make_tuple(32, 32, sad32x32_avg_msa, -1),
-  make_tuple(32, 16, sad32x16_avg_msa, -1),
-  make_tuple(16, 32, sad16x32_avg_msa, -1),
-  make_tuple(16, 16, sad16x16_avg_msa, -1),
-  make_tuple(16, 8, sad16x8_avg_msa, -1),
-  make_tuple(8, 16, sad8x16_avg_msa, -1),
-  make_tuple(8, 8, sad8x8_avg_msa, -1),
-  make_tuple(8, 4, sad8x4_avg_msa, -1),
-  make_tuple(4, 8, sad4x8_avg_msa, -1),
-  make_tuple(4, 4, sad4x4_avg_msa, -1),
+  make_tuple(64, 64, &vpx_sad64x64_avg_msa, -1),
+  make_tuple(64, 32, &vpx_sad64x32_avg_msa, -1),
+  make_tuple(32, 64, &vpx_sad32x64_avg_msa, -1),
+  make_tuple(32, 32, &vpx_sad32x32_avg_msa, -1),
+  make_tuple(32, 16, &vpx_sad32x16_avg_msa, -1),
+  make_tuple(16, 32, &vpx_sad16x32_avg_msa, -1),
+  make_tuple(16, 16, &vpx_sad16x16_avg_msa, -1),
+  make_tuple(16, 8, &vpx_sad16x8_avg_msa, -1),
+  make_tuple(8, 16, &vpx_sad8x16_avg_msa, -1),
+  make_tuple(8, 8, &vpx_sad8x8_avg_msa, -1),
+  make_tuple(8, 4, &vpx_sad8x4_avg_msa, -1),
+  make_tuple(4, 8, &vpx_sad4x8_avg_msa, -1),
+  make_tuple(4, 4, &vpx_sad4x4_avg_msa, -1),
 };
 INSTANTIATE_TEST_CASE_P(MSA, SADavgTest, ::testing::ValuesIn(avg_msa_tests));
 
-const SadMxNx4Func sad64x64x4d_msa = vpx_sad64x64x4d_msa;
-const SadMxNx4Func sad64x32x4d_msa = vpx_sad64x32x4d_msa;
-const SadMxNx4Func sad32x64x4d_msa = vpx_sad32x64x4d_msa;
-const SadMxNx4Func sad32x32x4d_msa = vpx_sad32x32x4d_msa;
-const SadMxNx4Func sad32x16x4d_msa = vpx_sad32x16x4d_msa;
-const SadMxNx4Func sad16x32x4d_msa = vpx_sad16x32x4d_msa;
-const SadMxNx4Func sad16x16x4d_msa = vpx_sad16x16x4d_msa;
-const SadMxNx4Func sad16x8x4d_msa = vpx_sad16x8x4d_msa;
-const SadMxNx4Func sad8x16x4d_msa = vpx_sad8x16x4d_msa;
-const SadMxNx4Func sad8x8x4d_msa = vpx_sad8x8x4d_msa;
-const SadMxNx4Func sad8x4x4d_msa = vpx_sad8x4x4d_msa;
-const SadMxNx4Func sad4x8x4d_msa = vpx_sad4x8x4d_msa;
-const SadMxNx4Func sad4x4x4d_msa = vpx_sad4x4x4d_msa;
 const SadMxNx4Param x4d_msa_tests[] = {
-  make_tuple(64, 64, sad64x64x4d_msa, -1),
-  make_tuple(64, 32, sad64x32x4d_msa, -1),
-  make_tuple(32, 64, sad32x64x4d_msa, -1),
-  make_tuple(32, 32, sad32x32x4d_msa, -1),
-  make_tuple(32, 16, sad32x16x4d_msa, -1),
-  make_tuple(16, 32, sad16x32x4d_msa, -1),
-  make_tuple(16, 16, sad16x16x4d_msa, -1),
-  make_tuple(16, 8, sad16x8x4d_msa, -1),
-  make_tuple(8, 16, sad8x16x4d_msa, -1),
-  make_tuple(8, 8, sad8x8x4d_msa, -1),
-  make_tuple(8, 4, sad8x4x4d_msa, -1),
-  make_tuple(4, 8, sad4x8x4d_msa, -1),
-  make_tuple(4, 4, sad4x4x4d_msa, -1),
+  make_tuple(64, 64, &vpx_sad64x64x4d_msa, -1),
+  make_tuple(64, 32, &vpx_sad64x32x4d_msa, -1),
+  make_tuple(32, 64, &vpx_sad32x64x4d_msa, -1),
+  make_tuple(32, 32, &vpx_sad32x32x4d_msa, -1),
+  make_tuple(32, 16, &vpx_sad32x16x4d_msa, -1),
+  make_tuple(16, 32, &vpx_sad16x32x4d_msa, -1),
+  make_tuple(16, 16, &vpx_sad16x16x4d_msa, -1),
+  make_tuple(16, 8, &vpx_sad16x8x4d_msa, -1),
+  make_tuple(8, 16, &vpx_sad8x16x4d_msa, -1),
+  make_tuple(8, 8, &vpx_sad8x8x4d_msa, -1),
+  make_tuple(8, 4, &vpx_sad8x4x4d_msa, -1),
+  make_tuple(4, 8, &vpx_sad4x8x4d_msa, -1),
+  make_tuple(4, 4, &vpx_sad4x4x4d_msa, -1),
 };
 INSTANTIATE_TEST_CASE_P(MSA, SADx4Test, ::testing::ValuesIn(x4d_msa_tests));
 #endif  // HAVE_MSA
diff --git a/test/sixtap_predict_test.cc b/test/sixtap_predict_test.cc
index 1e682e7..304a148 100644
--- a/test/sixtap_predict_test.cc
+++ b/test/sixtap_predict_test.cc
@@ -186,70 +186,48 @@
 
 using std::tr1::make_tuple;
 
-const SixtapPredictFunc sixtap_16x16_c = vp8_sixtap_predict16x16_c;
-const SixtapPredictFunc sixtap_8x8_c = vp8_sixtap_predict8x8_c;
-const SixtapPredictFunc sixtap_8x4_c = vp8_sixtap_predict8x4_c;
-const SixtapPredictFunc sixtap_4x4_c = vp8_sixtap_predict4x4_c;
 INSTANTIATE_TEST_CASE_P(
     C, SixtapPredictTest, ::testing::Values(
-        make_tuple(16, 16, sixtap_16x16_c),
-        make_tuple(8, 8, sixtap_8x8_c),
-        make_tuple(8, 4, sixtap_8x4_c),
-        make_tuple(4, 4, sixtap_4x4_c)));
+        make_tuple(16, 16, &vp8_sixtap_predict16x16_c),
+        make_tuple(8, 8, &vp8_sixtap_predict8x8_c),
+        make_tuple(8, 4, &vp8_sixtap_predict8x4_c),
+        make_tuple(4, 4, &vp8_sixtap_predict4x4_c)));
 #if HAVE_NEON
-const SixtapPredictFunc sixtap_16x16_neon = vp8_sixtap_predict16x16_neon;
-const SixtapPredictFunc sixtap_8x8_neon = vp8_sixtap_predict8x8_neon;
-const SixtapPredictFunc sixtap_8x4_neon = vp8_sixtap_predict8x4_neon;
 INSTANTIATE_TEST_CASE_P(
     NEON, SixtapPredictTest, ::testing::Values(
-        make_tuple(16, 16, sixtap_16x16_neon),
-        make_tuple(8, 8, sixtap_8x8_neon),
-        make_tuple(8, 4, sixtap_8x4_neon)));
+        make_tuple(16, 16, &vp8_sixtap_predict16x16_neon),
+        make_tuple(8, 8, &vp8_sixtap_predict8x8_neon),
+        make_tuple(8, 4, &vp8_sixtap_predict8x4_neon)));
 #endif
 #if HAVE_MMX
-const SixtapPredictFunc sixtap_16x16_mmx = vp8_sixtap_predict16x16_mmx;
-const SixtapPredictFunc sixtap_8x8_mmx = vp8_sixtap_predict8x8_mmx;
-const SixtapPredictFunc sixtap_8x4_mmx = vp8_sixtap_predict8x4_mmx;
-const SixtapPredictFunc sixtap_4x4_mmx = vp8_sixtap_predict4x4_mmx;
 INSTANTIATE_TEST_CASE_P(
     MMX, SixtapPredictTest, ::testing::Values(
-        make_tuple(16, 16, sixtap_16x16_mmx),
-        make_tuple(8, 8, sixtap_8x8_mmx),
-        make_tuple(8, 4, sixtap_8x4_mmx),
-        make_tuple(4, 4, sixtap_4x4_mmx)));
+        make_tuple(16, 16, &vp8_sixtap_predict16x16_mmx),
+        make_tuple(8, 8, &vp8_sixtap_predict8x8_mmx),
+        make_tuple(8, 4, &vp8_sixtap_predict8x4_mmx),
+        make_tuple(4, 4, &vp8_sixtap_predict4x4_mmx)));
 #endif
 #if HAVE_SSE2
-const SixtapPredictFunc sixtap_16x16_sse2 = vp8_sixtap_predict16x16_sse2;
-const SixtapPredictFunc sixtap_8x8_sse2 = vp8_sixtap_predict8x8_sse2;
-const SixtapPredictFunc sixtap_8x4_sse2 = vp8_sixtap_predict8x4_sse2;
 INSTANTIATE_TEST_CASE_P(
     SSE2, SixtapPredictTest, ::testing::Values(
-        make_tuple(16, 16, sixtap_16x16_sse2),
-        make_tuple(8, 8, sixtap_8x8_sse2),
-        make_tuple(8, 4, sixtap_8x4_sse2)));
+        make_tuple(16, 16, &vp8_sixtap_predict16x16_sse2),
+        make_tuple(8, 8, &vp8_sixtap_predict8x8_sse2),
+        make_tuple(8, 4, &vp8_sixtap_predict8x4_sse2)));
 #endif
 #if HAVE_SSSE3
-const SixtapPredictFunc sixtap_16x16_ssse3 = vp8_sixtap_predict16x16_ssse3;
-const SixtapPredictFunc sixtap_8x8_ssse3 = vp8_sixtap_predict8x8_ssse3;
-const SixtapPredictFunc sixtap_8x4_ssse3 = vp8_sixtap_predict8x4_ssse3;
-const SixtapPredictFunc sixtap_4x4_ssse3 = vp8_sixtap_predict4x4_ssse3;
 INSTANTIATE_TEST_CASE_P(
     SSSE3, SixtapPredictTest, ::testing::Values(
-        make_tuple(16, 16, sixtap_16x16_ssse3),
-        make_tuple(8, 8, sixtap_8x8_ssse3),
-        make_tuple(8, 4, sixtap_8x4_ssse3),
-        make_tuple(4, 4, sixtap_4x4_ssse3)));
+        make_tuple(16, 16, &vp8_sixtap_predict16x16_ssse3),
+        make_tuple(8, 8, &vp8_sixtap_predict8x8_ssse3),
+        make_tuple(8, 4, &vp8_sixtap_predict8x4_ssse3),
+        make_tuple(4, 4, &vp8_sixtap_predict4x4_ssse3)));
 #endif
 #if HAVE_MSA
-const SixtapPredictFunc sixtap_16x16_msa = vp8_sixtap_predict16x16_msa;
-const SixtapPredictFunc sixtap_8x8_msa = vp8_sixtap_predict8x8_msa;
-const SixtapPredictFunc sixtap_8x4_msa = vp8_sixtap_predict8x4_msa;
-const SixtapPredictFunc sixtap_4x4_msa = vp8_sixtap_predict4x4_msa;
 INSTANTIATE_TEST_CASE_P(
     MSA, SixtapPredictTest, ::testing::Values(
-        make_tuple(16, 16, sixtap_16x16_msa),
-        make_tuple(8, 8, sixtap_8x8_msa),
-        make_tuple(8, 4, sixtap_8x4_msa),
-        make_tuple(4, 4, sixtap_4x4_msa)));
+        make_tuple(16, 16, &vp8_sixtap_predict16x16_msa),
+        make_tuple(8, 8, &vp8_sixtap_predict8x8_msa),
+        make_tuple(8, 4, &vp8_sixtap_predict8x4_msa),
+        make_tuple(4, 4, &vp8_sixtap_predict4x4_msa)));
 #endif
 }  // namespace
diff --git a/test/superframe_test.cc b/test/superframe_test.cc
index a8102b7..90aa75b 100644
--- a/test/superframe_test.cc
+++ b/test/superframe_test.cc
@@ -16,8 +16,13 @@
 
 namespace {
 
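+// Indices of the mode and superframe-syntax fields in SuperframeTestParam.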
+const int kTestMode = 0;
+const int kSuperframeSyntax = 1;
+
+typedef std::tr1::tuple<libvpx_test::TestMode, int> SuperframeTestParam;
+
 class SuperframeTest : public ::libvpx_test::EncoderTest,
-    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+    public ::libvpx_test::CodecTestWithParam<SuperframeTestParam> {
  protected:
   SuperframeTest() : EncoderTest(GET_PARAM(0)), modified_buf_(NULL),
       last_sf_pts_(0) {}
@@ -25,9 +30,13 @@
 
   virtual void SetUp() {
     InitializeConfig();
-    SetMode(GET_PARAM(1));
+    const SuperframeTestParam input = GET_PARAM(1);
+    const libvpx_test::TestMode mode = std::tr1::get<kTestMode>(input);
+    const int syntax = std::tr1::get<kSuperframeSyntax>(input);
+    SetMode(mode);
     sf_count_ = 0;
     sf_count_max_ = INT_MAX;
+    is_vp10_style_superframe_ = syntax;
   }
 
   virtual void TearDown() {
@@ -50,7 +59,8 @@
     const uint8_t marker = buffer[pkt->data.frame.sz - 1];
     const int frames = (marker & 0x7) + 1;
     const int mag = ((marker >> 3) & 3) + 1;
-    const unsigned int index_sz = 2 + mag  * frames;
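+    // The VP10-style syntax omits the size of the last frame from the
+    // superframe index, so the index carries one fewer size entry.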
+    const unsigned int index_sz =
+        2 + mag * (frames - is_vp10_style_superframe_);
     if ((marker & 0xe0) == 0xc0 &&
         pkt->data.frame.sz >= index_sz &&
         buffer[pkt->data.frame.sz - index_sz] == marker) {
@@ -75,6 +85,7 @@
     return pkt;
   }
 
+  int is_vp10_style_superframe_;
   int sf_count_;
   int sf_count_max_;
   vpx_codec_cx_pkt_t modified_pkt_;
@@ -92,9 +103,11 @@
   EXPECT_EQ(sf_count_, 1);
 }
 
-VP9_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Values(
-    ::libvpx_test::kTwoPassGood));
+VP9_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Combine(
+    ::testing::Values(::libvpx_test::kTwoPassGood),
+    ::testing::Values(0)));
 
-VP10_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Values(
-    ::libvpx_test::kTwoPassGood));
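+// VP10 uses the updated superframe syntax when built with CONFIG_MISC_FIXES.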
+VP10_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Combine(
+    ::testing::Values(::libvpx_test::kTwoPassGood),
+    ::testing::Values(CONFIG_MISC_FIXES)));
 }  // namespace
diff --git a/test/test-data.mk b/test/test-data.mk
index 4280b35..43c98bf 100644
--- a/test/test-data.mk
+++ b/test/test-data.mk
@@ -418,6 +418,18 @@
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x64.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x66.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x66.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-130x132.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-130x132.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-132x130.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-132x130.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-132x132.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-132x132.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-178x180.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-178x180.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-180x178.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-180x178.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-180x180.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-180x180.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-lf-1920x1080.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-lf-1920x1080.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-deltaq.webm
@@ -642,6 +654,34 @@
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-2.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-4.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-2-4-8.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-2-4-8.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-8.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-8.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-1.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-1.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-8.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-8.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-1.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-1.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-8.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-8.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-1.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-1.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-4-2-1.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-4-2-1.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-4.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm
@@ -769,3 +809,53 @@
 
 # sort and remove duplicates
 LIBVPX_TEST_DATA-yes := $(sort $(LIBVPX_TEST_DATA-yes))
+
+# VP9 dynamic resizing test (decoder)
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_5_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_5_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_5_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_5_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_7_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_7_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_7_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_7_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_5_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_5_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_5_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_5_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_7_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_7_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_7_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_7_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_5_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_5_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_5_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_5_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_7_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_7_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_7_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_7_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_5_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_5_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_5_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_5_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_7_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_7_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_7_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_7_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_5_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_5_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_5_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_5_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_7_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_7_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_7_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_7_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_5_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_5_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_5_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_5_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_3-4.webm.md5
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index 4e4ac62..357f377 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -744,3 +744,91 @@
 0ae808dca4d3c1152a9576e14830b6faa39f1b4a *invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf.res
 9cfc855459e7549fd015c79e8eca512b2f2cb7e3 *niklas_1280_720_30.y4m
 5b5763b388b1b52a81bb82b39f7ec25c4bd3d0e1 *desktop_credits.y4m
+85771f6ab44e4a0226e206c0cde8351dd5918953 *vp90-2-02-size-130x132.webm
+512dad5eabbed37b4bbbc64ce153f1a5484427b8 *vp90-2-02-size-130x132.webm.md5
+01f7127d40360289db63b27f61cb9afcda350e95 *vp90-2-02-size-132x130.webm
+4a94275328ae076cf60f966c097a8721010fbf5a *vp90-2-02-size-132x130.webm.md5
+f41c0400b5716b4b70552c40dd03d44be131e1cc *vp90-2-02-size-132x132.webm
+1a69e989f697e424bfe3e3e8a77bb0c0992c8e47 *vp90-2-02-size-132x132.webm.md5
+94a5cbfacacba100e0c5f7861c72a1b417feca0f *vp90-2-02-size-178x180.webm
+dedfecf1d784bcf70629592fa5e6f01d5441ccc9 *vp90-2-02-size-178x180.webm.md5
+4828b62478c04014bba3095a83106911a71cf387 *vp90-2-02-size-180x178.webm
+423da2b861050c969d78ed8e8f8f14045d1d8199 *vp90-2-02-size-180x178.webm.md5
+338f7c9282f43e29940f5391118aadd17e4f9234 *vp90-2-02-size-180x180.webm
+6c2ef013392310778dca5dd5351160eca66b0a60 *vp90-2-02-size-180x180.webm.md5
+679fa7d6807e936ff937d7b282e7dbd8ac76447e *vp90-2-14-resize-10frames-fp-tiles-1-2-4-8.webm
+fc7267ab8fc2bf5d6c234e34ee6c078a967b4888 *vp90-2-14-resize-10frames-fp-tiles-1-2-4-8.webm.md5
+9d33a137c819792209c5ce4e4e1ee5da73d574fe *vp90-2-14-resize-10frames-fp-tiles-1-2.webm
+0c78a154956a8605d050bdd75e0dcc4d39c040a6 *vp90-2-14-resize-10frames-fp-tiles-1-2.webm.md5
+d6a8d8c57f66a91d23e8e7df480f9ae841e56c37 *vp90-2-14-resize-10frames-fp-tiles-1-4.webm
+e9b4e8c7b33b5fda745d340c3f47e6623ae40cf2 *vp90-2-14-resize-10frames-fp-tiles-1-4.webm.md5
+aa6fe043a0c4a42b49c87ebbe812d4afd9945bec *vp90-2-14-resize-10frames-fp-tiles-1-8.webm
+028520578994c2d013d4c0129033d4f2ff31bbe0 *vp90-2-14-resize-10frames-fp-tiles-1-8.webm.md5
+d1d5463c9ea7b5cc5f609ddedccddf656f348d1a *vp90-2-14-resize-10frames-fp-tiles-2-1.webm
+92d5872f5bdffbed721703b7e959b4f885e3d77a *vp90-2-14-resize-10frames-fp-tiles-2-1.webm.md5
+677cb29de1215d97346015af5807a9b1faad54cf *vp90-2-14-resize-10frames-fp-tiles-2-4.webm
+a5db19f977094ec3fd60b4f7671b3e6740225e12 *vp90-2-14-resize-10frames-fp-tiles-2-4.webm.md5
+cdd3c52ba21067efdbb2de917fe2a965bf27332e *vp90-2-14-resize-10frames-fp-tiles-2-8.webm
+db17ec5d894ea8b8d0b7f32206d0dd3d46dcfa6d *vp90-2-14-resize-10frames-fp-tiles-2-8.webm.md5
+0f6093c472125d05b764d7d1965c1d56771c0ea2 *vp90-2-14-resize-10frames-fp-tiles-4-1.webm
+bc7c79e1bee07926dd970462ce6f64fc30eec3e1 *vp90-2-14-resize-10frames-fp-tiles-4-1.webm.md5
+c5142e2bff4091338196c8ea8bc9266e64f548bc *vp90-2-14-resize-10frames-fp-tiles-4-2.webm
+22aa3dd430b69fd3d92f6561bac86deeed90486d *vp90-2-14-resize-10frames-fp-tiles-4-2.webm.md5
+ede8b1466d2f26e1b1bd9602addb9cd1017e1d8c *vp90-2-14-resize-10frames-fp-tiles-4-8.webm
+508d5ebb9c0eac2a4100281a3ee052ec2fc19217 *vp90-2-14-resize-10frames-fp-tiles-4-8.webm.md5
+2b292e3392854cd1d76ae597a6f53656cf741cfa *vp90-2-14-resize-10frames-fp-tiles-8-1.webm
+1c24e54fa19e94e1722f24676404444e941c3d31 *vp90-2-14-resize-10frames-fp-tiles-8-1.webm.md5
+61beda21064e09634564caa6697ab90bd53c9af7 *vp90-2-14-resize-10frames-fp-tiles-8-2.webm
+9c0657b4d9e1d0e4c9d28a90e5a8630a65519124 *vp90-2-14-resize-10frames-fp-tiles-8-2.webm.md5
+1758c50a11a7c92522749b4a251664705f1f0d4b *vp90-2-14-resize-10frames-fp-tiles-8-4-2-1.webm
+4f454a06750614314ae15a44087b79016fe2db97 *vp90-2-14-resize-10frames-fp-tiles-8-4-2-1.webm.md5
+3920c95ba94f1f048a731d9d9b416043b44aa4bd *vp90-2-14-resize-10frames-fp-tiles-8-4.webm
+4eb347a0456d2c49a1e1d8de5aa1c51acc39887e *vp90-2-14-resize-10frames-fp-tiles-8-4.webm.md5
+4b95a74c032a473b6683d7ad5754db1b0ec378e9 *vp90-2-21-resize_inter_1280x720_5_1-2.webm
+a7826dd386bedfe69d02736969bfb47fb6a40a5e *vp90-2-21-resize_inter_1280x720_5_1-2.webm.md5
+5cfff79e82c4d69964ccb8e75b4f0c53b9295167 *vp90-2-21-resize_inter_1280x720_5_3-4.webm
+a18f57db4a25e1f543a99f2ceb182e00db0ee22f *vp90-2-21-resize_inter_1280x720_5_3-4.webm.md5
+d26db0811bf30eb4131d928669713e2485f8e833 *vp90-2-21-resize_inter_1280x720_7_1-2.webm
+fd6f9f332cd5bea4c0f0d57be4297bea493cc5a1 *vp90-2-21-resize_inter_1280x720_7_1-2.webm.md5
+5c7d73d4d268e2ba9593b31cb091fd339505c7fd *vp90-2-21-resize_inter_1280x720_7_3-4.webm
+7bbb949cabc1e70dadcc74582739f63b833034e0 *vp90-2-21-resize_inter_1280x720_7_3-4.webm.md5
+f2d2a41a60eb894aff0c5854afca15931f1445a8 *vp90-2-21-resize_inter_1920x1080_5_1-2.webm
+66d7789992613ac9d678ff905ff1059daa1b89e4 *vp90-2-21-resize_inter_1920x1080_5_1-2.webm.md5
+764edb75fe7dd64e73a1b4f3b4b2b1bf237a4dea *vp90-2-21-resize_inter_1920x1080_5_3-4.webm
+f78bea1075983fd990e7f25d4f31438f9b5efa34 *vp90-2-21-resize_inter_1920x1080_5_3-4.webm.md5
+96496f2ade764a5de9f0c27917c7df1f120fb2ef *vp90-2-21-resize_inter_1920x1080_7_1-2.webm
+2632b635135ed5ecd67fd22dec7990d29c4f4cb5 *vp90-2-21-resize_inter_1920x1080_7_1-2.webm.md5
+74889ea42001bf41428cb742ca74e65129c886dc *vp90-2-21-resize_inter_1920x1080_7_3-4.webm
+d2cf3b25956415bb579d368e7098097e482dd73a *vp90-2-21-resize_inter_1920x1080_7_3-4.webm.md5
+4658986a8ce36ebfcc80a1903e446eaab3985336 *vp90-2-21-resize_inter_320x180_5_1-2.webm
+8a3d8cf325109ffa913cc9426c32eea8c202a09a *vp90-2-21-resize_inter_320x180_5_1-2.webm.md5
+16303aa45176520ee42c2c425247aadc1506b881 *vp90-2-21-resize_inter_320x180_5_3-4.webm
+41cab1ddf7715b680a4dbce42faa9bcd72af4e5c *vp90-2-21-resize_inter_320x180_5_3-4.webm.md5
+56648adcee66dd0e5cb6ac947f5ee1b9cc8ba129 *vp90-2-21-resize_inter_320x180_7_1-2.webm
+70047377787003cc03dda7b2394e6d7eaa666d9e *vp90-2-21-resize_inter_320x180_7_1-2.webm.md5
+d2ff99165488499cc55f75929f1ce5ca9c9e359b *vp90-2-21-resize_inter_320x180_7_3-4.webm
+e69019e378114a4643db283b66d1a7e304761a56 *vp90-2-21-resize_inter_320x180_7_3-4.webm.md5
+4834d129bed0f4289d3a88f2ae3a1736f77621b0 *vp90-2-21-resize_inter_320x240_5_1-2.webm
+a75653c53d22b623c1927fc0088da21dafef21f4 *vp90-2-21-resize_inter_320x240_5_1-2.webm.md5
+19818e1b7fd1c1e63d8873c31b0babe29dd33ba6 *vp90-2-21-resize_inter_320x240_5_3-4.webm
+8d89814ff469a186312111651b16601dfbce4336 *vp90-2-21-resize_inter_320x240_5_3-4.webm.md5
+ac8057bae52498f324ce92a074d5f8207cc4a4a7 *vp90-2-21-resize_inter_320x240_7_1-2.webm
+2643440898c83c08cc47bc744245af696b877c24 *vp90-2-21-resize_inter_320x240_7_1-2.webm.md5
+cf4a4cd38ac8b18c42d8c25a3daafdb39132256b *vp90-2-21-resize_inter_320x240_7_3-4.webm
+70ba8ec9120b26e9b0ffa2c79b432f16cbcb50ec *vp90-2-21-resize_inter_320x240_7_3-4.webm.md5
+669f10409fe1c4a054010162ca47773ea1fdbead *vp90-2-21-resize_inter_640x360_5_1-2.webm
+6355a04249004a35fb386dd1024214234f044383 *vp90-2-21-resize_inter_640x360_5_1-2.webm.md5
+c23763b950b8247c1775d1f8158d93716197676c *vp90-2-21-resize_inter_640x360_5_3-4.webm
+59e6fc381e3ec3b7bdaac586334e0bc944d18fb6 *vp90-2-21-resize_inter_640x360_5_3-4.webm.md5
+71b45cbfdd068baa1f679a69e5e6f421d256a85f *vp90-2-21-resize_inter_640x360_7_1-2.webm
+1416fc761b690c54a955c4cf017fa078520e8c18 *vp90-2-21-resize_inter_640x360_7_1-2.webm.md5
+6c409903279448a697e4db63bab1061784bcd8d2 *vp90-2-21-resize_inter_640x360_7_3-4.webm
+60de1299793433a630b71130cf76c9f5965758e2 *vp90-2-21-resize_inter_640x360_7_3-4.webm.md5
+852b597b8af096d90c80bf0ed6ed3b336b851f19 *vp90-2-21-resize_inter_640x480_5_1-2.webm
+f6856f19236ee46ed462bd0a2e7e72b9c3b9cea6 *vp90-2-21-resize_inter_640x480_5_1-2.webm.md5
+792a16c6f60043bd8dceb515f0b95b8891647858 *vp90-2-21-resize_inter_640x480_5_3-4.webm
+68ffe59877e9a7863805e1c0a3ce18ce037d7c9d *vp90-2-21-resize_inter_640x480_5_3-4.webm.md5
+61e044c4759972a35ea3db8c1478a988910a4ef4 *vp90-2-21-resize_inter_640x480_7_1-2.webm
+7739bfca167b1b43fea72f807f01e097b7cb98d8 *vp90-2-21-resize_inter_640x480_7_1-2.webm.md5
+7291af354b4418917eee00e3a7e366086a0b7a10 *vp90-2-21-resize_inter_640x480_7_3-4.webm
+4a18b09ccb36564193f0215f599d745d95bb558c *vp90-2-21-resize_inter_640x480_7_3-4.webm.md5
diff --git a/test/test_vector_test.cc b/test/test_vector_test.cc
index 437ce44..6f0cbdf 100644
--- a/test/test_vector_test.cc
+++ b/test/test_vector_test.cc
@@ -10,6 +10,7 @@
 
 #include <cstdio>
 #include <cstdlib>
+#include <set>
 #include <string>
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "../tools_common.h"
@@ -44,6 +45,12 @@
   TestVectorTest()
       : DecoderTest(GET_PARAM(0)),
         md5_file_(NULL) {
+#if CONFIG_VP9_DECODER
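+    // Collect the resize clips so frame-parallel runs can skip them below.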
+    resize_clips_.insert(
+      ::libvpx_test::kVP9TestVectorsResize,
+      ::libvpx_test::kVP9TestVectorsResize +
+          ::libvpx_test::kNumVP9TestVectorsResize);
+#endif
   }
 
   virtual ~TestVectorTest() {
@@ -77,6 +84,10 @@
         << "Md5 checksums don't match: frame number = " << frame_number;
   }
 
+#if CONFIG_VP9_DECODER
+  std::set<std::string> resize_clips_;
+#endif
+
  private:
   FILE *md5_file_;
 };
@@ -97,6 +108,14 @@
 
   if (mode == kFrameParallelMode) {
     flags |= VPX_CODEC_USE_FRAME_THREADING;
+#if CONFIG_VP9_DECODER
+    // TODO(hkuang): Fix frame parallel decode bug. See issue 1086.
+    if (resize_clips_.find(filename) != resize_clips_.end()) {
+      printf("Skipping the test file: %s, due to frame parallel decode bug.\n",
+             filename.c_str());
+      return;
+    }
+#endif
   }
 
   cfg.threads = threads;
diff --git a/test/test_vectors.cc b/test/test_vectors.cc
index 434a382..634ea44 100644
--- a/test/test_vectors.cc
+++ b/test/test_vectors.cc
@@ -52,6 +52,31 @@
 const int kNumVP8TestVectors = NELEMENTS(kVP8TestVectors);
 #endif  // CONFIG_VP8_DECODER
 #if CONFIG_VP9_DECODER
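+// Resize clips shared between kVP9TestVectors and kVP9TestVectorsResize;
+// defined once here and #undef'd after both arrays.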
+#define RESIZE_TEST_VECTORS "vp90-2-21-resize_inter_320x180_5_1-2.webm", \
+  "vp90-2-21-resize_inter_320x180_5_3-4.webm", \
+  "vp90-2-21-resize_inter_320x180_7_1-2.webm", \
+  "vp90-2-21-resize_inter_320x180_7_3-4.webm", \
+  "vp90-2-21-resize_inter_320x240_5_1-2.webm", \
+  "vp90-2-21-resize_inter_320x240_5_3-4.webm", \
+  "vp90-2-21-resize_inter_320x240_7_1-2.webm", \
+  "vp90-2-21-resize_inter_320x240_7_3-4.webm", \
+  "vp90-2-21-resize_inter_640x360_5_1-2.webm", \
+  "vp90-2-21-resize_inter_640x360_5_3-4.webm", \
+  "vp90-2-21-resize_inter_640x360_7_1-2.webm", \
+  "vp90-2-21-resize_inter_640x360_7_3-4.webm", \
+  "vp90-2-21-resize_inter_640x480_5_1-2.webm", \
+  "vp90-2-21-resize_inter_640x480_5_3-4.webm", \
+  "vp90-2-21-resize_inter_640x480_7_1-2.webm", \
+  "vp90-2-21-resize_inter_640x480_7_3-4.webm", \
+  "vp90-2-21-resize_inter_1280x720_5_1-2.webm", \
+  "vp90-2-21-resize_inter_1280x720_5_3-4.webm", \
+  "vp90-2-21-resize_inter_1280x720_7_1-2.webm", \
+  "vp90-2-21-resize_inter_1280x720_7_3-4.webm", \
+  "vp90-2-21-resize_inter_1920x1080_5_1-2.webm", \
+  "vp90-2-21-resize_inter_1920x1080_5_3-4.webm", \
+  "vp90-2-21-resize_inter_1920x1080_7_1-2.webm", \
+  "vp90-2-21-resize_inter_1920x1080_7_3-4.webm",
+
 const char *const kVP9TestVectors[] = {
   "vp90-2-00-quantizer-00.webm", "vp90-2-00-quantizer-01.webm",
   "vp90-2-00-quantizer-02.webm", "vp90-2-00-quantizer-03.webm",
@@ -120,7 +145,10 @@
   "vp90-2-02-size-66x10.webm", "vp90-2-02-size-66x16.webm",
   "vp90-2-02-size-66x18.webm", "vp90-2-02-size-66x32.webm",
   "vp90-2-02-size-66x34.webm", "vp90-2-02-size-66x64.webm",
-  "vp90-2-02-size-66x66.webm", "vp90-2-03-size-196x196.webm",
+  "vp90-2-02-size-66x66.webm", "vp90-2-02-size-130x132.webm",
+  "vp90-2-02-size-132x130.webm", "vp90-2-02-size-132x132.webm",
+  "vp90-2-02-size-178x180.webm", "vp90-2-02-size-180x178.webm",
+  "vp90-2-02-size-180x180.webm", "vp90-2-03-size-196x196.webm",
   "vp90-2-03-size-196x198.webm", "vp90-2-03-size-196x200.webm",
   "vp90-2-03-size-196x202.webm", "vp90-2-03-size-196x208.webm",
   "vp90-2-03-size-196x210.webm", "vp90-2-03-size-196x224.webm",
@@ -182,6 +210,20 @@
   "vp90-2-14-resize-fp-tiles-4-2.webm", "vp90-2-14-resize-fp-tiles-4-8.webm",
   "vp90-2-14-resize-fp-tiles-8-16.webm", "vp90-2-14-resize-fp-tiles-8-1.webm",
   "vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm",
+  "vp90-2-14-resize-10frames-fp-tiles-1-2-4-8.webm",
+  "vp90-2-14-resize-10frames-fp-tiles-1-2.webm",
+  "vp90-2-14-resize-10frames-fp-tiles-1-4.webm",
+  "vp90-2-14-resize-10frames-fp-tiles-1-8.webm",
+  "vp90-2-14-resize-10frames-fp-tiles-2-1.webm",
+  "vp90-2-14-resize-10frames-fp-tiles-2-4.webm",
+  "vp90-2-14-resize-10frames-fp-tiles-2-8.webm",
+  "vp90-2-14-resize-10frames-fp-tiles-4-1.webm",
+  "vp90-2-14-resize-10frames-fp-tiles-4-2.webm",
+  "vp90-2-14-resize-10frames-fp-tiles-4-8.webm",
+  "vp90-2-14-resize-10frames-fp-tiles-8-1.webm",
+  "vp90-2-14-resize-10frames-fp-tiles-8-2.webm",
+  "vp90-2-14-resize-10frames-fp-tiles-8-4-2-1.webm",
+  "vp90-2-14-resize-10frames-fp-tiles-8-4.webm",
   "vp90-2-15-segkey.webm", "vp90-2-15-segkey_adpq.webm",
   "vp90-2-16-intra-only.webm", "vp90-2-17-show-existing-frame.webm",
   "vp90-2-18-resize.ivf", "vp90-2-19-skip.webm",
@@ -193,10 +235,16 @@
   "vp93-2-20-10bit-yuv422.webm", "vp93-2-20-12bit-yuv422.webm",
   "vp93-2-20-10bit-yuv440.webm", "vp93-2-20-12bit-yuv440.webm",
   "vp93-2-20-10bit-yuv444.webm", "vp93-2-20-12bit-yuv444.webm",
-#endif  // CONFIG_VP9_HIGHBITDEPTH`
+#endif  // CONFIG_VP9_HIGHBITDEPTH
   "vp90-2-20-big_superframe-01.webm", "vp90-2-20-big_superframe-02.webm",
+  RESIZE_TEST_VECTORS
 };
 const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors);
+const char *const kVP9TestVectorsResize[] = {
+  RESIZE_TEST_VECTORS
+};
+const int kNumVP9TestVectorsResize = NELEMENTS(kVP9TestVectorsResize);
+#undef RESIZE_TEST_VECTORS
 #endif  // CONFIG_VP9_DECODER
 
 }  // namespace libvpx_test
diff --git a/test/test_vectors.h b/test/test_vectors.h
index 8e1aabb..2c6918a 100644
--- a/test/test_vectors.h
+++ b/test/test_vectors.h
@@ -23,6 +23,8 @@
 #if CONFIG_VP9_DECODER
 extern const int kNumVP9TestVectors;
 extern const char *const kVP9TestVectors[];
+extern const int kNumVP9TestVectorsResize;
+extern const char *const kVP9TestVectorsResize[];
 #endif  // CONFIG_VP9_DECODER
 
 }  // namespace libvpx_test
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 7a34db6..6f50f78 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -747,115 +747,63 @@
 INSTANTIATE_TEST_CASE_P(C, SumOfSquaresTest,
                         ::testing::Values(vpx_get_mb_ss_c));
 
-const Get4x4SseFunc get4x4sse_cs_c = vpx_get4x4sse_cs_c;
 INSTANTIATE_TEST_CASE_P(C, VpxSseTest,
-                        ::testing::Values(make_tuple(2, 2, get4x4sse_cs_c)));
+                        ::testing::Values(make_tuple(2, 2,
+                                                     &vpx_get4x4sse_cs_c)));
 
-const VarianceMxNFunc mse16x16_c = vpx_mse16x16_c;
-const VarianceMxNFunc mse16x8_c = vpx_mse16x8_c;
-const VarianceMxNFunc mse8x16_c = vpx_mse8x16_c;
-const VarianceMxNFunc mse8x8_c = vpx_mse8x8_c;
 INSTANTIATE_TEST_CASE_P(C, VpxMseTest,
-                        ::testing::Values(make_tuple(4, 4, mse16x16_c),
-                                          make_tuple(4, 3, mse16x8_c),
-                                          make_tuple(3, 4, mse8x16_c),
-                                          make_tuple(3, 3, mse8x8_c)));
+                        ::testing::Values(make_tuple(4, 4, &vpx_mse16x16_c),
+                                          make_tuple(4, 3, &vpx_mse16x8_c),
+                                          make_tuple(3, 4, &vpx_mse8x16_c),
+                                          make_tuple(3, 3, &vpx_mse8x8_c)));
 
-const VarianceMxNFunc variance64x64_c = vpx_variance64x64_c;
-const VarianceMxNFunc variance64x32_c = vpx_variance64x32_c;
-const VarianceMxNFunc variance32x64_c = vpx_variance32x64_c;
-const VarianceMxNFunc variance32x32_c = vpx_variance32x32_c;
-const VarianceMxNFunc variance32x16_c = vpx_variance32x16_c;
-const VarianceMxNFunc variance16x32_c = vpx_variance16x32_c;
-const VarianceMxNFunc variance16x16_c = vpx_variance16x16_c;
-const VarianceMxNFunc variance16x8_c = vpx_variance16x8_c;
-const VarianceMxNFunc variance8x16_c = vpx_variance8x16_c;
-const VarianceMxNFunc variance8x8_c = vpx_variance8x8_c;
-const VarianceMxNFunc variance8x4_c = vpx_variance8x4_c;
-const VarianceMxNFunc variance4x8_c = vpx_variance4x8_c;
-const VarianceMxNFunc variance4x4_c = vpx_variance4x4_c;
 INSTANTIATE_TEST_CASE_P(
     C, VpxVarianceTest,
-    ::testing::Values(make_tuple(6, 6, variance64x64_c, 0),
-                      make_tuple(6, 5, variance64x32_c, 0),
-                      make_tuple(5, 6, variance32x64_c, 0),
-                      make_tuple(5, 5, variance32x32_c, 0),
-                      make_tuple(5, 4, variance32x16_c, 0),
-                      make_tuple(4, 5, variance16x32_c, 0),
-                      make_tuple(4, 4, variance16x16_c, 0),
-                      make_tuple(4, 3, variance16x8_c, 0),
-                      make_tuple(3, 4, variance8x16_c, 0),
-                      make_tuple(3, 3, variance8x8_c, 0),
-                      make_tuple(3, 2, variance8x4_c, 0),
-                      make_tuple(2, 3, variance4x8_c, 0),
-                      make_tuple(2, 2, variance4x4_c, 0)));
+    ::testing::Values(make_tuple(6, 6, &vpx_variance64x64_c, 0),
+                      make_tuple(6, 5, &vpx_variance64x32_c, 0),
+                      make_tuple(5, 6, &vpx_variance32x64_c, 0),
+                      make_tuple(5, 5, &vpx_variance32x32_c, 0),
+                      make_tuple(5, 4, &vpx_variance32x16_c, 0),
+                      make_tuple(4, 5, &vpx_variance16x32_c, 0),
+                      make_tuple(4, 4, &vpx_variance16x16_c, 0),
+                      make_tuple(4, 3, &vpx_variance16x8_c, 0),
+                      make_tuple(3, 4, &vpx_variance8x16_c, 0),
+                      make_tuple(3, 3, &vpx_variance8x8_c, 0),
+                      make_tuple(3, 2, &vpx_variance8x4_c, 0),
+                      make_tuple(2, 3, &vpx_variance4x8_c, 0),
+                      make_tuple(2, 2, &vpx_variance4x4_c, 0)));
 
-const SubpixVarMxNFunc subpel_var64x64_c = vpx_sub_pixel_variance64x64_c;
-const SubpixVarMxNFunc subpel_var64x32_c = vpx_sub_pixel_variance64x32_c;
-const SubpixVarMxNFunc subpel_var32x64_c = vpx_sub_pixel_variance32x64_c;
-const SubpixVarMxNFunc subpel_var32x32_c = vpx_sub_pixel_variance32x32_c;
-const SubpixVarMxNFunc subpel_var32x16_c = vpx_sub_pixel_variance32x16_c;
-const SubpixVarMxNFunc subpel_var16x32_c = vpx_sub_pixel_variance16x32_c;
-const SubpixVarMxNFunc subpel_var16x16_c = vpx_sub_pixel_variance16x16_c;
-const SubpixVarMxNFunc subpel_var16x8_c = vpx_sub_pixel_variance16x8_c;
-const SubpixVarMxNFunc subpel_var8x16_c = vpx_sub_pixel_variance8x16_c;
-const SubpixVarMxNFunc subpel_var8x8_c = vpx_sub_pixel_variance8x8_c;
-const SubpixVarMxNFunc subpel_var8x4_c = vpx_sub_pixel_variance8x4_c;
-const SubpixVarMxNFunc subpel_var4x8_c = vpx_sub_pixel_variance4x8_c;
-const SubpixVarMxNFunc subpel_var4x4_c = vpx_sub_pixel_variance4x4_c;
 INSTANTIATE_TEST_CASE_P(
     C, VpxSubpelVarianceTest,
-    ::testing::Values(make_tuple(6, 6, subpel_var64x64_c, 0),
-                      make_tuple(6, 5, subpel_var64x32_c, 0),
-                      make_tuple(5, 6, subpel_var32x64_c, 0),
-                      make_tuple(5, 5, subpel_var32x32_c, 0),
-                      make_tuple(5, 4, subpel_var32x16_c, 0),
-                      make_tuple(4, 5, subpel_var16x32_c, 0),
-                      make_tuple(4, 4, subpel_var16x16_c, 0),
-                      make_tuple(4, 3, subpel_var16x8_c, 0),
-                      make_tuple(3, 4, subpel_var8x16_c, 0),
-                      make_tuple(3, 3, subpel_var8x8_c, 0),
-                      make_tuple(3, 2, subpel_var8x4_c, 0),
-                      make_tuple(2, 3, subpel_var4x8_c, 0),
-                      make_tuple(2, 2, subpel_var4x4_c, 0)));
+    ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_variance64x64_c, 0),
+                      make_tuple(6, 5, &vpx_sub_pixel_variance64x32_c, 0),
+                      make_tuple(5, 6, &vpx_sub_pixel_variance32x64_c, 0),
+                      make_tuple(5, 5, &vpx_sub_pixel_variance32x32_c, 0),
+                      make_tuple(5, 4, &vpx_sub_pixel_variance32x16_c, 0),
+                      make_tuple(4, 5, &vpx_sub_pixel_variance16x32_c, 0),
+                      make_tuple(4, 4, &vpx_sub_pixel_variance16x16_c, 0),
+                      make_tuple(4, 3, &vpx_sub_pixel_variance16x8_c, 0),
+                      make_tuple(3, 4, &vpx_sub_pixel_variance8x16_c, 0),
+                      make_tuple(3, 3, &vpx_sub_pixel_variance8x8_c, 0),
+                      make_tuple(3, 2, &vpx_sub_pixel_variance8x4_c, 0),
+                      make_tuple(2, 3, &vpx_sub_pixel_variance4x8_c, 0),
+                      make_tuple(2, 2, &vpx_sub_pixel_variance4x4_c, 0)));
 
-const SubpixAvgVarMxNFunc subpel_avg_var64x64_c =
-    vpx_sub_pixel_avg_variance64x64_c;
-const SubpixAvgVarMxNFunc subpel_avg_var64x32_c =
-    vpx_sub_pixel_avg_variance64x32_c;
-const SubpixAvgVarMxNFunc subpel_avg_var32x64_c =
-    vpx_sub_pixel_avg_variance32x64_c;
-const SubpixAvgVarMxNFunc subpel_avg_var32x32_c =
-    vpx_sub_pixel_avg_variance32x32_c;
-const SubpixAvgVarMxNFunc subpel_avg_var32x16_c =
-    vpx_sub_pixel_avg_variance32x16_c;
-const SubpixAvgVarMxNFunc subpel_avg_var16x32_c =
-    vpx_sub_pixel_avg_variance16x32_c;
-const SubpixAvgVarMxNFunc subpel_avg_var16x16_c =
-    vpx_sub_pixel_avg_variance16x16_c;
-const SubpixAvgVarMxNFunc subpel_avg_var16x8_c =
-    vpx_sub_pixel_avg_variance16x8_c;
-const SubpixAvgVarMxNFunc subpel_avg_var8x16_c =
-    vpx_sub_pixel_avg_variance8x16_c;
-const SubpixAvgVarMxNFunc subpel_avg_var8x8_c = vpx_sub_pixel_avg_variance8x8_c;
-const SubpixAvgVarMxNFunc subpel_avg_var8x4_c = vpx_sub_pixel_avg_variance8x4_c;
-const SubpixAvgVarMxNFunc subpel_avg_var4x8_c = vpx_sub_pixel_avg_variance4x8_c;
-const SubpixAvgVarMxNFunc subpel_avg_var4x4_c = vpx_sub_pixel_avg_variance4x4_c;
 INSTANTIATE_TEST_CASE_P(
     C, VpxSubpelAvgVarianceTest,
-    ::testing::Values(make_tuple(6, 6, subpel_avg_var64x64_c, 0),
-                      make_tuple(6, 5, subpel_avg_var64x32_c, 0),
-                      make_tuple(5, 6, subpel_avg_var32x64_c, 0),
-                      make_tuple(5, 5, subpel_avg_var32x32_c, 0),
-                      make_tuple(5, 4, subpel_avg_var32x16_c, 0),
-                      make_tuple(4, 5, subpel_avg_var16x32_c, 0),
-                      make_tuple(4, 4, subpel_avg_var16x16_c, 0),
-                      make_tuple(4, 3, subpel_avg_var16x8_c, 0),
-                      make_tuple(3, 4, subpel_avg_var8x16_c, 0),
-                      make_tuple(3, 3, subpel_avg_var8x8_c, 0),
-                      make_tuple(3, 2, subpel_avg_var8x4_c, 0),
-                      make_tuple(2, 3, subpel_avg_var4x8_c, 0),
-                      make_tuple(2, 2, subpel_avg_var4x4_c, 0)));
+    ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_avg_variance64x64_c, 0),
+                      make_tuple(6, 5, &vpx_sub_pixel_avg_variance64x32_c, 0),
+                      make_tuple(5, 6, &vpx_sub_pixel_avg_variance32x64_c, 0),
+                      make_tuple(5, 5, &vpx_sub_pixel_avg_variance32x32_c, 0),
+                      make_tuple(5, 4, &vpx_sub_pixel_avg_variance32x16_c, 0),
+                      make_tuple(4, 5, &vpx_sub_pixel_avg_variance16x32_c, 0),
+                      make_tuple(4, 4, &vpx_sub_pixel_avg_variance16x16_c, 0),
+                      make_tuple(4, 3, &vpx_sub_pixel_avg_variance16x8_c, 0),
+                      make_tuple(3, 4, &vpx_sub_pixel_avg_variance8x16_c, 0),
+                      make_tuple(3, 3, &vpx_sub_pixel_avg_variance8x8_c, 0),
+                      make_tuple(3, 2, &vpx_sub_pixel_avg_variance8x4_c, 0),
+                      make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_c, 0),
+                      make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_c, 0)));
 
 #if CONFIG_VP9_HIGHBITDEPTH
 typedef MseTest<VarianceMxNFunc> VpxHBDMseTest;
@@ -875,1166 +823,531 @@
 TEST_P(VpxHBDSubpelAvgVarianceTest, Ref) { RefTest(); }
 
 /* TODO(debargha): This test does not support the highbd version
-const VarianceMxNFunc highbd_12_mse16x16_c = vpx_highbd_12_mse16x16_c;
-const VarianceMxNFunc highbd_12_mse16x8_c = vpx_highbd_12_mse16x8_c;
-const VarianceMxNFunc highbd_12_mse8x16_c = vpx_highbd_12_mse8x16_c;
-const VarianceMxNFunc highbd_12_mse8x8_c = vpx_highbd_12_mse8x8_c;
-
-const VarianceMxNFunc highbd_10_mse16x16_c = vpx_highbd_10_mse16x16_c;
-const VarianceMxNFunc highbd_10_mse16x8_c = vpx_highbd_10_mse16x8_c;
-const VarianceMxNFunc highbd_10_mse8x16_c = vpx_highbd_10_mse8x16_c;
-const VarianceMxNFunc highbd_10_mse8x8_c = vpx_highbd_10_mse8x8_c;
-
-const VarianceMxNFunc highbd_8_mse16x16_c = vpx_highbd_8_mse16x16_c;
-const VarianceMxNFunc highbd_8_mse16x8_c = vpx_highbd_8_mse16x8_c;
-const VarianceMxNFunc highbd_8_mse8x16_c = vpx_highbd_8_mse8x16_c;
-const VarianceMxNFunc highbd_8_mse8x8_c = vpx_highbd_8_mse8x8_c;
 INSTANTIATE_TEST_CASE_P(
-    C, VpxHBDMseTest, ::testing::Values(make_tuple(4, 4, highbd_12_mse16x16_c),
-                                        make_tuple(4, 4, highbd_12_mse16x8_c),
-                                        make_tuple(4, 4, highbd_12_mse8x16_c),
-                                        make_tuple(4, 4, highbd_12_mse8x8_c),
-                                        make_tuple(4, 4, highbd_10_mse16x16_c),
-                                        make_tuple(4, 4, highbd_10_mse16x8_c),
-                                        make_tuple(4, 4, highbd_10_mse8x16_c),
-                                        make_tuple(4, 4, highbd_10_mse8x8_c),
-                                        make_tuple(4, 4, highbd_8_mse16x16_c),
-                                        make_tuple(4, 4, highbd_8_mse16x8_c),
-                                        make_tuple(4, 4, highbd_8_mse8x16_c),
-                                        make_tuple(4, 4, highbd_8_mse8x8_c)));
+    C, VpxHBDMseTest,
+    ::testing::Values(make_tuple(4, 4, &vpx_highbd_12_mse16x16_c),
+                      make_tuple(4, 4, &vpx_highbd_12_mse16x8_c),
+                      make_tuple(4, 4, &vpx_highbd_12_mse8x16_c),
+                      make_tuple(4, 4, &vpx_highbd_12_mse8x8_c),
+                      make_tuple(4, 4, &vpx_highbd_10_mse16x16_c),
+                      make_tuple(4, 4, &vpx_highbd_10_mse16x8_c),
+                      make_tuple(4, 4, &vpx_highbd_10_mse8x16_c),
+                      make_tuple(4, 4, &vpx_highbd_10_mse8x8_c),
+                      make_tuple(4, 4, &vpx_highbd_8_mse16x16_c),
+                      make_tuple(4, 4, &vpx_highbd_8_mse16x8_c),
+                      make_tuple(4, 4, &vpx_highbd_8_mse8x16_c),
+                      make_tuple(4, 4, &vpx_highbd_8_mse8x8_c)));
 */
 
-const VarianceMxNFunc highbd_12_variance64x64_c = vpx_highbd_12_variance64x64_c;
-const VarianceMxNFunc highbd_12_variance64x32_c = vpx_highbd_12_variance64x32_c;
-const VarianceMxNFunc highbd_12_variance32x64_c = vpx_highbd_12_variance32x64_c;
-const VarianceMxNFunc highbd_12_variance32x32_c = vpx_highbd_12_variance32x32_c;
-const VarianceMxNFunc highbd_12_variance32x16_c = vpx_highbd_12_variance32x16_c;
-const VarianceMxNFunc highbd_12_variance16x32_c = vpx_highbd_12_variance16x32_c;
-const VarianceMxNFunc highbd_12_variance16x16_c = vpx_highbd_12_variance16x16_c;
-const VarianceMxNFunc highbd_12_variance16x8_c = vpx_highbd_12_variance16x8_c;
-const VarianceMxNFunc highbd_12_variance8x16_c = vpx_highbd_12_variance8x16_c;
-const VarianceMxNFunc highbd_12_variance8x8_c = vpx_highbd_12_variance8x8_c;
-const VarianceMxNFunc highbd_12_variance8x4_c = vpx_highbd_12_variance8x4_c;
-const VarianceMxNFunc highbd_12_variance4x8_c = vpx_highbd_12_variance4x8_c;
-const VarianceMxNFunc highbd_12_variance4x4_c = vpx_highbd_12_variance4x4_c;
-const VarianceMxNFunc highbd_10_variance64x64_c = vpx_highbd_10_variance64x64_c;
-const VarianceMxNFunc highbd_10_variance64x32_c = vpx_highbd_10_variance64x32_c;
-const VarianceMxNFunc highbd_10_variance32x64_c = vpx_highbd_10_variance32x64_c;
-const VarianceMxNFunc highbd_10_variance32x32_c = vpx_highbd_10_variance32x32_c;
-const VarianceMxNFunc highbd_10_variance32x16_c = vpx_highbd_10_variance32x16_c;
-const VarianceMxNFunc highbd_10_variance16x32_c = vpx_highbd_10_variance16x32_c;
-const VarianceMxNFunc highbd_10_variance16x16_c = vpx_highbd_10_variance16x16_c;
-const VarianceMxNFunc highbd_10_variance16x8_c = vpx_highbd_10_variance16x8_c;
-const VarianceMxNFunc highbd_10_variance8x16_c = vpx_highbd_10_variance8x16_c;
-const VarianceMxNFunc highbd_10_variance8x8_c = vpx_highbd_10_variance8x8_c;
-const VarianceMxNFunc highbd_10_variance8x4_c = vpx_highbd_10_variance8x4_c;
-const VarianceMxNFunc highbd_10_variance4x8_c = vpx_highbd_10_variance4x8_c;
-const VarianceMxNFunc highbd_10_variance4x4_c = vpx_highbd_10_variance4x4_c;
-const VarianceMxNFunc highbd_8_variance64x64_c = vpx_highbd_8_variance64x64_c;
-const VarianceMxNFunc highbd_8_variance64x32_c = vpx_highbd_8_variance64x32_c;
-const VarianceMxNFunc highbd_8_variance32x64_c = vpx_highbd_8_variance32x64_c;
-const VarianceMxNFunc highbd_8_variance32x32_c = vpx_highbd_8_variance32x32_c;
-const VarianceMxNFunc highbd_8_variance32x16_c = vpx_highbd_8_variance32x16_c;
-const VarianceMxNFunc highbd_8_variance16x32_c = vpx_highbd_8_variance16x32_c;
-const VarianceMxNFunc highbd_8_variance16x16_c = vpx_highbd_8_variance16x16_c;
-const VarianceMxNFunc highbd_8_variance16x8_c = vpx_highbd_8_variance16x8_c;
-const VarianceMxNFunc highbd_8_variance8x16_c = vpx_highbd_8_variance8x16_c;
-const VarianceMxNFunc highbd_8_variance8x8_c = vpx_highbd_8_variance8x8_c;
-const VarianceMxNFunc highbd_8_variance8x4_c = vpx_highbd_8_variance8x4_c;
-const VarianceMxNFunc highbd_8_variance4x8_c = vpx_highbd_8_variance4x8_c;
-const VarianceMxNFunc highbd_8_variance4x4_c = vpx_highbd_8_variance4x4_c;
 INSTANTIATE_TEST_CASE_P(
     C, VpxHBDVarianceTest,
-    ::testing::Values(make_tuple(6, 6, highbd_12_variance64x64_c, 12),
-                      make_tuple(6, 5, highbd_12_variance64x32_c, 12),
-                      make_tuple(5, 6, highbd_12_variance32x64_c, 12),
-                      make_tuple(5, 5, highbd_12_variance32x32_c, 12),
-                      make_tuple(5, 4, highbd_12_variance32x16_c, 12),
-                      make_tuple(4, 5, highbd_12_variance16x32_c, 12),
-                      make_tuple(4, 4, highbd_12_variance16x16_c, 12),
-                      make_tuple(4, 3, highbd_12_variance16x8_c, 12),
-                      make_tuple(3, 4, highbd_12_variance8x16_c, 12),
-                      make_tuple(3, 3, highbd_12_variance8x8_c, 12),
-                      make_tuple(3, 2, highbd_12_variance8x4_c, 12),
-                      make_tuple(2, 3, highbd_12_variance4x8_c, 12),
-                      make_tuple(2, 2, highbd_12_variance4x4_c, 12),
-                      make_tuple(6, 6, highbd_10_variance64x64_c, 10),
-                      make_tuple(6, 5, highbd_10_variance64x32_c, 10),
-                      make_tuple(5, 6, highbd_10_variance32x64_c, 10),
-                      make_tuple(5, 5, highbd_10_variance32x32_c, 10),
-                      make_tuple(5, 4, highbd_10_variance32x16_c, 10),
-                      make_tuple(4, 5, highbd_10_variance16x32_c, 10),
-                      make_tuple(4, 4, highbd_10_variance16x16_c, 10),
-                      make_tuple(4, 3, highbd_10_variance16x8_c, 10),
-                      make_tuple(3, 4, highbd_10_variance8x16_c, 10),
-                      make_tuple(3, 3, highbd_10_variance8x8_c, 10),
-                      make_tuple(3, 2, highbd_10_variance8x4_c, 10),
-                      make_tuple(2, 3, highbd_10_variance4x8_c, 10),
-                      make_tuple(2, 2, highbd_10_variance4x4_c, 10),
-                      make_tuple(6, 6, highbd_8_variance64x64_c, 8),
-                      make_tuple(6, 5, highbd_8_variance64x32_c, 8),
-                      make_tuple(5, 6, highbd_8_variance32x64_c, 8),
-                      make_tuple(5, 5, highbd_8_variance32x32_c, 8),
-                      make_tuple(5, 4, highbd_8_variance32x16_c, 8),
-                      make_tuple(4, 5, highbd_8_variance16x32_c, 8),
-                      make_tuple(4, 4, highbd_8_variance16x16_c, 8),
-                      make_tuple(4, 3, highbd_8_variance16x8_c, 8),
-                      make_tuple(3, 4, highbd_8_variance8x16_c, 8),
-                      make_tuple(3, 3, highbd_8_variance8x8_c, 8),
-                      make_tuple(3, 2, highbd_8_variance8x4_c, 8),
-                      make_tuple(2, 3, highbd_8_variance4x8_c, 8),
-                      make_tuple(2, 2, highbd_8_variance4x4_c, 8)));
+    ::testing::Values(make_tuple(6, 6, &vpx_highbd_12_variance64x64_c, 12),
+                      make_tuple(6, 5, &vpx_highbd_12_variance64x32_c, 12),
+                      make_tuple(5, 6, &vpx_highbd_12_variance32x64_c, 12),
+                      make_tuple(5, 5, &vpx_highbd_12_variance32x32_c, 12),
+                      make_tuple(5, 4, &vpx_highbd_12_variance32x16_c, 12),
+                      make_tuple(4, 5, &vpx_highbd_12_variance16x32_c, 12),
+                      make_tuple(4, 4, &vpx_highbd_12_variance16x16_c, 12),
+                      make_tuple(4, 3, &vpx_highbd_12_variance16x8_c, 12),
+                      make_tuple(3, 4, &vpx_highbd_12_variance8x16_c, 12),
+                      make_tuple(3, 3, &vpx_highbd_12_variance8x8_c, 12),
+                      make_tuple(3, 2, &vpx_highbd_12_variance8x4_c, 12),
+                      make_tuple(2, 3, &vpx_highbd_12_variance4x8_c, 12),
+                      make_tuple(2, 2, &vpx_highbd_12_variance4x4_c, 12),
+                      make_tuple(6, 6, &vpx_highbd_10_variance64x64_c, 10),
+                      make_tuple(6, 5, &vpx_highbd_10_variance64x32_c, 10),
+                      make_tuple(5, 6, &vpx_highbd_10_variance32x64_c, 10),
+                      make_tuple(5, 5, &vpx_highbd_10_variance32x32_c, 10),
+                      make_tuple(5, 4, &vpx_highbd_10_variance32x16_c, 10),
+                      make_tuple(4, 5, &vpx_highbd_10_variance16x32_c, 10),
+                      make_tuple(4, 4, &vpx_highbd_10_variance16x16_c, 10),
+                      make_tuple(4, 3, &vpx_highbd_10_variance16x8_c, 10),
+                      make_tuple(3, 4, &vpx_highbd_10_variance8x16_c, 10),
+                      make_tuple(3, 3, &vpx_highbd_10_variance8x8_c, 10),
+                      make_tuple(3, 2, &vpx_highbd_10_variance8x4_c, 10),
+                      make_tuple(2, 3, &vpx_highbd_10_variance4x8_c, 10),
+                      make_tuple(2, 2, &vpx_highbd_10_variance4x4_c, 10),
+                      make_tuple(6, 6, &vpx_highbd_8_variance64x64_c, 8),
+                      make_tuple(6, 5, &vpx_highbd_8_variance64x32_c, 8),
+                      make_tuple(5, 6, &vpx_highbd_8_variance32x64_c, 8),
+                      make_tuple(5, 5, &vpx_highbd_8_variance32x32_c, 8),
+                      make_tuple(5, 4, &vpx_highbd_8_variance32x16_c, 8),
+                      make_tuple(4, 5, &vpx_highbd_8_variance16x32_c, 8),
+                      make_tuple(4, 4, &vpx_highbd_8_variance16x16_c, 8),
+                      make_tuple(4, 3, &vpx_highbd_8_variance16x8_c, 8),
+                      make_tuple(3, 4, &vpx_highbd_8_variance8x16_c, 8),
+                      make_tuple(3, 3, &vpx_highbd_8_variance8x8_c, 8),
+                      make_tuple(3, 2, &vpx_highbd_8_variance8x4_c, 8),
+                      make_tuple(2, 3, &vpx_highbd_8_variance4x8_c, 8),
+                      make_tuple(2, 2, &vpx_highbd_8_variance4x4_c, 8)));
 
-const SubpixVarMxNFunc highbd_8_subpel_var64x64_c =
-    vpx_highbd_8_sub_pixel_variance64x64_c;
-const SubpixVarMxNFunc highbd_8_subpel_var64x32_c =
-    vpx_highbd_8_sub_pixel_variance64x32_c;
-const SubpixVarMxNFunc highbd_8_subpel_var32x64_c =
-    vpx_highbd_8_sub_pixel_variance32x64_c;
-const SubpixVarMxNFunc highbd_8_subpel_var32x32_c =
-    vpx_highbd_8_sub_pixel_variance32x32_c;
-const SubpixVarMxNFunc highbd_8_subpel_var32x16_c =
-    vpx_highbd_8_sub_pixel_variance32x16_c;
-const SubpixVarMxNFunc highbd_8_subpel_var16x32_c =
-    vpx_highbd_8_sub_pixel_variance16x32_c;
-const SubpixVarMxNFunc highbd_8_subpel_var16x16_c =
-    vpx_highbd_8_sub_pixel_variance16x16_c;
-const SubpixVarMxNFunc highbd_8_subpel_var16x8_c =
-    vpx_highbd_8_sub_pixel_variance16x8_c;
-const SubpixVarMxNFunc highbd_8_subpel_var8x16_c =
-    vpx_highbd_8_sub_pixel_variance8x16_c;
-const SubpixVarMxNFunc highbd_8_subpel_var8x8_c =
-    vpx_highbd_8_sub_pixel_variance8x8_c;
-const SubpixVarMxNFunc highbd_8_subpel_var8x4_c =
-    vpx_highbd_8_sub_pixel_variance8x4_c;
-const SubpixVarMxNFunc highbd_8_subpel_var4x8_c =
-    vpx_highbd_8_sub_pixel_variance4x8_c;
-const SubpixVarMxNFunc highbd_8_subpel_var4x4_c =
-    vpx_highbd_8_sub_pixel_variance4x4_c;
-const SubpixVarMxNFunc highbd_10_subpel_var64x64_c =
-    vpx_highbd_10_sub_pixel_variance64x64_c;
-const SubpixVarMxNFunc highbd_10_subpel_var64x32_c =
-    vpx_highbd_10_sub_pixel_variance64x32_c;
-const SubpixVarMxNFunc highbd_10_subpel_var32x64_c =
-    vpx_highbd_10_sub_pixel_variance32x64_c;
-const SubpixVarMxNFunc highbd_10_subpel_var32x32_c =
-    vpx_highbd_10_sub_pixel_variance32x32_c;
-const SubpixVarMxNFunc highbd_10_subpel_var32x16_c =
-    vpx_highbd_10_sub_pixel_variance32x16_c;
-const SubpixVarMxNFunc highbd_10_subpel_var16x32_c =
-    vpx_highbd_10_sub_pixel_variance16x32_c;
-const SubpixVarMxNFunc highbd_10_subpel_var16x16_c =
-    vpx_highbd_10_sub_pixel_variance16x16_c;
-const SubpixVarMxNFunc highbd_10_subpel_var16x8_c =
-    vpx_highbd_10_sub_pixel_variance16x8_c;
-const SubpixVarMxNFunc highbd_10_subpel_var8x16_c =
-    vpx_highbd_10_sub_pixel_variance8x16_c;
-const SubpixVarMxNFunc highbd_10_subpel_var8x8_c =
-    vpx_highbd_10_sub_pixel_variance8x8_c;
-const SubpixVarMxNFunc highbd_10_subpel_var8x4_c =
-    vpx_highbd_10_sub_pixel_variance8x4_c;
-const SubpixVarMxNFunc highbd_10_subpel_var4x8_c =
-    vpx_highbd_10_sub_pixel_variance4x8_c;
-const SubpixVarMxNFunc highbd_10_subpel_var4x4_c =
-    vpx_highbd_10_sub_pixel_variance4x4_c;
-const SubpixVarMxNFunc highbd_12_subpel_var64x64_c =
-    vpx_highbd_12_sub_pixel_variance64x64_c;
-const SubpixVarMxNFunc highbd_12_subpel_var64x32_c =
-    vpx_highbd_12_sub_pixel_variance64x32_c;
-const SubpixVarMxNFunc highbd_12_subpel_var32x64_c =
-    vpx_highbd_12_sub_pixel_variance32x64_c;
-const SubpixVarMxNFunc highbd_12_subpel_var32x32_c =
-    vpx_highbd_12_sub_pixel_variance32x32_c;
-const SubpixVarMxNFunc highbd_12_subpel_var32x16_c =
-    vpx_highbd_12_sub_pixel_variance32x16_c;
-const SubpixVarMxNFunc highbd_12_subpel_var16x32_c =
-    vpx_highbd_12_sub_pixel_variance16x32_c;
-const SubpixVarMxNFunc highbd_12_subpel_var16x16_c =
-    vpx_highbd_12_sub_pixel_variance16x16_c;
-const SubpixVarMxNFunc highbd_12_subpel_var16x8_c =
-    vpx_highbd_12_sub_pixel_variance16x8_c;
-const SubpixVarMxNFunc highbd_12_subpel_var8x16_c =
-    vpx_highbd_12_sub_pixel_variance8x16_c;
-const SubpixVarMxNFunc highbd_12_subpel_var8x8_c =
-    vpx_highbd_12_sub_pixel_variance8x8_c;
-const SubpixVarMxNFunc highbd_12_subpel_var8x4_c =
-    vpx_highbd_12_sub_pixel_variance8x4_c;
-const SubpixVarMxNFunc highbd_12_subpel_var4x8_c =
-    vpx_highbd_12_sub_pixel_variance4x8_c;
-const SubpixVarMxNFunc highbd_12_subpel_var4x4_c =
-    vpx_highbd_12_sub_pixel_variance4x4_c;
 INSTANTIATE_TEST_CASE_P(
     C, VpxHBDSubpelVarianceTest,
-    ::testing::Values(make_tuple(6, 6, highbd_8_subpel_var64x64_c, 8),
-                      make_tuple(6, 5, highbd_8_subpel_var64x32_c, 8),
-                      make_tuple(5, 6, highbd_8_subpel_var32x64_c, 8),
-                      make_tuple(5, 5, highbd_8_subpel_var32x32_c, 8),
-                      make_tuple(5, 4, highbd_8_subpel_var32x16_c, 8),
-                      make_tuple(4, 5, highbd_8_subpel_var16x32_c, 8),
-                      make_tuple(4, 4, highbd_8_subpel_var16x16_c, 8),
-                      make_tuple(4, 3, highbd_8_subpel_var16x8_c, 8),
-                      make_tuple(3, 4, highbd_8_subpel_var8x16_c, 8),
-                      make_tuple(3, 3, highbd_8_subpel_var8x8_c, 8),
-                      make_tuple(3, 2, highbd_8_subpel_var8x4_c, 8),
-                      make_tuple(2, 3, highbd_8_subpel_var4x8_c, 8),
-                      make_tuple(2, 2, highbd_8_subpel_var4x4_c, 8),
-                      make_tuple(6, 6, highbd_10_subpel_var64x64_c, 10),
-                      make_tuple(6, 5, highbd_10_subpel_var64x32_c, 10),
-                      make_tuple(5, 6, highbd_10_subpel_var32x64_c, 10),
-                      make_tuple(5, 5, highbd_10_subpel_var32x32_c, 10),
-                      make_tuple(5, 4, highbd_10_subpel_var32x16_c, 10),
-                      make_tuple(4, 5, highbd_10_subpel_var16x32_c, 10),
-                      make_tuple(4, 4, highbd_10_subpel_var16x16_c, 10),
-                      make_tuple(4, 3, highbd_10_subpel_var16x8_c, 10),
-                      make_tuple(3, 4, highbd_10_subpel_var8x16_c, 10),
-                      make_tuple(3, 3, highbd_10_subpel_var8x8_c, 10),
-                      make_tuple(3, 2, highbd_10_subpel_var8x4_c, 10),
-                      make_tuple(2, 3, highbd_10_subpel_var4x8_c, 10),
-                      make_tuple(2, 2, highbd_10_subpel_var4x4_c, 10),
-                      make_tuple(6, 6, highbd_12_subpel_var64x64_c, 12),
-                      make_tuple(6, 5, highbd_12_subpel_var64x32_c, 12),
-                      make_tuple(5, 6, highbd_12_subpel_var32x64_c, 12),
-                      make_tuple(5, 5, highbd_12_subpel_var32x32_c, 12),
-                      make_tuple(5, 4, highbd_12_subpel_var32x16_c, 12),
-                      make_tuple(4, 5, highbd_12_subpel_var16x32_c, 12),
-                      make_tuple(4, 4, highbd_12_subpel_var16x16_c, 12),
-                      make_tuple(4, 3, highbd_12_subpel_var16x8_c, 12),
-                      make_tuple(3, 4, highbd_12_subpel_var8x16_c, 12),
-                      make_tuple(3, 3, highbd_12_subpel_var8x8_c, 12),
-                      make_tuple(3, 2, highbd_12_subpel_var8x4_c, 12),
-                      make_tuple(2, 3, highbd_12_subpel_var4x8_c, 12),
-                      make_tuple(2, 2, highbd_12_subpel_var4x4_c, 12)));
+    ::testing::Values(
+        make_tuple(6, 6, &vpx_highbd_8_sub_pixel_variance64x64_c, 8),
+        make_tuple(6, 5, &vpx_highbd_8_sub_pixel_variance64x32_c, 8),
+        make_tuple(5, 6, &vpx_highbd_8_sub_pixel_variance32x64_c, 8),
+        make_tuple(5, 5, &vpx_highbd_8_sub_pixel_variance32x32_c, 8),
+        make_tuple(5, 4, &vpx_highbd_8_sub_pixel_variance32x16_c, 8),
+        make_tuple(4, 5, &vpx_highbd_8_sub_pixel_variance16x32_c, 8),
+        make_tuple(4, 4, &vpx_highbd_8_sub_pixel_variance16x16_c, 8),
+        make_tuple(4, 3, &vpx_highbd_8_sub_pixel_variance16x8_c, 8),
+        make_tuple(3, 4, &vpx_highbd_8_sub_pixel_variance8x16_c, 8),
+        make_tuple(3, 3, &vpx_highbd_8_sub_pixel_variance8x8_c, 8),
+        make_tuple(3, 2, &vpx_highbd_8_sub_pixel_variance8x4_c, 8),
+        make_tuple(2, 3, &vpx_highbd_8_sub_pixel_variance4x8_c, 8),
+        make_tuple(2, 2, &vpx_highbd_8_sub_pixel_variance4x4_c, 8),
+        make_tuple(6, 6, &vpx_highbd_10_sub_pixel_variance64x64_c, 10),
+        make_tuple(6, 5, &vpx_highbd_10_sub_pixel_variance64x32_c, 10),
+        make_tuple(5, 6, &vpx_highbd_10_sub_pixel_variance32x64_c, 10),
+        make_tuple(5, 5, &vpx_highbd_10_sub_pixel_variance32x32_c, 10),
+        make_tuple(5, 4, &vpx_highbd_10_sub_pixel_variance32x16_c, 10),
+        make_tuple(4, 5, &vpx_highbd_10_sub_pixel_variance16x32_c, 10),
+        make_tuple(4, 4, &vpx_highbd_10_sub_pixel_variance16x16_c, 10),
+        make_tuple(4, 3, &vpx_highbd_10_sub_pixel_variance16x8_c, 10),
+        make_tuple(3, 4, &vpx_highbd_10_sub_pixel_variance8x16_c, 10),
+        make_tuple(3, 3, &vpx_highbd_10_sub_pixel_variance8x8_c, 10),
+        make_tuple(3, 2, &vpx_highbd_10_sub_pixel_variance8x4_c, 10),
+        make_tuple(2, 3, &vpx_highbd_10_sub_pixel_variance4x8_c, 10),
+        make_tuple(2, 2, &vpx_highbd_10_sub_pixel_variance4x4_c, 10),
+        make_tuple(6, 6, &vpx_highbd_12_sub_pixel_variance64x64_c, 12),
+        make_tuple(6, 5, &vpx_highbd_12_sub_pixel_variance64x32_c, 12),
+        make_tuple(5, 6, &vpx_highbd_12_sub_pixel_variance32x64_c, 12),
+        make_tuple(5, 5, &vpx_highbd_12_sub_pixel_variance32x32_c, 12),
+        make_tuple(5, 4, &vpx_highbd_12_sub_pixel_variance32x16_c, 12),
+        make_tuple(4, 5, &vpx_highbd_12_sub_pixel_variance16x32_c, 12),
+        make_tuple(4, 4, &vpx_highbd_12_sub_pixel_variance16x16_c, 12),
+        make_tuple(4, 3, &vpx_highbd_12_sub_pixel_variance16x8_c, 12),
+        make_tuple(3, 4, &vpx_highbd_12_sub_pixel_variance8x16_c, 12),
+        make_tuple(3, 3, &vpx_highbd_12_sub_pixel_variance8x8_c, 12),
+        make_tuple(3, 2, &vpx_highbd_12_sub_pixel_variance8x4_c, 12),
+        make_tuple(2, 3, &vpx_highbd_12_sub_pixel_variance4x8_c, 12),
+        make_tuple(2, 2, &vpx_highbd_12_sub_pixel_variance4x4_c, 12)));
 
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var64x64_c =
-    vpx_highbd_8_sub_pixel_avg_variance64x64_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var64x32_c =
-    vpx_highbd_8_sub_pixel_avg_variance64x32_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var32x64_c =
-    vpx_highbd_8_sub_pixel_avg_variance32x64_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var32x32_c =
-    vpx_highbd_8_sub_pixel_avg_variance32x32_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var32x16_c =
-    vpx_highbd_8_sub_pixel_avg_variance32x16_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var16x32_c =
-    vpx_highbd_8_sub_pixel_avg_variance16x32_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var16x16_c =
-    vpx_highbd_8_sub_pixel_avg_variance16x16_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var16x8_c =
-    vpx_highbd_8_sub_pixel_avg_variance16x8_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var8x16_c =
-    vpx_highbd_8_sub_pixel_avg_variance8x16_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var8x8_c =
-    vpx_highbd_8_sub_pixel_avg_variance8x8_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var8x4_c =
-    vpx_highbd_8_sub_pixel_avg_variance8x4_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var4x8_c =
-    vpx_highbd_8_sub_pixel_avg_variance4x8_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var4x4_c =
-    vpx_highbd_8_sub_pixel_avg_variance4x4_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var64x64_c =
-    vpx_highbd_10_sub_pixel_avg_variance64x64_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var64x32_c =
-    vpx_highbd_10_sub_pixel_avg_variance64x32_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var32x64_c =
-    vpx_highbd_10_sub_pixel_avg_variance32x64_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var32x32_c =
-    vpx_highbd_10_sub_pixel_avg_variance32x32_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var32x16_c =
-    vpx_highbd_10_sub_pixel_avg_variance32x16_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var16x32_c =
-    vpx_highbd_10_sub_pixel_avg_variance16x32_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var16x16_c =
-    vpx_highbd_10_sub_pixel_avg_variance16x16_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var16x8_c =
-    vpx_highbd_10_sub_pixel_avg_variance16x8_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var8x16_c =
-    vpx_highbd_10_sub_pixel_avg_variance8x16_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var8x8_c =
-    vpx_highbd_10_sub_pixel_avg_variance8x8_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var8x4_c =
-    vpx_highbd_10_sub_pixel_avg_variance8x4_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var4x8_c =
-    vpx_highbd_10_sub_pixel_avg_variance4x8_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var4x4_c =
-    vpx_highbd_10_sub_pixel_avg_variance4x4_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var64x64_c =
-    vpx_highbd_12_sub_pixel_avg_variance64x64_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var64x32_c =
-    vpx_highbd_12_sub_pixel_avg_variance64x32_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var32x64_c =
-    vpx_highbd_12_sub_pixel_avg_variance32x64_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var32x32_c =
-    vpx_highbd_12_sub_pixel_avg_variance32x32_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var32x16_c =
-    vpx_highbd_12_sub_pixel_avg_variance32x16_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var16x32_c =
-    vpx_highbd_12_sub_pixel_avg_variance16x32_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var16x16_c =
-    vpx_highbd_12_sub_pixel_avg_variance16x16_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var16x8_c =
-    vpx_highbd_12_sub_pixel_avg_variance16x8_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var8x16_c =
-    vpx_highbd_12_sub_pixel_avg_variance8x16_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var8x8_c =
-    vpx_highbd_12_sub_pixel_avg_variance8x8_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var8x4_c =
-    vpx_highbd_12_sub_pixel_avg_variance8x4_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var4x8_c =
-    vpx_highbd_12_sub_pixel_avg_variance4x8_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var4x4_c =
-    vpx_highbd_12_sub_pixel_avg_variance4x4_c;
 INSTANTIATE_TEST_CASE_P(
     C, VpxHBDSubpelAvgVarianceTest,
     ::testing::Values(
-        make_tuple(6, 6, highbd_8_subpel_avg_var64x64_c, 8),
-        make_tuple(6, 5, highbd_8_subpel_avg_var64x32_c, 8),
-        make_tuple(5, 6, highbd_8_subpel_avg_var32x64_c, 8),
-        make_tuple(5, 5, highbd_8_subpel_avg_var32x32_c, 8),
-        make_tuple(5, 4, highbd_8_subpel_avg_var32x16_c, 8),
-        make_tuple(4, 5, highbd_8_subpel_avg_var16x32_c, 8),
-        make_tuple(4, 4, highbd_8_subpel_avg_var16x16_c, 8),
-        make_tuple(4, 3, highbd_8_subpel_avg_var16x8_c, 8),
-        make_tuple(3, 4, highbd_8_subpel_avg_var8x16_c, 8),
-        make_tuple(3, 3, highbd_8_subpel_avg_var8x8_c, 8),
-        make_tuple(3, 2, highbd_8_subpel_avg_var8x4_c, 8),
-        make_tuple(2, 3, highbd_8_subpel_avg_var4x8_c, 8),
-        make_tuple(2, 2, highbd_8_subpel_avg_var4x4_c, 8),
-        make_tuple(6, 6, highbd_10_subpel_avg_var64x64_c, 10),
-        make_tuple(6, 5, highbd_10_subpel_avg_var64x32_c, 10),
-        make_tuple(5, 6, highbd_10_subpel_avg_var32x64_c, 10),
-        make_tuple(5, 5, highbd_10_subpel_avg_var32x32_c, 10),
-        make_tuple(5, 4, highbd_10_subpel_avg_var32x16_c, 10),
-        make_tuple(4, 5, highbd_10_subpel_avg_var16x32_c, 10),
-        make_tuple(4, 4, highbd_10_subpel_avg_var16x16_c, 10),
-        make_tuple(4, 3, highbd_10_subpel_avg_var16x8_c, 10),
-        make_tuple(3, 4, highbd_10_subpel_avg_var8x16_c, 10),
-        make_tuple(3, 3, highbd_10_subpel_avg_var8x8_c, 10),
-        make_tuple(3, 2, highbd_10_subpel_avg_var8x4_c, 10),
-        make_tuple(2, 3, highbd_10_subpel_avg_var4x8_c, 10),
-        make_tuple(2, 2, highbd_10_subpel_avg_var4x4_c, 10),
-        make_tuple(6, 6, highbd_12_subpel_avg_var64x64_c, 12),
-        make_tuple(6, 5, highbd_12_subpel_avg_var64x32_c, 12),
-        make_tuple(5, 6, highbd_12_subpel_avg_var32x64_c, 12),
-        make_tuple(5, 5, highbd_12_subpel_avg_var32x32_c, 12),
-        make_tuple(5, 4, highbd_12_subpel_avg_var32x16_c, 12),
-        make_tuple(4, 5, highbd_12_subpel_avg_var16x32_c, 12),
-        make_tuple(4, 4, highbd_12_subpel_avg_var16x16_c, 12),
-        make_tuple(4, 3, highbd_12_subpel_avg_var16x8_c, 12),
-        make_tuple(3, 4, highbd_12_subpel_avg_var8x16_c, 12),
-        make_tuple(3, 3, highbd_12_subpel_avg_var8x8_c, 12),
-        make_tuple(3, 2, highbd_12_subpel_avg_var8x4_c, 12),
-        make_tuple(2, 3, highbd_12_subpel_avg_var4x8_c, 12),
-        make_tuple(2, 2, highbd_12_subpel_avg_var4x4_c, 12)));
+        make_tuple(6, 6, &vpx_highbd_8_sub_pixel_avg_variance64x64_c, 8),
+        make_tuple(6, 5, &vpx_highbd_8_sub_pixel_avg_variance64x32_c, 8),
+        make_tuple(5, 6, &vpx_highbd_8_sub_pixel_avg_variance32x64_c, 8),
+        make_tuple(5, 5, &vpx_highbd_8_sub_pixel_avg_variance32x32_c, 8),
+        make_tuple(5, 4, &vpx_highbd_8_sub_pixel_avg_variance32x16_c, 8),
+        make_tuple(4, 5, &vpx_highbd_8_sub_pixel_avg_variance16x32_c, 8),
+        make_tuple(4, 4, &vpx_highbd_8_sub_pixel_avg_variance16x16_c, 8),
+        make_tuple(4, 3, &vpx_highbd_8_sub_pixel_avg_variance16x8_c, 8),
+        make_tuple(3, 4, &vpx_highbd_8_sub_pixel_avg_variance8x16_c, 8),
+        make_tuple(3, 3, &vpx_highbd_8_sub_pixel_avg_variance8x8_c, 8),
+        make_tuple(3, 2, &vpx_highbd_8_sub_pixel_avg_variance8x4_c, 8),
+        make_tuple(2, 3, &vpx_highbd_8_sub_pixel_avg_variance4x8_c, 8),
+        make_tuple(2, 2, &vpx_highbd_8_sub_pixel_avg_variance4x4_c, 8),
+        make_tuple(6, 6, &vpx_highbd_10_sub_pixel_avg_variance64x64_c, 10),
+        make_tuple(6, 5, &vpx_highbd_10_sub_pixel_avg_variance64x32_c, 10),
+        make_tuple(5, 6, &vpx_highbd_10_sub_pixel_avg_variance32x64_c, 10),
+        make_tuple(5, 5, &vpx_highbd_10_sub_pixel_avg_variance32x32_c, 10),
+        make_tuple(5, 4, &vpx_highbd_10_sub_pixel_avg_variance32x16_c, 10),
+        make_tuple(4, 5, &vpx_highbd_10_sub_pixel_avg_variance16x32_c, 10),
+        make_tuple(4, 4, &vpx_highbd_10_sub_pixel_avg_variance16x16_c, 10),
+        make_tuple(4, 3, &vpx_highbd_10_sub_pixel_avg_variance16x8_c, 10),
+        make_tuple(3, 4, &vpx_highbd_10_sub_pixel_avg_variance8x16_c, 10),
+        make_tuple(3, 3, &vpx_highbd_10_sub_pixel_avg_variance8x8_c, 10),
+        make_tuple(3, 2, &vpx_highbd_10_sub_pixel_avg_variance8x4_c, 10),
+        make_tuple(2, 3, &vpx_highbd_10_sub_pixel_avg_variance4x8_c, 10),
+        make_tuple(2, 2, &vpx_highbd_10_sub_pixel_avg_variance4x4_c, 10),
+        make_tuple(6, 6, &vpx_highbd_12_sub_pixel_avg_variance64x64_c, 12),
+        make_tuple(6, 5, &vpx_highbd_12_sub_pixel_avg_variance64x32_c, 12),
+        make_tuple(5, 6, &vpx_highbd_12_sub_pixel_avg_variance32x64_c, 12),
+        make_tuple(5, 5, &vpx_highbd_12_sub_pixel_avg_variance32x32_c, 12),
+        make_tuple(5, 4, &vpx_highbd_12_sub_pixel_avg_variance32x16_c, 12),
+        make_tuple(4, 5, &vpx_highbd_12_sub_pixel_avg_variance16x32_c, 12),
+        make_tuple(4, 4, &vpx_highbd_12_sub_pixel_avg_variance16x16_c, 12),
+        make_tuple(4, 3, &vpx_highbd_12_sub_pixel_avg_variance16x8_c, 12),
+        make_tuple(3, 4, &vpx_highbd_12_sub_pixel_avg_variance8x16_c, 12),
+        make_tuple(3, 3, &vpx_highbd_12_sub_pixel_avg_variance8x8_c, 12),
+        make_tuple(3, 2, &vpx_highbd_12_sub_pixel_avg_variance8x4_c, 12),
+        make_tuple(2, 3, &vpx_highbd_12_sub_pixel_avg_variance4x8_c, 12),
+        make_tuple(2, 2, &vpx_highbd_12_sub_pixel_avg_variance4x4_c, 12)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
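 
 // A minimal sketch, for orientation only, of how the parameter tuples
 // above are consumed: the first two values are log2(width) and
 // log2(height) of the block, the third is the function under test, and
 // the optional fourth is the bit depth (0 in the lowbd tables). The
 // class and member names below are illustrative, not the actual
 // fixtures in this file; the sketch assumes gtest's tr1 tuple, which
 // the make_tuple calls above come from, and the includes already
 // present in this file.
 typedef unsigned int (*ExampleVarianceFn)(const uint8_t *src,
                                           int src_stride,
                                           const uint8_t *ref,
                                           int ref_stride,
                                           unsigned int *sse);
 class ExampleVarianceTest
     : public ::testing::TestWithParam<
           std::tr1::tuple<int, int, ExampleVarianceFn, int> > {
  protected:
   virtual void SetUp() {
     log2width_ = std::tr1::get<0>(GetParam());   // e.g. 6 -> 64 wide
     log2height_ = std::tr1::get<1>(GetParam());  // e.g. 5 -> 32 tall
     func_ = std::tr1::get<2>(GetParam());
     bit_depth_ = std::tr1::get<3>(GetParam());
     width_ = 1 << log2width_;
     height_ = 1 << log2height_;
   }
   ExampleVarianceFn func_;
   int log2width_, log2height_, width_, height_, bit_depth_;
 };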
 
 #if HAVE_MMX
-const VarianceMxNFunc mse16x16_mmx = vpx_mse16x16_mmx;
 INSTANTIATE_TEST_CASE_P(MMX, VpxMseTest,
-                        ::testing::Values(make_tuple(4, 4, mse16x16_mmx)));
+                        ::testing::Values(make_tuple(4, 4, &vpx_mse16x16_mmx)));
 
 INSTANTIATE_TEST_CASE_P(MMX, SumOfSquaresTest,
                         ::testing::Values(vpx_get_mb_ss_mmx));
 
-const VarianceMxNFunc variance16x16_mmx = vpx_variance16x16_mmx;
-const VarianceMxNFunc variance16x8_mmx = vpx_variance16x8_mmx;
-const VarianceMxNFunc variance8x16_mmx = vpx_variance8x16_mmx;
-const VarianceMxNFunc variance8x8_mmx = vpx_variance8x8_mmx;
-const VarianceMxNFunc variance4x4_mmx = vpx_variance4x4_mmx;
 INSTANTIATE_TEST_CASE_P(
     MMX, VpxVarianceTest,
-    ::testing::Values(make_tuple(4, 4, variance16x16_mmx, 0),
-                      make_tuple(4, 3, variance16x8_mmx, 0),
-                      make_tuple(3, 4, variance8x16_mmx, 0),
-                      make_tuple(3, 3, variance8x8_mmx, 0),
-                      make_tuple(2, 2, variance4x4_mmx, 0)));
+    ::testing::Values(make_tuple(4, 4, &vpx_variance16x16_mmx, 0),
+                      make_tuple(4, 3, &vpx_variance16x8_mmx, 0),
+                      make_tuple(3, 4, &vpx_variance8x16_mmx, 0),
+                      make_tuple(3, 3, &vpx_variance8x8_mmx, 0),
+                      make_tuple(2, 2, &vpx_variance4x4_mmx, 0)));
 
-const SubpixVarMxNFunc subpel_var16x16_mmx = vpx_sub_pixel_variance16x16_mmx;
-const SubpixVarMxNFunc subpel_var16x8_mmx = vpx_sub_pixel_variance16x8_mmx;
-const SubpixVarMxNFunc subpel_var8x16_mmx = vpx_sub_pixel_variance8x16_mmx;
-const SubpixVarMxNFunc subpel_var8x8_mmx = vpx_sub_pixel_variance8x8_mmx;
-const SubpixVarMxNFunc subpel_var4x4_mmx = vpx_sub_pixel_variance4x4_mmx;
 INSTANTIATE_TEST_CASE_P(
     MMX, VpxSubpelVarianceTest,
-    ::testing::Values(make_tuple(4, 4, subpel_var16x16_mmx, 0),
-                      make_tuple(4, 3, subpel_var16x8_mmx, 0),
-                      make_tuple(3, 4, subpel_var8x16_mmx, 0),
-                      make_tuple(3, 3, subpel_var8x8_mmx, 0),
-                      make_tuple(2, 2, subpel_var4x4_mmx, 0)));
+    ::testing::Values(make_tuple(4, 4, &vpx_sub_pixel_variance16x16_mmx, 0),
+                      make_tuple(4, 3, &vpx_sub_pixel_variance16x8_mmx, 0),
+                      make_tuple(3, 4, &vpx_sub_pixel_variance8x16_mmx, 0),
+                      make_tuple(3, 3, &vpx_sub_pixel_variance8x8_mmx, 0),
+                      make_tuple(2, 2, &vpx_sub_pixel_variance4x4_mmx, 0)));
 #endif  // HAVE_MMX
 
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(SSE2, SumOfSquaresTest,
                         ::testing::Values(vpx_get_mb_ss_sse2));
 
-const VarianceMxNFunc mse16x16_sse2 = vpx_mse16x16_sse2;
-const VarianceMxNFunc mse16x8_sse2 = vpx_mse16x8_sse2;
-const VarianceMxNFunc mse8x16_sse2 = vpx_mse8x16_sse2;
-const VarianceMxNFunc mse8x8_sse2 = vpx_mse8x8_sse2;
 INSTANTIATE_TEST_CASE_P(SSE2, VpxMseTest,
-                        ::testing::Values(make_tuple(4, 4, mse16x16_sse2),
-                                          make_tuple(4, 3, mse16x8_sse2),
-                                          make_tuple(3, 4, mse8x16_sse2),
-                                          make_tuple(3, 3, mse8x8_sse2)));
+                        ::testing::Values(make_tuple(4, 4, &vpx_mse16x16_sse2),
+                                          make_tuple(4, 3, &vpx_mse16x8_sse2),
+                                          make_tuple(3, 4, &vpx_mse8x16_sse2),
+                                          make_tuple(3, 3, &vpx_mse8x8_sse2)));
 
-const VarianceMxNFunc variance64x64_sse2 = vpx_variance64x64_sse2;
-const VarianceMxNFunc variance64x32_sse2 = vpx_variance64x32_sse2;
-const VarianceMxNFunc variance32x64_sse2 = vpx_variance32x64_sse2;
-const VarianceMxNFunc variance32x32_sse2 = vpx_variance32x32_sse2;
-const VarianceMxNFunc variance32x16_sse2 = vpx_variance32x16_sse2;
-const VarianceMxNFunc variance16x32_sse2 = vpx_variance16x32_sse2;
-const VarianceMxNFunc variance16x16_sse2 = vpx_variance16x16_sse2;
-const VarianceMxNFunc variance16x8_sse2 = vpx_variance16x8_sse2;
-const VarianceMxNFunc variance8x16_sse2 = vpx_variance8x16_sse2;
-const VarianceMxNFunc variance8x8_sse2 = vpx_variance8x8_sse2;
-const VarianceMxNFunc variance8x4_sse2 = vpx_variance8x4_sse2;
-const VarianceMxNFunc variance4x8_sse2 = vpx_variance4x8_sse2;
-const VarianceMxNFunc variance4x4_sse2 = vpx_variance4x4_sse2;
 INSTANTIATE_TEST_CASE_P(
     SSE2, VpxVarianceTest,
-    ::testing::Values(make_tuple(6, 6, variance64x64_sse2, 0),
-                      make_tuple(6, 5, variance64x32_sse2, 0),
-                      make_tuple(5, 6, variance32x64_sse2, 0),
-                      make_tuple(5, 5, variance32x32_sse2, 0),
-                      make_tuple(5, 4, variance32x16_sse2, 0),
-                      make_tuple(4, 5, variance16x32_sse2, 0),
-                      make_tuple(4, 4, variance16x16_sse2, 0),
-                      make_tuple(4, 3, variance16x8_sse2, 0),
-                      make_tuple(3, 4, variance8x16_sse2, 0),
-                      make_tuple(3, 3, variance8x8_sse2, 0),
-                      make_tuple(3, 2, variance8x4_sse2, 0),
-                      make_tuple(2, 3, variance4x8_sse2, 0),
-                      make_tuple(2, 2, variance4x4_sse2, 0)));
+    ::testing::Values(make_tuple(6, 6, &vpx_variance64x64_sse2, 0),
+                      make_tuple(6, 5, &vpx_variance64x32_sse2, 0),
+                      make_tuple(5, 6, &vpx_variance32x64_sse2, 0),
+                      make_tuple(5, 5, &vpx_variance32x32_sse2, 0),
+                      make_tuple(5, 4, &vpx_variance32x16_sse2, 0),
+                      make_tuple(4, 5, &vpx_variance16x32_sse2, 0),
+                      make_tuple(4, 4, &vpx_variance16x16_sse2, 0),
+                      make_tuple(4, 3, &vpx_variance16x8_sse2, 0),
+                      make_tuple(3, 4, &vpx_variance8x16_sse2, 0),
+                      make_tuple(3, 3, &vpx_variance8x8_sse2, 0),
+                      make_tuple(3, 2, &vpx_variance8x4_sse2, 0),
+                      make_tuple(2, 3, &vpx_variance4x8_sse2, 0),
+                      make_tuple(2, 2, &vpx_variance4x4_sse2, 0)));
 
 #if CONFIG_USE_X86INC
-const SubpixVarMxNFunc subpel_variance64x64_sse2 =
-    vpx_sub_pixel_variance64x64_sse2;
-const SubpixVarMxNFunc subpel_variance64x32_sse2 =
-    vpx_sub_pixel_variance64x32_sse2;
-const SubpixVarMxNFunc subpel_variance32x64_sse2 =
-    vpx_sub_pixel_variance32x64_sse2;
-const SubpixVarMxNFunc subpel_variance32x32_sse2 =
-    vpx_sub_pixel_variance32x32_sse2;
-const SubpixVarMxNFunc subpel_variance32x16_sse2 =
-    vpx_sub_pixel_variance32x16_sse2;
-const SubpixVarMxNFunc subpel_variance16x32_sse2 =
-    vpx_sub_pixel_variance16x32_sse2;
-const SubpixVarMxNFunc subpel_variance16x16_sse2 =
-    vpx_sub_pixel_variance16x16_sse2;
-const SubpixVarMxNFunc subpel_variance16x8_sse2 =
-    vpx_sub_pixel_variance16x8_sse2;
-const SubpixVarMxNFunc subpel_variance8x16_sse2 =
-    vpx_sub_pixel_variance8x16_sse2;
-const SubpixVarMxNFunc subpel_variance8x8_sse2 = vpx_sub_pixel_variance8x8_sse2;
-const SubpixVarMxNFunc subpel_variance8x4_sse2 = vpx_sub_pixel_variance8x4_sse2;
-const SubpixVarMxNFunc subpel_variance4x8_sse = vpx_sub_pixel_variance4x8_sse;
-const SubpixVarMxNFunc subpel_variance4x4_sse = vpx_sub_pixel_variance4x4_sse;
 INSTANTIATE_TEST_CASE_P(
     SSE2, VpxSubpelVarianceTest,
-    ::testing::Values(make_tuple(6, 6, subpel_variance64x64_sse2, 0),
-                      make_tuple(6, 5, subpel_variance64x32_sse2, 0),
-                      make_tuple(5, 6, subpel_variance32x64_sse2, 0),
-                      make_tuple(5, 5, subpel_variance32x32_sse2, 0),
-                      make_tuple(5, 4, subpel_variance32x16_sse2, 0),
-                      make_tuple(4, 5, subpel_variance16x32_sse2, 0),
-                      make_tuple(4, 4, subpel_variance16x16_sse2, 0),
-                      make_tuple(4, 3, subpel_variance16x8_sse2, 0),
-                      make_tuple(3, 4, subpel_variance8x16_sse2, 0),
-                      make_tuple(3, 3, subpel_variance8x8_sse2, 0),
-                      make_tuple(3, 2, subpel_variance8x4_sse2, 0),
-                      make_tuple(2, 3, subpel_variance4x8_sse, 0),
-                      make_tuple(2, 2, subpel_variance4x4_sse, 0)));
+    ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_variance64x64_sse2, 0),
+                      make_tuple(6, 5, &vpx_sub_pixel_variance64x32_sse2, 0),
+                      make_tuple(5, 6, &vpx_sub_pixel_variance32x64_sse2, 0),
+                      make_tuple(5, 5, &vpx_sub_pixel_variance32x32_sse2, 0),
+                      make_tuple(5, 4, &vpx_sub_pixel_variance32x16_sse2, 0),
+                      make_tuple(4, 5, &vpx_sub_pixel_variance16x32_sse2, 0),
+                      make_tuple(4, 4, &vpx_sub_pixel_variance16x16_sse2, 0),
+                      make_tuple(4, 3, &vpx_sub_pixel_variance16x8_sse2, 0),
+                      make_tuple(3, 4, &vpx_sub_pixel_variance8x16_sse2, 0),
+                      make_tuple(3, 3, &vpx_sub_pixel_variance8x8_sse2, 0),
+                      make_tuple(3, 2, &vpx_sub_pixel_variance8x4_sse2, 0),
+                      make_tuple(2, 3, &vpx_sub_pixel_variance4x8_sse, 0),
+                      make_tuple(2, 2, &vpx_sub_pixel_variance4x4_sse, 0)));
 
-const SubpixAvgVarMxNFunc subpel_avg_variance64x64_sse2 =
-    vpx_sub_pixel_avg_variance64x64_sse2;
-const SubpixAvgVarMxNFunc subpel_avg_variance64x32_sse2 =
-    vpx_sub_pixel_avg_variance64x32_sse2;
-const SubpixAvgVarMxNFunc subpel_avg_variance32x64_sse2 =
-    vpx_sub_pixel_avg_variance32x64_sse2;
-const SubpixAvgVarMxNFunc subpel_avg_variance32x32_sse2 =
-    vpx_sub_pixel_avg_variance32x32_sse2;
-const SubpixAvgVarMxNFunc subpel_avg_variance32x16_sse2 =
-    vpx_sub_pixel_avg_variance32x16_sse2;
-const SubpixAvgVarMxNFunc subpel_avg_variance16x32_sse2 =
-    vpx_sub_pixel_avg_variance16x32_sse2;
-const SubpixAvgVarMxNFunc subpel_avg_variance16x16_sse2 =
-    vpx_sub_pixel_avg_variance16x16_sse2;
-const SubpixAvgVarMxNFunc subpel_avg_variance16x8_sse2 =
-    vpx_sub_pixel_avg_variance16x8_sse2;
-const SubpixAvgVarMxNFunc subpel_avg_variance8x16_sse2 =
-    vpx_sub_pixel_avg_variance8x16_sse2;
-const SubpixAvgVarMxNFunc subpel_avg_variance8x8_sse2 =
-    vpx_sub_pixel_avg_variance8x8_sse2;
-const SubpixAvgVarMxNFunc subpel_avg_variance8x4_sse2 =
-    vpx_sub_pixel_avg_variance8x4_sse2;
-const SubpixAvgVarMxNFunc subpel_avg_variance4x8_sse =
-    vpx_sub_pixel_avg_variance4x8_sse;
-const SubpixAvgVarMxNFunc subpel_avg_variance4x4_sse =
-    vpx_sub_pixel_avg_variance4x4_sse;
 INSTANTIATE_TEST_CASE_P(
     SSE2, VpxSubpelAvgVarianceTest,
     ::testing::Values(
-                      make_tuple(6, 6, subpel_avg_variance64x64_sse2, 0),
-                      make_tuple(6, 5, subpel_avg_variance64x32_sse2, 0),
-                      make_tuple(5, 6, subpel_avg_variance32x64_sse2, 0),
-                      make_tuple(5, 5, subpel_avg_variance32x32_sse2, 0),
-                      make_tuple(5, 4, subpel_avg_variance32x16_sse2, 0),
-                      make_tuple(4, 5, subpel_avg_variance16x32_sse2, 0),
-                      make_tuple(4, 4, subpel_avg_variance16x16_sse2, 0),
-                      make_tuple(4, 3, subpel_avg_variance16x8_sse2, 0),
-                      make_tuple(3, 4, subpel_avg_variance8x16_sse2, 0),
-                      make_tuple(3, 3, subpel_avg_variance8x8_sse2, 0),
-                      make_tuple(3, 2, subpel_avg_variance8x4_sse2, 0),
-                      make_tuple(2, 3, subpel_avg_variance4x8_sse, 0),
-                      make_tuple(2, 2, subpel_avg_variance4x4_sse, 0)));
+        make_tuple(6, 6, &vpx_sub_pixel_avg_variance64x64_sse2, 0),
+        make_tuple(6, 5, &vpx_sub_pixel_avg_variance64x32_sse2, 0),
+        make_tuple(5, 6, &vpx_sub_pixel_avg_variance32x64_sse2, 0),
+        make_tuple(5, 5, &vpx_sub_pixel_avg_variance32x32_sse2, 0),
+        make_tuple(5, 4, &vpx_sub_pixel_avg_variance32x16_sse2, 0),
+        make_tuple(4, 5, &vpx_sub_pixel_avg_variance16x32_sse2, 0),
+        make_tuple(4, 4, &vpx_sub_pixel_avg_variance16x16_sse2, 0),
+        make_tuple(4, 3, &vpx_sub_pixel_avg_variance16x8_sse2, 0),
+        make_tuple(3, 4, &vpx_sub_pixel_avg_variance8x16_sse2, 0),
+        make_tuple(3, 3, &vpx_sub_pixel_avg_variance8x8_sse2, 0),
+        make_tuple(3, 2, &vpx_sub_pixel_avg_variance8x4_sse2, 0),
+        make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_sse, 0),
+        make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_sse, 0)));
 #endif  // CONFIG_USE_X86INC
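 
 // A minimal stand-alone sketch (all names here are hypothetical) of
 // why a table entry can take &function directly: the address-of
 // expression already has the exact function-pointer type the tuple
 // element expects, so a named const alias adds nothing.
 typedef unsigned int (*ExampleMseFn)(int);
 static unsigned int example_mse(int x) { return (unsigned int)(x * x); }
 static const ExampleMseFn example_alias = example_mse;  // alias style
 // example_alias and &example_mse are the same pointer with the same
 // type, so make_tuple(4, 4, example_alias) and
 // make_tuple(4, 4, &example_mse) produce identical test parameters.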
 
 #if CONFIG_VP9_HIGHBITDEPTH
 /* TODO(debargha): This test does not support the highbd version
-const VarianceMxNFunc highbd_12_mse16x16_sse2 = vpx_highbd_12_mse16x16_sse2;
-const VarianceMxNFunc highbd_12_mse16x8_sse2 = vpx_highbd_12_mse16x8_sse2;
-const VarianceMxNFunc highbd_12_mse8x16_sse2 = vpx_highbd_12_mse8x16_sse2;
-const VarianceMxNFunc highbd_12_mse8x8_sse2 = vpx_highbd_12_mse8x8_sse2;
-
-const VarianceMxNFunc highbd_10_mse16x16_sse2 = vpx_highbd_10_mse16x16_sse2;
-const VarianceMxNFunc highbd_10_mse16x8_sse2 = vpx_highbd_10_mse16x8_sse2;
-const VarianceMxNFunc highbd_10_mse8x16_sse2 = vpx_highbd_10_mse8x16_sse2;
-const VarianceMxNFunc highbd_10_mse8x8_sse2 = vpx_highbd_10_mse8x8_sse2;
-
-const VarianceMxNFunc highbd_8_mse16x16_sse2 = vpx_highbd_8_mse16x16_sse2;
-const VarianceMxNFunc highbd_8_mse16x8_sse2 = vpx_highbd_8_mse16x8_sse2;
-const VarianceMxNFunc highbd_8_mse8x16_sse2 = vpx_highbd_8_mse8x16_sse2;
-const VarianceMxNFunc highbd_8_mse8x8_sse2 = vpx_highbd_8_mse8x8_sse2;
 INSTANTIATE_TEST_CASE_P(
-    SSE2, VpxHBDMseTest, ::testing::Values(make_tuple(4, 4, highbd_12_mse16x16_sse2),
-                                           make_tuple(4, 3, highbd_12_mse16x8_sse2),
-                                           make_tuple(3, 4, highbd_12_mse8x16_sse2),
-                                           make_tuple(3, 3, highbd_12_mse8x8_sse2),
-                                           make_tuple(4, 4, highbd_10_mse16x16_sse2),
-                                           make_tuple(4, 3, highbd_10_mse16x8_sse2),
-                                           make_tuple(3, 4, highbd_10_mse8x16_sse2),
-                                           make_tuple(3, 3, highbd_10_mse8x8_sse2),
-                                           make_tuple(4, 4, highbd_8_mse16x16_sse2),
-                                           make_tuple(4, 3, highbd_8_mse16x8_sse2),
-                                           make_tuple(3, 4, highbd_8_mse8x16_sse2),
-                                           make_tuple(3, 3, highbd_8_mse8x8_sse2)));
+    SSE2, VpxHBDMseTest,
+    ::testing::Values(make_tuple(4, 4, &vpx_highbd_12_mse16x16_sse2),
+                      make_tuple(4, 3, &vpx_highbd_12_mse16x8_sse2),
+                      make_tuple(3, 4, &vpx_highbd_12_mse8x16_sse2),
+                      make_tuple(3, 3, &vpx_highbd_12_mse8x8_sse2),
+                      make_tuple(4, 4, &vpx_highbd_10_mse16x16_sse2),
+                      make_tuple(4, 3, &vpx_highbd_10_mse16x8_sse2),
+                      make_tuple(3, 4, &vpx_highbd_10_mse8x16_sse2),
+                      make_tuple(3, 3, &vpx_highbd_10_mse8x8_sse2),
+                      make_tuple(4, 4, &vpx_highbd_8_mse16x16_sse2),
+                      make_tuple(4, 3, &vpx_highbd_8_mse16x8_sse2),
+                      make_tuple(3, 4, &vpx_highbd_8_mse8x16_sse2),
+                      make_tuple(3, 3, &vpx_highbd_8_mse8x8_sse2)));
 */
 
-const VarianceMxNFunc highbd_12_variance64x64_sse2 =
-    vpx_highbd_12_variance64x64_sse2;
-const VarianceMxNFunc highbd_12_variance64x32_sse2 =
-    vpx_highbd_12_variance64x32_sse2;
-const VarianceMxNFunc highbd_12_variance32x64_sse2 =
-    vpx_highbd_12_variance32x64_sse2;
-const VarianceMxNFunc highbd_12_variance32x32_sse2 =
-    vpx_highbd_12_variance32x32_sse2;
-const VarianceMxNFunc highbd_12_variance32x16_sse2 =
-    vpx_highbd_12_variance32x16_sse2;
-const VarianceMxNFunc highbd_12_variance16x32_sse2 =
-    vpx_highbd_12_variance16x32_sse2;
-const VarianceMxNFunc highbd_12_variance16x16_sse2 =
-    vpx_highbd_12_variance16x16_sse2;
-const VarianceMxNFunc highbd_12_variance16x8_sse2 =
-    vpx_highbd_12_variance16x8_sse2;
-const VarianceMxNFunc highbd_12_variance8x16_sse2 =
-    vpx_highbd_12_variance8x16_sse2;
-const VarianceMxNFunc highbd_12_variance8x8_sse2 =
-    vpx_highbd_12_variance8x8_sse2;
-const VarianceMxNFunc highbd_10_variance64x64_sse2 =
-    vpx_highbd_10_variance64x64_sse2;
-const VarianceMxNFunc highbd_10_variance64x32_sse2 =
-    vpx_highbd_10_variance64x32_sse2;
-const VarianceMxNFunc highbd_10_variance32x64_sse2 =
-    vpx_highbd_10_variance32x64_sse2;
-const VarianceMxNFunc highbd_10_variance32x32_sse2 =
-    vpx_highbd_10_variance32x32_sse2;
-const VarianceMxNFunc highbd_10_variance32x16_sse2 =
-    vpx_highbd_10_variance32x16_sse2;
-const VarianceMxNFunc highbd_10_variance16x32_sse2 =
-    vpx_highbd_10_variance16x32_sse2;
-const VarianceMxNFunc highbd_10_variance16x16_sse2 =
-    vpx_highbd_10_variance16x16_sse2;
-const VarianceMxNFunc highbd_10_variance16x8_sse2 =
-    vpx_highbd_10_variance16x8_sse2;
-const VarianceMxNFunc highbd_10_variance8x16_sse2 =
-    vpx_highbd_10_variance8x16_sse2;
-const VarianceMxNFunc highbd_10_variance8x8_sse2 =
-    vpx_highbd_10_variance8x8_sse2;
-const VarianceMxNFunc highbd_8_variance64x64_sse2 =
-    vpx_highbd_8_variance64x64_sse2;
-const VarianceMxNFunc highbd_8_variance64x32_sse2 =
-    vpx_highbd_8_variance64x32_sse2;
-const VarianceMxNFunc highbd_8_variance32x64_sse2 =
-    vpx_highbd_8_variance32x64_sse2;
-const VarianceMxNFunc highbd_8_variance32x32_sse2 =
-    vpx_highbd_8_variance32x32_sse2;
-const VarianceMxNFunc highbd_8_variance32x16_sse2 =
-    vpx_highbd_8_variance32x16_sse2;
-const VarianceMxNFunc highbd_8_variance16x32_sse2 =
-    vpx_highbd_8_variance16x32_sse2;
-const VarianceMxNFunc highbd_8_variance16x16_sse2 =
-    vpx_highbd_8_variance16x16_sse2;
-const VarianceMxNFunc highbd_8_variance16x8_sse2 =
-    vpx_highbd_8_variance16x8_sse2;
-const VarianceMxNFunc highbd_8_variance8x16_sse2 =
-    vpx_highbd_8_variance8x16_sse2;
-const VarianceMxNFunc highbd_8_variance8x8_sse2 =
-    vpx_highbd_8_variance8x8_sse2;
-
 INSTANTIATE_TEST_CASE_P(
     SSE2, VpxHBDVarianceTest,
-    ::testing::Values(make_tuple(6, 6, highbd_12_variance64x64_sse2, 12),
-                      make_tuple(6, 5, highbd_12_variance64x32_sse2, 12),
-                      make_tuple(5, 6, highbd_12_variance32x64_sse2, 12),
-                      make_tuple(5, 5, highbd_12_variance32x32_sse2, 12),
-                      make_tuple(5, 4, highbd_12_variance32x16_sse2, 12),
-                      make_tuple(4, 5, highbd_12_variance16x32_sse2, 12),
-                      make_tuple(4, 4, highbd_12_variance16x16_sse2, 12),
-                      make_tuple(4, 3, highbd_12_variance16x8_sse2, 12),
-                      make_tuple(3, 4, highbd_12_variance8x16_sse2, 12),
-                      make_tuple(3, 3, highbd_12_variance8x8_sse2, 12),
-                      make_tuple(6, 6, highbd_10_variance64x64_sse2, 10),
-                      make_tuple(6, 5, highbd_10_variance64x32_sse2, 10),
-                      make_tuple(5, 6, highbd_10_variance32x64_sse2, 10),
-                      make_tuple(5, 5, highbd_10_variance32x32_sse2, 10),
-                      make_tuple(5, 4, highbd_10_variance32x16_sse2, 10),
-                      make_tuple(4, 5, highbd_10_variance16x32_sse2, 10),
-                      make_tuple(4, 4, highbd_10_variance16x16_sse2, 10),
-                      make_tuple(4, 3, highbd_10_variance16x8_sse2, 10),
-                      make_tuple(3, 4, highbd_10_variance8x16_sse2, 10),
-                      make_tuple(3, 3, highbd_10_variance8x8_sse2, 10),
-                      make_tuple(6, 6, highbd_8_variance64x64_sse2, 8),
-                      make_tuple(6, 5, highbd_8_variance64x32_sse2, 8),
-                      make_tuple(5, 6, highbd_8_variance32x64_sse2, 8),
-                      make_tuple(5, 5, highbd_8_variance32x32_sse2, 8),
-                      make_tuple(5, 4, highbd_8_variance32x16_sse2, 8),
-                      make_tuple(4, 5, highbd_8_variance16x32_sse2, 8),
-                      make_tuple(4, 4, highbd_8_variance16x16_sse2, 8),
-                      make_tuple(4, 3, highbd_8_variance16x8_sse2, 8),
-                      make_tuple(3, 4, highbd_8_variance8x16_sse2, 8),
-                      make_tuple(3, 3, highbd_8_variance8x8_sse2, 8)));
+    ::testing::Values(make_tuple(6, 6, &vpx_highbd_12_variance64x64_sse2, 12),
+                      make_tuple(6, 5, &vpx_highbd_12_variance64x32_sse2, 12),
+                      make_tuple(5, 6, &vpx_highbd_12_variance32x64_sse2, 12),
+                      make_tuple(5, 5, &vpx_highbd_12_variance32x32_sse2, 12),
+                      make_tuple(5, 4, &vpx_highbd_12_variance32x16_sse2, 12),
+                      make_tuple(4, 5, &vpx_highbd_12_variance16x32_sse2, 12),
+                      make_tuple(4, 4, &vpx_highbd_12_variance16x16_sse2, 12),
+                      make_tuple(4, 3, &vpx_highbd_12_variance16x8_sse2, 12),
+                      make_tuple(3, 4, &vpx_highbd_12_variance8x16_sse2, 12),
+                      make_tuple(3, 3, &vpx_highbd_12_variance8x8_sse2, 12),
+                      make_tuple(6, 6, &vpx_highbd_10_variance64x64_sse2, 10),
+                      make_tuple(6, 5, &vpx_highbd_10_variance64x32_sse2, 10),
+                      make_tuple(5, 6, &vpx_highbd_10_variance32x64_sse2, 10),
+                      make_tuple(5, 5, &vpx_highbd_10_variance32x32_sse2, 10),
+                      make_tuple(5, 4, &vpx_highbd_10_variance32x16_sse2, 10),
+                      make_tuple(4, 5, &vpx_highbd_10_variance16x32_sse2, 10),
+                      make_tuple(4, 4, &vpx_highbd_10_variance16x16_sse2, 10),
+                      make_tuple(4, 3, &vpx_highbd_10_variance16x8_sse2, 10),
+                      make_tuple(3, 4, &vpx_highbd_10_variance8x16_sse2, 10),
+                      make_tuple(3, 3, &vpx_highbd_10_variance8x8_sse2, 10),
+                      make_tuple(6, 6, &vpx_highbd_8_variance64x64_sse2, 8),
+                      make_tuple(6, 5, &vpx_highbd_8_variance64x32_sse2, 8),
+                      make_tuple(5, 6, &vpx_highbd_8_variance32x64_sse2, 8),
+                      make_tuple(5, 5, &vpx_highbd_8_variance32x32_sse2, 8),
+                      make_tuple(5, 4, &vpx_highbd_8_variance32x16_sse2, 8),
+                      make_tuple(4, 5, &vpx_highbd_8_variance16x32_sse2, 8),
+                      make_tuple(4, 4, &vpx_highbd_8_variance16x16_sse2, 8),
+                      make_tuple(4, 3, &vpx_highbd_8_variance16x8_sse2, 8),
+                      make_tuple(3, 4, &vpx_highbd_8_variance8x16_sse2, 8),
+                      make_tuple(3, 3, &vpx_highbd_8_variance8x8_sse2, 8)));
 
 #if CONFIG_USE_X86INC
-const SubpixVarMxNFunc highbd_12_subpel_variance64x64_sse2 =
-    vpx_highbd_12_sub_pixel_variance64x64_sse2;
-const SubpixVarMxNFunc highbd_12_subpel_variance64x32_sse2 =
-    vpx_highbd_12_sub_pixel_variance64x32_sse2;
-const SubpixVarMxNFunc highbd_12_subpel_variance32x64_sse2 =
-    vpx_highbd_12_sub_pixel_variance32x64_sse2;
-const SubpixVarMxNFunc highbd_12_subpel_variance32x32_sse2 =
-    vpx_highbd_12_sub_pixel_variance32x32_sse2;
-const SubpixVarMxNFunc highbd_12_subpel_variance32x16_sse2 =
-    vpx_highbd_12_sub_pixel_variance32x16_sse2;
-const SubpixVarMxNFunc highbd_12_subpel_variance16x32_sse2 =
-    vpx_highbd_12_sub_pixel_variance16x32_sse2;
-const SubpixVarMxNFunc highbd_12_subpel_variance16x16_sse2 =
-    vpx_highbd_12_sub_pixel_variance16x16_sse2;
-const SubpixVarMxNFunc highbd_12_subpel_variance16x8_sse2 =
-    vpx_highbd_12_sub_pixel_variance16x8_sse2;
-const SubpixVarMxNFunc highbd_12_subpel_variance8x16_sse2 =
-    vpx_highbd_12_sub_pixel_variance8x16_sse2;
-const SubpixVarMxNFunc highbd_12_subpel_variance8x8_sse2 =
-    vpx_highbd_12_sub_pixel_variance8x8_sse2;
-const SubpixVarMxNFunc highbd_12_subpel_variance8x4_sse2 =
-    vpx_highbd_12_sub_pixel_variance8x4_sse2;
-const SubpixVarMxNFunc highbd_10_subpel_variance64x64_sse2 =
-    vpx_highbd_10_sub_pixel_variance64x64_sse2;
-const SubpixVarMxNFunc highbd_10_subpel_variance64x32_sse2 =
-    vpx_highbd_10_sub_pixel_variance64x32_sse2;
-const SubpixVarMxNFunc highbd_10_subpel_variance32x64_sse2 =
-    vpx_highbd_10_sub_pixel_variance32x64_sse2;
-const SubpixVarMxNFunc highbd_10_subpel_variance32x32_sse2 =
-    vpx_highbd_10_sub_pixel_variance32x32_sse2;
-const SubpixVarMxNFunc highbd_10_subpel_variance32x16_sse2 =
-    vpx_highbd_10_sub_pixel_variance32x16_sse2;
-const SubpixVarMxNFunc highbd_10_subpel_variance16x32_sse2 =
-    vpx_highbd_10_sub_pixel_variance16x32_sse2;
-const SubpixVarMxNFunc highbd_10_subpel_variance16x16_sse2 =
-    vpx_highbd_10_sub_pixel_variance16x16_sse2;
-const SubpixVarMxNFunc highbd_10_subpel_variance16x8_sse2 =
-    vpx_highbd_10_sub_pixel_variance16x8_sse2;
-const SubpixVarMxNFunc highbd_10_subpel_variance8x16_sse2 =
-    vpx_highbd_10_sub_pixel_variance8x16_sse2;
-const SubpixVarMxNFunc highbd_10_subpel_variance8x8_sse2 =
-    vpx_highbd_10_sub_pixel_variance8x8_sse2;
-const SubpixVarMxNFunc highbd_10_subpel_variance8x4_sse2 =
-    vpx_highbd_10_sub_pixel_variance8x4_sse2;
-const SubpixVarMxNFunc highbd_8_subpel_variance64x64_sse2 =
-    vpx_highbd_8_sub_pixel_variance64x64_sse2;
-const SubpixVarMxNFunc highbd_8_subpel_variance64x32_sse2 =
-    vpx_highbd_8_sub_pixel_variance64x32_sse2;
-const SubpixVarMxNFunc highbd_8_subpel_variance32x64_sse2 =
-    vpx_highbd_8_sub_pixel_variance32x64_sse2;
-const SubpixVarMxNFunc highbd_8_subpel_variance32x32_sse2 =
-    vpx_highbd_8_sub_pixel_variance32x32_sse2;
-const SubpixVarMxNFunc highbd_8_subpel_variance32x16_sse2 =
-    vpx_highbd_8_sub_pixel_variance32x16_sse2;
-const SubpixVarMxNFunc highbd_8_subpel_variance16x32_sse2 =
-    vpx_highbd_8_sub_pixel_variance16x32_sse2;
-const SubpixVarMxNFunc highbd_8_subpel_variance16x16_sse2 =
-    vpx_highbd_8_sub_pixel_variance16x16_sse2;
-const SubpixVarMxNFunc highbd_8_subpel_variance16x8_sse2 =
-    vpx_highbd_8_sub_pixel_variance16x8_sse2;
-const SubpixVarMxNFunc highbd_8_subpel_variance8x16_sse2 =
-    vpx_highbd_8_sub_pixel_variance8x16_sse2;
-const SubpixVarMxNFunc highbd_8_subpel_variance8x8_sse2 =
-    vpx_highbd_8_sub_pixel_variance8x8_sse2;
-const SubpixVarMxNFunc highbd_8_subpel_variance8x4_sse2 =
-    vpx_highbd_8_sub_pixel_variance8x4_sse2;
 INSTANTIATE_TEST_CASE_P(
     SSE2, VpxHBDSubpelVarianceTest,
-    ::testing::Values(make_tuple(6, 6, highbd_12_subpel_variance64x64_sse2, 12),
-                      make_tuple(6, 5, highbd_12_subpel_variance64x32_sse2, 12),
-                      make_tuple(5, 6, highbd_12_subpel_variance32x64_sse2, 12),
-                      make_tuple(5, 5, highbd_12_subpel_variance32x32_sse2, 12),
-                      make_tuple(5, 4, highbd_12_subpel_variance32x16_sse2, 12),
-                      make_tuple(4, 5, highbd_12_subpel_variance16x32_sse2, 12),
-                      make_tuple(4, 4, highbd_12_subpel_variance16x16_sse2, 12),
-                      make_tuple(4, 3, highbd_12_subpel_variance16x8_sse2, 12),
-                      make_tuple(3, 4, highbd_12_subpel_variance8x16_sse2, 12),
-                      make_tuple(3, 3, highbd_12_subpel_variance8x8_sse2, 12),
-                      make_tuple(3, 2, highbd_12_subpel_variance8x4_sse2, 12),
-                      make_tuple(6, 6, highbd_10_subpel_variance64x64_sse2, 10),
-                      make_tuple(6, 5, highbd_10_subpel_variance64x32_sse2, 10),
-                      make_tuple(5, 6, highbd_10_subpel_variance32x64_sse2, 10),
-                      make_tuple(5, 5, highbd_10_subpel_variance32x32_sse2, 10),
-                      make_tuple(5, 4, highbd_10_subpel_variance32x16_sse2, 10),
-                      make_tuple(4, 5, highbd_10_subpel_variance16x32_sse2, 10),
-                      make_tuple(4, 4, highbd_10_subpel_variance16x16_sse2, 10),
-                      make_tuple(4, 3, highbd_10_subpel_variance16x8_sse2, 10),
-                      make_tuple(3, 4, highbd_10_subpel_variance8x16_sse2, 10),
-                      make_tuple(3, 3, highbd_10_subpel_variance8x8_sse2, 10),
-                      make_tuple(3, 2, highbd_10_subpel_variance8x4_sse2, 10),
-                      make_tuple(6, 6, highbd_8_subpel_variance64x64_sse2, 8),
-                      make_tuple(6, 5, highbd_8_subpel_variance64x32_sse2, 8),
-                      make_tuple(5, 6, highbd_8_subpel_variance32x64_sse2, 8),
-                      make_tuple(5, 5, highbd_8_subpel_variance32x32_sse2, 8),
-                      make_tuple(5, 4, highbd_8_subpel_variance32x16_sse2, 8),
-                      make_tuple(4, 5, highbd_8_subpel_variance16x32_sse2, 8),
-                      make_tuple(4, 4, highbd_8_subpel_variance16x16_sse2, 8),
-                      make_tuple(4, 3, highbd_8_subpel_variance16x8_sse2, 8),
-                      make_tuple(3, 4, highbd_8_subpel_variance8x16_sse2, 8),
-                      make_tuple(3, 3, highbd_8_subpel_variance8x8_sse2, 8),
-                      make_tuple(3, 2, highbd_8_subpel_variance8x4_sse2, 8)));
+    ::testing::Values(
+        make_tuple(6, 6, &vpx_highbd_12_sub_pixel_variance64x64_sse2, 12),
+        make_tuple(6, 5, &vpx_highbd_12_sub_pixel_variance64x32_sse2, 12),
+        make_tuple(5, 6, &vpx_highbd_12_sub_pixel_variance32x64_sse2, 12),
+        make_tuple(5, 5, &vpx_highbd_12_sub_pixel_variance32x32_sse2, 12),
+        make_tuple(5, 4, &vpx_highbd_12_sub_pixel_variance32x16_sse2, 12),
+        make_tuple(4, 5, &vpx_highbd_12_sub_pixel_variance16x32_sse2, 12),
+        make_tuple(4, 4, &vpx_highbd_12_sub_pixel_variance16x16_sse2, 12),
+        make_tuple(4, 3, &vpx_highbd_12_sub_pixel_variance16x8_sse2, 12),
+        make_tuple(3, 4, &vpx_highbd_12_sub_pixel_variance8x16_sse2, 12),
+        make_tuple(3, 3, &vpx_highbd_12_sub_pixel_variance8x8_sse2, 12),
+        make_tuple(3, 2, &vpx_highbd_12_sub_pixel_variance8x4_sse2, 12),
+        make_tuple(6, 6, &vpx_highbd_10_sub_pixel_variance64x64_sse2, 10),
+        make_tuple(6, 5, &vpx_highbd_10_sub_pixel_variance64x32_sse2, 10),
+        make_tuple(5, 6, &vpx_highbd_10_sub_pixel_variance32x64_sse2, 10),
+        make_tuple(5, 5, &vpx_highbd_10_sub_pixel_variance32x32_sse2, 10),
+        make_tuple(5, 4, &vpx_highbd_10_sub_pixel_variance32x16_sse2, 10),
+        make_tuple(4, 5, &vpx_highbd_10_sub_pixel_variance16x32_sse2, 10),
+        make_tuple(4, 4, &vpx_highbd_10_sub_pixel_variance16x16_sse2, 10),
+        make_tuple(4, 3, &vpx_highbd_10_sub_pixel_variance16x8_sse2, 10),
+        make_tuple(3, 4, &vpx_highbd_10_sub_pixel_variance8x16_sse2, 10),
+        make_tuple(3, 3, &vpx_highbd_10_sub_pixel_variance8x8_sse2, 10),
+        make_tuple(3, 2, &vpx_highbd_10_sub_pixel_variance8x4_sse2, 10),
+        make_tuple(6, 6, &vpx_highbd_8_sub_pixel_variance64x64_sse2, 8),
+        make_tuple(6, 5, &vpx_highbd_8_sub_pixel_variance64x32_sse2, 8),
+        make_tuple(5, 6, &vpx_highbd_8_sub_pixel_variance32x64_sse2, 8),
+        make_tuple(5, 5, &vpx_highbd_8_sub_pixel_variance32x32_sse2, 8),
+        make_tuple(5, 4, &vpx_highbd_8_sub_pixel_variance32x16_sse2, 8),
+        make_tuple(4, 5, &vpx_highbd_8_sub_pixel_variance16x32_sse2, 8),
+        make_tuple(4, 4, &vpx_highbd_8_sub_pixel_variance16x16_sse2, 8),
+        make_tuple(4, 3, &vpx_highbd_8_sub_pixel_variance16x8_sse2, 8),
+        make_tuple(3, 4, &vpx_highbd_8_sub_pixel_variance8x16_sse2, 8),
+        make_tuple(3, 3, &vpx_highbd_8_sub_pixel_variance8x8_sse2, 8),
+        make_tuple(3, 2, &vpx_highbd_8_sub_pixel_variance8x4_sse2, 8)));
 
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance64x64_sse2 =
-    vpx_highbd_12_sub_pixel_avg_variance64x64_sse2;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance64x32_sse2 =
-    vpx_highbd_12_sub_pixel_avg_variance64x32_sse2;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance32x64_sse2 =
-    vpx_highbd_12_sub_pixel_avg_variance32x64_sse2;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance32x32_sse2 =
-    vpx_highbd_12_sub_pixel_avg_variance32x32_sse2;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance32x16_sse2 =
-    vpx_highbd_12_sub_pixel_avg_variance32x16_sse2;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance16x32_sse2 =
-    vpx_highbd_12_sub_pixel_avg_variance16x32_sse2;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance16x16_sse2 =
-    vpx_highbd_12_sub_pixel_avg_variance16x16_sse2;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance16x8_sse2 =
-    vpx_highbd_12_sub_pixel_avg_variance16x8_sse2;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance8x16_sse2 =
-    vpx_highbd_12_sub_pixel_avg_variance8x16_sse2;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance8x8_sse2 =
-    vpx_highbd_12_sub_pixel_avg_variance8x8_sse2;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance8x4_sse2 =
-    vpx_highbd_12_sub_pixel_avg_variance8x4_sse2;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance64x64_sse2 =
-    vpx_highbd_10_sub_pixel_avg_variance64x64_sse2;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance64x32_sse2 =
-    vpx_highbd_10_sub_pixel_avg_variance64x32_sse2;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance32x64_sse2 =
-    vpx_highbd_10_sub_pixel_avg_variance32x64_sse2;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance32x32_sse2 =
-    vpx_highbd_10_sub_pixel_avg_variance32x32_sse2;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance32x16_sse2 =
-    vpx_highbd_10_sub_pixel_avg_variance32x16_sse2;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance16x32_sse2 =
-    vpx_highbd_10_sub_pixel_avg_variance16x32_sse2;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance16x16_sse2 =
-    vpx_highbd_10_sub_pixel_avg_variance16x16_sse2;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance16x8_sse2 =
-    vpx_highbd_10_sub_pixel_avg_variance16x8_sse2;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance8x16_sse2 =
-    vpx_highbd_10_sub_pixel_avg_variance8x16_sse2;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance8x8_sse2 =
-    vpx_highbd_10_sub_pixel_avg_variance8x8_sse2;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance8x4_sse2 =
-    vpx_highbd_10_sub_pixel_avg_variance8x4_sse2;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance64x64_sse2 =
-    vpx_highbd_8_sub_pixel_avg_variance64x64_sse2;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance64x32_sse2 =
-    vpx_highbd_8_sub_pixel_avg_variance64x32_sse2;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance32x64_sse2 =
-    vpx_highbd_8_sub_pixel_avg_variance32x64_sse2;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance32x32_sse2 =
-    vpx_highbd_8_sub_pixel_avg_variance32x32_sse2;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance32x16_sse2 =
-    vpx_highbd_8_sub_pixel_avg_variance32x16_sse2;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance16x32_sse2 =
-    vpx_highbd_8_sub_pixel_avg_variance16x32_sse2;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance16x16_sse2 =
-    vpx_highbd_8_sub_pixel_avg_variance16x16_sse2;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance16x8_sse2 =
-    vpx_highbd_8_sub_pixel_avg_variance16x8_sse2;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance8x16_sse2 =
-    vpx_highbd_8_sub_pixel_avg_variance8x16_sse2;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance8x8_sse2 =
-    vpx_highbd_8_sub_pixel_avg_variance8x8_sse2;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance8x4_sse2 =
-    vpx_highbd_8_sub_pixel_avg_variance8x4_sse2;
 INSTANTIATE_TEST_CASE_P(
     SSE2, VpxHBDSubpelAvgVarianceTest,
     ::testing::Values(
-        make_tuple(6, 6, highbd_12_subpel_avg_variance64x64_sse2, 12),
-        make_tuple(6, 5, highbd_12_subpel_avg_variance64x32_sse2, 12),
-        make_tuple(5, 6, highbd_12_subpel_avg_variance32x64_sse2, 12),
-        make_tuple(5, 5, highbd_12_subpel_avg_variance32x32_sse2, 12),
-        make_tuple(5, 4, highbd_12_subpel_avg_variance32x16_sse2, 12),
-        make_tuple(4, 5, highbd_12_subpel_avg_variance16x32_sse2, 12),
-        make_tuple(4, 4, highbd_12_subpel_avg_variance16x16_sse2, 12),
-        make_tuple(4, 3, highbd_12_subpel_avg_variance16x8_sse2, 12),
-        make_tuple(3, 4, highbd_12_subpel_avg_variance8x16_sse2, 12),
-        make_tuple(3, 3, highbd_12_subpel_avg_variance8x8_sse2, 12),
-        make_tuple(3, 2, highbd_12_subpel_avg_variance8x4_sse2, 12),
-        make_tuple(6, 6, highbd_10_subpel_avg_variance64x64_sse2, 10),
-        make_tuple(6, 5, highbd_10_subpel_avg_variance64x32_sse2, 10),
-        make_tuple(5, 6, highbd_10_subpel_avg_variance32x64_sse2, 10),
-        make_tuple(5, 5, highbd_10_subpel_avg_variance32x32_sse2, 10),
-        make_tuple(5, 4, highbd_10_subpel_avg_variance32x16_sse2, 10),
-        make_tuple(4, 5, highbd_10_subpel_avg_variance16x32_sse2, 10),
-        make_tuple(4, 4, highbd_10_subpel_avg_variance16x16_sse2, 10),
-        make_tuple(4, 3, highbd_10_subpel_avg_variance16x8_sse2, 10),
-        make_tuple(3, 4, highbd_10_subpel_avg_variance8x16_sse2, 10),
-        make_tuple(3, 3, highbd_10_subpel_avg_variance8x8_sse2, 10),
-        make_tuple(3, 2, highbd_10_subpel_avg_variance8x4_sse2, 10),
-        make_tuple(6, 6, highbd_8_subpel_avg_variance64x64_sse2, 8),
-        make_tuple(6, 5, highbd_8_subpel_avg_variance64x32_sse2, 8),
-        make_tuple(5, 6, highbd_8_subpel_avg_variance32x64_sse2, 8),
-        make_tuple(5, 5, highbd_8_subpel_avg_variance32x32_sse2, 8),
-        make_tuple(5, 4, highbd_8_subpel_avg_variance32x16_sse2, 8),
-        make_tuple(4, 5, highbd_8_subpel_avg_variance16x32_sse2, 8),
-        make_tuple(4, 4, highbd_8_subpel_avg_variance16x16_sse2, 8),
-        make_tuple(4, 3, highbd_8_subpel_avg_variance16x8_sse2, 8),
-        make_tuple(3, 4, highbd_8_subpel_avg_variance8x16_sse2, 8),
-        make_tuple(3, 3, highbd_8_subpel_avg_variance8x8_sse2, 8),
-        make_tuple(3, 2, highbd_8_subpel_avg_variance8x4_sse2, 8)));
+        make_tuple(6, 6, &vpx_highbd_12_sub_pixel_avg_variance64x64_sse2, 12),
+        make_tuple(6, 5, &vpx_highbd_12_sub_pixel_avg_variance64x32_sse2, 12),
+        make_tuple(5, 6, &vpx_highbd_12_sub_pixel_avg_variance32x64_sse2, 12),
+        make_tuple(5, 5, &vpx_highbd_12_sub_pixel_avg_variance32x32_sse2, 12),
+        make_tuple(5, 4, &vpx_highbd_12_sub_pixel_avg_variance32x16_sse2, 12),
+        make_tuple(4, 5, &vpx_highbd_12_sub_pixel_avg_variance16x32_sse2, 12),
+        make_tuple(4, 4, &vpx_highbd_12_sub_pixel_avg_variance16x16_sse2, 12),
+        make_tuple(4, 3, &vpx_highbd_12_sub_pixel_avg_variance16x8_sse2, 12),
+        make_tuple(3, 4, &vpx_highbd_12_sub_pixel_avg_variance8x16_sse2, 12),
+        make_tuple(3, 3, &vpx_highbd_12_sub_pixel_avg_variance8x8_sse2, 12),
+        make_tuple(3, 2, &vpx_highbd_12_sub_pixel_avg_variance8x4_sse2, 12),
+        make_tuple(6, 6, &vpx_highbd_10_sub_pixel_avg_variance64x64_sse2, 10),
+        make_tuple(6, 5, &vpx_highbd_10_sub_pixel_avg_variance64x32_sse2, 10),
+        make_tuple(5, 6, &vpx_highbd_10_sub_pixel_avg_variance32x64_sse2, 10),
+        make_tuple(5, 5, &vpx_highbd_10_sub_pixel_avg_variance32x32_sse2, 10),
+        make_tuple(5, 4, &vpx_highbd_10_sub_pixel_avg_variance32x16_sse2, 10),
+        make_tuple(4, 5, &vpx_highbd_10_sub_pixel_avg_variance16x32_sse2, 10),
+        make_tuple(4, 4, &vpx_highbd_10_sub_pixel_avg_variance16x16_sse2, 10),
+        make_tuple(4, 3, &vpx_highbd_10_sub_pixel_avg_variance16x8_sse2, 10),
+        make_tuple(3, 4, &vpx_highbd_10_sub_pixel_avg_variance8x16_sse2, 10),
+        make_tuple(3, 3, &vpx_highbd_10_sub_pixel_avg_variance8x8_sse2, 10),
+        make_tuple(3, 2, &vpx_highbd_10_sub_pixel_avg_variance8x4_sse2, 10),
+        make_tuple(6, 6, &vpx_highbd_8_sub_pixel_avg_variance64x64_sse2, 8),
+        make_tuple(6, 5, &vpx_highbd_8_sub_pixel_avg_variance64x32_sse2, 8),
+        make_tuple(5, 6, &vpx_highbd_8_sub_pixel_avg_variance32x64_sse2, 8),
+        make_tuple(5, 5, &vpx_highbd_8_sub_pixel_avg_variance32x32_sse2, 8),
+        make_tuple(5, 4, &vpx_highbd_8_sub_pixel_avg_variance32x16_sse2, 8),
+        make_tuple(4, 5, &vpx_highbd_8_sub_pixel_avg_variance16x32_sse2, 8),
+        make_tuple(4, 4, &vpx_highbd_8_sub_pixel_avg_variance16x16_sse2, 8),
+        make_tuple(4, 3, &vpx_highbd_8_sub_pixel_avg_variance16x8_sse2, 8),
+        make_tuple(3, 4, &vpx_highbd_8_sub_pixel_avg_variance8x16_sse2, 8),
+        make_tuple(3, 3, &vpx_highbd_8_sub_pixel_avg_variance8x8_sse2, 8),
+        make_tuple(3, 2, &vpx_highbd_8_sub_pixel_avg_variance8x4_sse2, 8)));
 #endif  // CONFIG_USE_X86INC
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_SSE2
 
 #if HAVE_SSSE3
 #if CONFIG_USE_X86INC
-const SubpixVarMxNFunc subpel_variance64x64_ssse3 =
-    vpx_sub_pixel_variance64x64_ssse3;
-const SubpixVarMxNFunc subpel_variance64x32_ssse3 =
-    vpx_sub_pixel_variance64x32_ssse3;
-const SubpixVarMxNFunc subpel_variance32x64_ssse3 =
-    vpx_sub_pixel_variance32x64_ssse3;
-const SubpixVarMxNFunc subpel_variance32x32_ssse3 =
-    vpx_sub_pixel_variance32x32_ssse3;
-const SubpixVarMxNFunc subpel_variance32x16_ssse3 =
-    vpx_sub_pixel_variance32x16_ssse3;
-const SubpixVarMxNFunc subpel_variance16x32_ssse3 =
-    vpx_sub_pixel_variance16x32_ssse3;
-const SubpixVarMxNFunc subpel_variance16x16_ssse3 =
-    vpx_sub_pixel_variance16x16_ssse3;
-const SubpixVarMxNFunc subpel_variance16x8_ssse3 =
-    vpx_sub_pixel_variance16x8_ssse3;
-const SubpixVarMxNFunc subpel_variance8x16_ssse3 =
-    vpx_sub_pixel_variance8x16_ssse3;
-const SubpixVarMxNFunc subpel_variance8x8_ssse3 =
-    vpx_sub_pixel_variance8x8_ssse3;
-const SubpixVarMxNFunc subpel_variance8x4_ssse3 =
-    vpx_sub_pixel_variance8x4_ssse3;
-const SubpixVarMxNFunc subpel_variance4x8_ssse3 =
-    vpx_sub_pixel_variance4x8_ssse3;
-const SubpixVarMxNFunc subpel_variance4x4_ssse3 =
-    vpx_sub_pixel_variance4x4_ssse3;
 INSTANTIATE_TEST_CASE_P(
     SSSE3, VpxSubpelVarianceTest,
-    ::testing::Values(make_tuple(6, 6, subpel_variance64x64_ssse3, 0),
-                      make_tuple(6, 5, subpel_variance64x32_ssse3, 0),
-                      make_tuple(5, 6, subpel_variance32x64_ssse3, 0),
-                      make_tuple(5, 5, subpel_variance32x32_ssse3, 0),
-                      make_tuple(5, 4, subpel_variance32x16_ssse3, 0),
-                      make_tuple(4, 5, subpel_variance16x32_ssse3, 0),
-                      make_tuple(4, 4, subpel_variance16x16_ssse3, 0),
-                      make_tuple(4, 3, subpel_variance16x8_ssse3, 0),
-                      make_tuple(3, 4, subpel_variance8x16_ssse3, 0),
-                      make_tuple(3, 3, subpel_variance8x8_ssse3, 0),
-                      make_tuple(3, 2, subpel_variance8x4_ssse3, 0),
-                      make_tuple(2, 3, subpel_variance4x8_ssse3, 0),
-                      make_tuple(2, 2, subpel_variance4x4_ssse3, 0)));
+    ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_variance64x64_ssse3, 0),
+                      make_tuple(6, 5, &vpx_sub_pixel_variance64x32_ssse3, 0),
+                      make_tuple(5, 6, &vpx_sub_pixel_variance32x64_ssse3, 0),
+                      make_tuple(5, 5, &vpx_sub_pixel_variance32x32_ssse3, 0),
+                      make_tuple(5, 4, &vpx_sub_pixel_variance32x16_ssse3, 0),
+                      make_tuple(4, 5, &vpx_sub_pixel_variance16x32_ssse3, 0),
+                      make_tuple(4, 4, &vpx_sub_pixel_variance16x16_ssse3, 0),
+                      make_tuple(4, 3, &vpx_sub_pixel_variance16x8_ssse3, 0),
+                      make_tuple(3, 4, &vpx_sub_pixel_variance8x16_ssse3, 0),
+                      make_tuple(3, 3, &vpx_sub_pixel_variance8x8_ssse3, 0),
+                      make_tuple(3, 2, &vpx_sub_pixel_variance8x4_ssse3, 0),
+                      make_tuple(2, 3, &vpx_sub_pixel_variance4x8_ssse3, 0),
+                      make_tuple(2, 2, &vpx_sub_pixel_variance4x4_ssse3, 0)));
 
-const SubpixAvgVarMxNFunc subpel_avg_variance64x64_ssse3 =
-    vpx_sub_pixel_avg_variance64x64_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance64x32_ssse3 =
-    vpx_sub_pixel_avg_variance64x32_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance32x64_ssse3 =
-    vpx_sub_pixel_avg_variance32x64_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance32x32_ssse3 =
-    vpx_sub_pixel_avg_variance32x32_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance32x16_ssse3 =
-    vpx_sub_pixel_avg_variance32x16_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance16x32_ssse3 =
-    vpx_sub_pixel_avg_variance16x32_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance16x16_ssse3 =
-    vpx_sub_pixel_avg_variance16x16_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance16x8_ssse3 =
-    vpx_sub_pixel_avg_variance16x8_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance8x16_ssse3 =
-    vpx_sub_pixel_avg_variance8x16_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance8x8_ssse3 =
-    vpx_sub_pixel_avg_variance8x8_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance8x4_ssse3 =
-    vpx_sub_pixel_avg_variance8x4_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance4x8_ssse3 =
-    vpx_sub_pixel_avg_variance4x8_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance4x4_ssse3 =
-    vpx_sub_pixel_avg_variance4x4_ssse3;
 INSTANTIATE_TEST_CASE_P(
     SSSE3, VpxSubpelAvgVarianceTest,
-    ::testing::Values(make_tuple(6, 6, subpel_avg_variance64x64_ssse3, 0),
-                      make_tuple(6, 5, subpel_avg_variance64x32_ssse3, 0),
-                      make_tuple(5, 6, subpel_avg_variance32x64_ssse3, 0),
-                      make_tuple(5, 5, subpel_avg_variance32x32_ssse3, 0),
-                      make_tuple(5, 4, subpel_avg_variance32x16_ssse3, 0),
-                      make_tuple(4, 5, subpel_avg_variance16x32_ssse3, 0),
-                      make_tuple(4, 4, subpel_avg_variance16x16_ssse3, 0),
-                      make_tuple(4, 3, subpel_avg_variance16x8_ssse3, 0),
-                      make_tuple(3, 4, subpel_avg_variance8x16_ssse3, 0),
-                      make_tuple(3, 3, subpel_avg_variance8x8_ssse3, 0),
-                      make_tuple(3, 2, subpel_avg_variance8x4_ssse3, 0),
-                      make_tuple(2, 3, subpel_avg_variance4x8_ssse3, 0),
-                      make_tuple(2, 2, subpel_avg_variance4x4_ssse3, 0)));
+    ::testing::Values(
+        make_tuple(6, 6, &vpx_sub_pixel_avg_variance64x64_ssse3, 0),
+        make_tuple(6, 5, &vpx_sub_pixel_avg_variance64x32_ssse3, 0),
+        make_tuple(5, 6, &vpx_sub_pixel_avg_variance32x64_ssse3, 0),
+        make_tuple(5, 5, &vpx_sub_pixel_avg_variance32x32_ssse3, 0),
+        make_tuple(5, 4, &vpx_sub_pixel_avg_variance32x16_ssse3, 0),
+        make_tuple(4, 5, &vpx_sub_pixel_avg_variance16x32_ssse3, 0),
+        make_tuple(4, 4, &vpx_sub_pixel_avg_variance16x16_ssse3, 0),
+        make_tuple(4, 3, &vpx_sub_pixel_avg_variance16x8_ssse3, 0),
+        make_tuple(3, 4, &vpx_sub_pixel_avg_variance8x16_ssse3, 0),
+        make_tuple(3, 3, &vpx_sub_pixel_avg_variance8x8_ssse3, 0),
+        make_tuple(3, 2, &vpx_sub_pixel_avg_variance8x4_ssse3, 0),
+        make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_ssse3, 0),
+        make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_ssse3, 0)));
 #endif  // CONFIG_USE_X86INC
 #endif  // HAVE_SSSE3
 
 #if HAVE_AVX2
-const VarianceMxNFunc mse16x16_avx2 = vpx_mse16x16_avx2;
 INSTANTIATE_TEST_CASE_P(AVX2, VpxMseTest,
-                        ::testing::Values(make_tuple(4, 4, mse16x16_avx2)));
+                        ::testing::Values(make_tuple(4, 4,
+                                                     &vpx_mse16x16_avx2)));
 
-const VarianceMxNFunc variance64x64_avx2 = vpx_variance64x64_avx2;
-const VarianceMxNFunc variance64x32_avx2 = vpx_variance64x32_avx2;
-const VarianceMxNFunc variance32x32_avx2 = vpx_variance32x32_avx2;
-const VarianceMxNFunc variance32x16_avx2 = vpx_variance32x16_avx2;
-const VarianceMxNFunc variance16x16_avx2 = vpx_variance16x16_avx2;
 INSTANTIATE_TEST_CASE_P(
     AVX2, VpxVarianceTest,
-    ::testing::Values(make_tuple(6, 6, variance64x64_avx2, 0),
-                      make_tuple(6, 5, variance64x32_avx2, 0),
-                      make_tuple(5, 5, variance32x32_avx2, 0),
-                      make_tuple(5, 4, variance32x16_avx2, 0),
-                      make_tuple(4, 4, variance16x16_avx2, 0)));
+    ::testing::Values(make_tuple(6, 6, &vpx_variance64x64_avx2, 0),
+                      make_tuple(6, 5, &vpx_variance64x32_avx2, 0),
+                      make_tuple(5, 5, &vpx_variance32x32_avx2, 0),
+                      make_tuple(5, 4, &vpx_variance32x16_avx2, 0),
+                      make_tuple(4, 4, &vpx_variance16x16_avx2, 0)));
 
-const SubpixVarMxNFunc subpel_variance64x64_avx2 =
-    vpx_sub_pixel_variance64x64_avx2;
-const SubpixVarMxNFunc subpel_variance32x32_avx2 =
-    vpx_sub_pixel_variance32x32_avx2;
 INSTANTIATE_TEST_CASE_P(
     AVX2, VpxSubpelVarianceTest,
-    ::testing::Values(make_tuple(6, 6, subpel_variance64x64_avx2, 0),
-                      make_tuple(5, 5, subpel_variance32x32_avx2, 0)));
+    ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_variance64x64_avx2, 0),
+                      make_tuple(5, 5, &vpx_sub_pixel_variance32x32_avx2, 0)));
 
-const SubpixAvgVarMxNFunc subpel_avg_variance64x64_avx2 =
-    vpx_sub_pixel_avg_variance64x64_avx2;
-const SubpixAvgVarMxNFunc subpel_avg_variance32x32_avx2 =
-    vpx_sub_pixel_avg_variance32x32_avx2;
 INSTANTIATE_TEST_CASE_P(
     AVX2, VpxSubpelAvgVarianceTest,
-    ::testing::Values(make_tuple(6, 6, subpel_avg_variance64x64_avx2, 0),
-                      make_tuple(5, 5, subpel_avg_variance32x32_avx2, 0)));
+    ::testing::Values(
+        make_tuple(6, 6, &vpx_sub_pixel_avg_variance64x64_avx2, 0),
+        make_tuple(5, 5, &vpx_sub_pixel_avg_variance32x32_avx2, 0)));
 #endif  // HAVE_AVX2
 
 #if HAVE_MEDIA
-const VarianceMxNFunc mse16x16_media = vpx_mse16x16_media;
 INSTANTIATE_TEST_CASE_P(MEDIA, VpxMseTest,
-                        ::testing::Values(make_tuple(4, 4, mse16x16_media)));
+                        ::testing::Values(make_tuple(4, 4,
+                                                     &vpx_mse16x16_media)));
 
-const VarianceMxNFunc variance16x16_media = vpx_variance16x16_media;
-const VarianceMxNFunc variance8x8_media = vpx_variance8x8_media;
 INSTANTIATE_TEST_CASE_P(
     MEDIA, VpxVarianceTest,
-    ::testing::Values(make_tuple(4, 4, variance16x16_media, 0),
-                      make_tuple(3, 3, variance8x8_media, 0)));
+    ::testing::Values(make_tuple(4, 4, &vpx_variance16x16_media, 0),
+                      make_tuple(3, 3, &vpx_variance8x8_media, 0)));
 
-const SubpixVarMxNFunc subpel_variance16x16_media =
-    vpx_sub_pixel_variance16x16_media;
-const SubpixVarMxNFunc subpel_variance8x8_media =
-    vpx_sub_pixel_variance8x8_media;
 INSTANTIATE_TEST_CASE_P(
     MEDIA, VpxSubpelVarianceTest,
-    ::testing::Values(make_tuple(4, 4, subpel_variance16x16_media, 0),
-                      make_tuple(3, 3, subpel_variance8x8_media, 0)));
+    ::testing::Values(make_tuple(4, 4, &vpx_sub_pixel_variance16x16_media, 0),
+                      make_tuple(3, 3, &vpx_sub_pixel_variance8x8_media, 0)));
 #endif  // HAVE_MEDIA
 
 #if HAVE_NEON
-const Get4x4SseFunc get4x4sse_cs_neon = vpx_get4x4sse_cs_neon;
 INSTANTIATE_TEST_CASE_P(NEON, VpxSseTest,
-                        ::testing::Values(make_tuple(2, 2, get4x4sse_cs_neon)));
+                        ::testing::Values(make_tuple(2, 2,
+                                                     &vpx_get4x4sse_cs_neon)));
 
-const VarianceMxNFunc mse16x16_neon = vpx_mse16x16_neon;
 INSTANTIATE_TEST_CASE_P(NEON, VpxMseTest,
-                        ::testing::Values(make_tuple(4, 4, mse16x16_neon)));
+                        ::testing::Values(make_tuple(4, 4,
+                                                     &vpx_mse16x16_neon)));
 
-const VarianceMxNFunc variance64x64_neon = vpx_variance64x64_neon;
-const VarianceMxNFunc variance64x32_neon = vpx_variance64x32_neon;
-const VarianceMxNFunc variance32x64_neon = vpx_variance32x64_neon;
-const VarianceMxNFunc variance32x32_neon = vpx_variance32x32_neon;
-const VarianceMxNFunc variance16x16_neon = vpx_variance16x16_neon;
-const VarianceMxNFunc variance16x8_neon = vpx_variance16x8_neon;
-const VarianceMxNFunc variance8x16_neon = vpx_variance8x16_neon;
-const VarianceMxNFunc variance8x8_neon = vpx_variance8x8_neon;
 INSTANTIATE_TEST_CASE_P(
     NEON, VpxVarianceTest,
-    ::testing::Values(make_tuple(6, 6, variance64x64_neon, 0),
-                      make_tuple(6, 5, variance64x32_neon, 0),
-                      make_tuple(5, 6, variance32x64_neon, 0),
-                      make_tuple(5, 5, variance32x32_neon, 0),
-                      make_tuple(4, 4, variance16x16_neon, 0),
-                      make_tuple(4, 3, variance16x8_neon, 0),
-                      make_tuple(3, 4, variance8x16_neon, 0),
-                      make_tuple(3, 3, variance8x8_neon, 0)));
+    ::testing::Values(make_tuple(6, 6, &vpx_variance64x64_neon, 0),
+                      make_tuple(6, 5, &vpx_variance64x32_neon, 0),
+                      make_tuple(5, 6, &vpx_variance32x64_neon, 0),
+                      make_tuple(5, 5, &vpx_variance32x32_neon, 0),
+                      make_tuple(4, 4, &vpx_variance16x16_neon, 0),
+                      make_tuple(4, 3, &vpx_variance16x8_neon, 0),
+                      make_tuple(3, 4, &vpx_variance8x16_neon, 0),
+                      make_tuple(3, 3, &vpx_variance8x8_neon, 0)));
 
-const SubpixVarMxNFunc subpel_variance64x64_neon =
-    vpx_sub_pixel_variance64x64_neon;
-const SubpixVarMxNFunc subpel_variance32x32_neon =
-    vpx_sub_pixel_variance32x32_neon;
-const SubpixVarMxNFunc subpel_variance16x16_neon =
-    vpx_sub_pixel_variance16x16_neon;
-const SubpixVarMxNFunc subpel_variance8x8_neon = vpx_sub_pixel_variance8x8_neon;
 INSTANTIATE_TEST_CASE_P(
     NEON, VpxSubpelVarianceTest,
-    ::testing::Values(make_tuple(6, 6, subpel_variance64x64_neon, 0),
-                      make_tuple(5, 5, subpel_variance32x32_neon, 0),
-                      make_tuple(4, 4, subpel_variance16x16_neon, 0),
-                      make_tuple(3, 3, subpel_variance8x8_neon, 0)));
+    ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_variance64x64_neon, 0),
+                      make_tuple(5, 5, &vpx_sub_pixel_variance32x32_neon, 0),
+                      make_tuple(4, 4, &vpx_sub_pixel_variance16x16_neon, 0),
+                      make_tuple(3, 3, &vpx_sub_pixel_variance8x8_neon, 0)));
 #endif  // HAVE_NEON
 
 #if HAVE_MSA
 INSTANTIATE_TEST_CASE_P(MSA, SumOfSquaresTest,
                         ::testing::Values(vpx_get_mb_ss_msa));
 
-const Get4x4SseFunc get4x4sse_cs_msa = vpx_get4x4sse_cs_msa;
 INSTANTIATE_TEST_CASE_P(MSA, VpxSseTest,
-                        ::testing::Values(make_tuple(2, 2, get4x4sse_cs_msa)));
+                        ::testing::Values(make_tuple(2, 2,
+                                                     &vpx_get4x4sse_cs_msa)));
 
-const VarianceMxNFunc mse16x16_msa = vpx_mse16x16_msa;
-const VarianceMxNFunc mse16x8_msa = vpx_mse16x8_msa;
-const VarianceMxNFunc mse8x16_msa = vpx_mse8x16_msa;
-const VarianceMxNFunc mse8x8_msa = vpx_mse8x8_msa;
 INSTANTIATE_TEST_CASE_P(MSA, VpxMseTest,
-                        ::testing::Values(make_tuple(4, 4, mse16x16_msa),
-                                          make_tuple(4, 3, mse16x8_msa),
-                                          make_tuple(3, 4, mse8x16_msa),
-                                          make_tuple(3, 3, mse8x8_msa)));
+                        ::testing::Values(make_tuple(4, 4, &vpx_mse16x16_msa),
+                                          make_tuple(4, 3, &vpx_mse16x8_msa),
+                                          make_tuple(3, 4, &vpx_mse8x16_msa),
+                                          make_tuple(3, 3, &vpx_mse8x8_msa)));
 
-const VarianceMxNFunc variance64x64_msa = vpx_variance64x64_msa;
-const VarianceMxNFunc variance64x32_msa = vpx_variance64x32_msa;
-const VarianceMxNFunc variance32x64_msa = vpx_variance32x64_msa;
-const VarianceMxNFunc variance32x32_msa = vpx_variance32x32_msa;
-const VarianceMxNFunc variance32x16_msa = vpx_variance32x16_msa;
-const VarianceMxNFunc variance16x32_msa = vpx_variance16x32_msa;
-const VarianceMxNFunc variance16x16_msa = vpx_variance16x16_msa;
-const VarianceMxNFunc variance16x8_msa = vpx_variance16x8_msa;
-const VarianceMxNFunc variance8x16_msa = vpx_variance8x16_msa;
-const VarianceMxNFunc variance8x8_msa = vpx_variance8x8_msa;
-const VarianceMxNFunc variance8x4_msa = vpx_variance8x4_msa;
-const VarianceMxNFunc variance4x8_msa = vpx_variance4x8_msa;
-const VarianceMxNFunc variance4x4_msa = vpx_variance4x4_msa;
 INSTANTIATE_TEST_CASE_P(
     MSA, VpxVarianceTest,
-    ::testing::Values(make_tuple(6, 6, variance64x64_msa, 0),
-                      make_tuple(6, 5, variance64x32_msa, 0),
-                      make_tuple(5, 6, variance32x64_msa, 0),
-                      make_tuple(5, 5, variance32x32_msa, 0),
-                      make_tuple(5, 4, variance32x16_msa, 0),
-                      make_tuple(4, 5, variance16x32_msa, 0),
-                      make_tuple(4, 4, variance16x16_msa, 0),
-                      make_tuple(4, 3, variance16x8_msa, 0),
-                      make_tuple(3, 4, variance8x16_msa, 0),
-                      make_tuple(3, 3, variance8x8_msa, 0),
-                      make_tuple(3, 2, variance8x4_msa, 0),
-                      make_tuple(2, 3, variance4x8_msa, 0),
-                      make_tuple(2, 2, variance4x4_msa, 0)));
+    ::testing::Values(make_tuple(6, 6, &vpx_variance64x64_msa, 0),
+                      make_tuple(6, 5, &vpx_variance64x32_msa, 0),
+                      make_tuple(5, 6, &vpx_variance32x64_msa, 0),
+                      make_tuple(5, 5, &vpx_variance32x32_msa, 0),
+                      make_tuple(5, 4, &vpx_variance32x16_msa, 0),
+                      make_tuple(4, 5, &vpx_variance16x32_msa, 0),
+                      make_tuple(4, 4, &vpx_variance16x16_msa, 0),
+                      make_tuple(4, 3, &vpx_variance16x8_msa, 0),
+                      make_tuple(3, 4, &vpx_variance8x16_msa, 0),
+                      make_tuple(3, 3, &vpx_variance8x8_msa, 0),
+                      make_tuple(3, 2, &vpx_variance8x4_msa, 0),
+                      make_tuple(2, 3, &vpx_variance4x8_msa, 0),
+                      make_tuple(2, 2, &vpx_variance4x4_msa, 0)));
 
-const SubpixVarMxNFunc subpel_variance4x4_msa = vpx_sub_pixel_variance4x4_msa;
-const SubpixVarMxNFunc subpel_variance4x8_msa = vpx_sub_pixel_variance4x8_msa;
-const SubpixVarMxNFunc subpel_variance8x4_msa = vpx_sub_pixel_variance8x4_msa;
-const SubpixVarMxNFunc subpel_variance8x8_msa = vpx_sub_pixel_variance8x8_msa;
-const SubpixVarMxNFunc subpel_variance8x16_msa = vpx_sub_pixel_variance8x16_msa;
-const SubpixVarMxNFunc subpel_variance16x8_msa = vpx_sub_pixel_variance16x8_msa;
-const SubpixVarMxNFunc subpel_variance16x16_msa =
-    vpx_sub_pixel_variance16x16_msa;
-const SubpixVarMxNFunc subpel_variance16x32_msa =
-    vpx_sub_pixel_variance16x32_msa;
-const SubpixVarMxNFunc subpel_variance32x16_msa =
-    vpx_sub_pixel_variance32x16_msa;
-const SubpixVarMxNFunc subpel_variance32x32_msa =
-    vpx_sub_pixel_variance32x32_msa;
-const SubpixVarMxNFunc subpel_variance32x64_msa =
-    vpx_sub_pixel_variance32x64_msa;
-const SubpixVarMxNFunc subpel_variance64x32_msa =
-    vpx_sub_pixel_variance64x32_msa;
-const SubpixVarMxNFunc subpel_variance64x64_msa =
-    vpx_sub_pixel_variance64x64_msa;
 INSTANTIATE_TEST_CASE_P(
     MSA, VpxSubpelVarianceTest,
-    ::testing::Values(make_tuple(2, 2, subpel_variance4x4_msa, 0),
-                      make_tuple(2, 3, subpel_variance4x8_msa, 0),
-                      make_tuple(3, 2, subpel_variance8x4_msa, 0),
-                      make_tuple(3, 3, subpel_variance8x8_msa, 0),
-                      make_tuple(3, 4, subpel_variance8x16_msa, 0),
-                      make_tuple(4, 3, subpel_variance16x8_msa, 0),
-                      make_tuple(4, 4, subpel_variance16x16_msa, 0),
-                      make_tuple(4, 5, subpel_variance16x32_msa, 0),
-                      make_tuple(5, 4, subpel_variance32x16_msa, 0),
-                      make_tuple(5, 5, subpel_variance32x32_msa, 0),
-                      make_tuple(5, 6, subpel_variance32x64_msa, 0),
-                      make_tuple(6, 5, subpel_variance64x32_msa, 0),
-                      make_tuple(6, 6, subpel_variance64x64_msa, 0)));
+    ::testing::Values(make_tuple(2, 2, &vpx_sub_pixel_variance4x4_msa, 0),
+                      make_tuple(2, 3, &vpx_sub_pixel_variance4x8_msa, 0),
+                      make_tuple(3, 2, &vpx_sub_pixel_variance8x4_msa, 0),
+                      make_tuple(3, 3, &vpx_sub_pixel_variance8x8_msa, 0),
+                      make_tuple(3, 4, &vpx_sub_pixel_variance8x16_msa, 0),
+                      make_tuple(4, 3, &vpx_sub_pixel_variance16x8_msa, 0),
+                      make_tuple(4, 4, &vpx_sub_pixel_variance16x16_msa, 0),
+                      make_tuple(4, 5, &vpx_sub_pixel_variance16x32_msa, 0),
+                      make_tuple(5, 4, &vpx_sub_pixel_variance32x16_msa, 0),
+                      make_tuple(5, 5, &vpx_sub_pixel_variance32x32_msa, 0),
+                      make_tuple(5, 6, &vpx_sub_pixel_variance32x64_msa, 0),
+                      make_tuple(6, 5, &vpx_sub_pixel_variance64x32_msa, 0),
+                      make_tuple(6, 6, &vpx_sub_pixel_variance64x64_msa, 0)));
 
-const SubpixAvgVarMxNFunc subpel_avg_variance64x64_msa =
-    vpx_sub_pixel_avg_variance64x64_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance64x32_msa =
-    vpx_sub_pixel_avg_variance64x32_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance32x64_msa =
-    vpx_sub_pixel_avg_variance32x64_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance32x32_msa =
-    vpx_sub_pixel_avg_variance32x32_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance32x16_msa =
-    vpx_sub_pixel_avg_variance32x16_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance16x32_msa =
-    vpx_sub_pixel_avg_variance16x32_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance16x16_msa =
-    vpx_sub_pixel_avg_variance16x16_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance16x8_msa =
-    vpx_sub_pixel_avg_variance16x8_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance8x16_msa =
-    vpx_sub_pixel_avg_variance8x16_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance8x8_msa =
-    vpx_sub_pixel_avg_variance8x8_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance8x4_msa =
-    vpx_sub_pixel_avg_variance8x4_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance4x8_msa =
-    vpx_sub_pixel_avg_variance4x8_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance4x4_msa =
-    vpx_sub_pixel_avg_variance4x4_msa;
 INSTANTIATE_TEST_CASE_P(
     MSA, VpxSubpelAvgVarianceTest,
-    ::testing::Values(make_tuple(6, 6, subpel_avg_variance64x64_msa, 0),
-                      make_tuple(6, 5, subpel_avg_variance64x32_msa, 0),
-                      make_tuple(5, 6, subpel_avg_variance32x64_msa, 0),
-                      make_tuple(5, 5, subpel_avg_variance32x32_msa, 0),
-                      make_tuple(5, 4, subpel_avg_variance32x16_msa, 0),
-                      make_tuple(4, 5, subpel_avg_variance16x32_msa, 0),
-                      make_tuple(4, 4, subpel_avg_variance16x16_msa, 0),
-                      make_tuple(4, 3, subpel_avg_variance16x8_msa, 0),
-                      make_tuple(3, 4, subpel_avg_variance8x16_msa, 0),
-                      make_tuple(3, 3, subpel_avg_variance8x8_msa, 0),
-                      make_tuple(3, 2, subpel_avg_variance8x4_msa, 0),
-                      make_tuple(2, 3, subpel_avg_variance4x8_msa, 0),
-                      make_tuple(2, 2, subpel_avg_variance4x4_msa, 0)));
+    ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_avg_variance64x64_msa, 0),
+                      make_tuple(6, 5, &vpx_sub_pixel_avg_variance64x32_msa, 0),
+                      make_tuple(5, 6, &vpx_sub_pixel_avg_variance32x64_msa, 0),
+                      make_tuple(5, 5, &vpx_sub_pixel_avg_variance32x32_msa, 0),
+                      make_tuple(5, 4, &vpx_sub_pixel_avg_variance32x16_msa, 0),
+                      make_tuple(4, 5, &vpx_sub_pixel_avg_variance16x32_msa, 0),
+                      make_tuple(4, 4, &vpx_sub_pixel_avg_variance16x16_msa, 0),
+                      make_tuple(4, 3, &vpx_sub_pixel_avg_variance16x8_msa, 0),
+                      make_tuple(3, 4, &vpx_sub_pixel_avg_variance8x16_msa, 0),
+                      make_tuple(3, 3, &vpx_sub_pixel_avg_variance8x8_msa, 0),
+                      make_tuple(3, 2, &vpx_sub_pixel_avg_variance8x4_msa, 0),
+                      make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_msa, 0),
+                      make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_msa, 0)));
 #endif  // HAVE_MSA
 }  // namespace
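
Note on the pattern running through the variance_test.cc hunks above: every intermediate `const ...Func name = vpx_...;` global is dropped and the function address is passed straight into make_tuple as `&vpx_...`. A minimal sketch of why the two spellings are interchangeable — the typedef and dummy kernel below are illustrative stand-ins, not names from the patch:

// Sketch only: a stand-in kernel with a VarianceMxNFunc-style signature.
#include <cstdint>
#include <tuple>

typedef unsigned int (*VarFunc)(const uint8_t *src, int src_stride,
                                const uint8_t *ref, int ref_stride,
                                unsigned int *sse);

static unsigned int dummy_variance(const uint8_t *, int, const uint8_t *, int,
                                   unsigned int *sse) {
  *sse = 0;  // stand-in for a real vpx_variance kernel
  return 0;
}

int main() {
  // Old spelling: bind the pointer to a named constant first.
  const VarFunc via_constant = dummy_variance;
  // New spelling: take the address at the use site, as in
  // make_tuple(4, 4, &vpx_variance16x16_sse2, 0) above.
  std::tuple<int, int, VarFunc, int> t =
      std::make_tuple(4, 4, &dummy_variance, 0);
  return (std::get<2>(t) == via_constant) ? 0 : 1;
}

Both forms yield the same function pointer in the tuple; removing the named constants just cuts several hundred lines of boilerplate.
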
diff --git a/test/vp9_encoder_parms_get_to_decoder.cc b/test/vp9_encoder_parms_get_to_decoder.cc
index 0984e6a..3ef6022 100644
--- a/test/vp9_encoder_parms_get_to_decoder.cc
+++ b/test/vp9_encoder_parms_get_to_decoder.cc
@@ -14,9 +14,7 @@
 #include "test/encode_test_driver.h"
 #include "test/util.h"
 #include "test/y4m_video_source.h"
-#include "vp9/decoder/vp9_decoder.h"
-
-#include "vp9/vp9_dx_iface.c"
+#include "vp9/vp9_dx_iface.h"
 
 namespace {
 
@@ -40,17 +38,17 @@
   int32_t lossless;
   int32_t error_resilient;
   int32_t frame_parallel;
-  int32_t color_range;
+  vpx_color_range_t color_range;
   vpx_color_space_t cs;
   int render_size[2];
   // TODO(JBB): quantizers / bitrate
 };
 
 const EncodeParameters kVP9EncodeParameterSet[] = {
-  {0, 0, 0, 1, 0, 0, VPX_CS_BT_601},
-  {0, 0, 0, 0, 0, 1, VPX_CS_BT_709},
-  {0, 0, 1, 0, 0, 1, VPX_CS_BT_2020},
-  {0, 2, 0, 0, 1, 0, VPX_CS_UNKNOWN, { 640, 480 }},
+  {0, 0, 0, 1, 0, VPX_CR_STUDIO_RANGE, VPX_CS_BT_601},
+  {0, 0, 0, 0, 0, VPX_CR_FULL_RANGE, VPX_CS_BT_709},
+  {0, 0, 1, 0, 0, VPX_CR_FULL_RANGE, VPX_CS_BT_2020},
+  {0, 2, 0, 0, 1, VPX_CR_STUDIO_RANGE, VPX_CS_UNKNOWN, { 640, 480 }},
   // TODO(JBB): Test profiles (requires more work).
 };
 
diff --git a/test/vp9_error_block_test.cc b/test/vp9_error_block_test.cc
index d779706..77b12ea 100644
--- a/test/vp9_error_block_test.cc
+++ b/test/vp9_error_block_test.cc
@@ -67,12 +67,22 @@
   int64_t ret;
   int64_t ref_ssz;
   int64_t ref_ret;
+  const int msb = bit_depth_ + 8 - 1;
   for (int i = 0; i < kNumIterations; ++i) {
     int err_count = 0;
     block_size = 16 << (i % 9);  // All block sizes from 4x4, 8x4 ..64x64
     for (int j = 0; j < block_size; j++) {
-      coeff[j]   = rnd(2 << 20) - (1 << 20);
-      dqcoeff[j] = rnd(2 << 20) - (1 << 20);
+      // coeff and dqcoeff will always have at least the same sign, and this
+      // can be used for optimization, so generate test input precisely.
+      if (rnd(2)) {
+        // Positive number
+        coeff[j]   = rnd(1 << msb);
+        dqcoeff[j] = rnd(1 << msb);
+      } else {
+        // Negative number
+        coeff[j]   = -rnd(1 << msb);
+        dqcoeff[j] = -rnd(1 << msb);
+      }
     }
     ref_ret = ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz,
                                   bit_depth_);
@@ -85,7 +95,7 @@
     err_count_total += err_count;
   }
   EXPECT_EQ(0, err_count_total)
-      << "Error: Error Block Test, C output doesn't match SSE2 output. "
+      << "Error: Error Block Test, C output doesn't match optimized output. "
       << "First failed at test case " << first_failure;
 }
 
@@ -100,23 +110,36 @@
   int64_t ret;
   int64_t ref_ssz;
   int64_t ref_ret;
-  int max_val = ((1 << 20) - 1);
+  const int msb = bit_depth_ + 8 - 1;
+  int max_val = ((1 << msb) - 1);
   for (int i = 0; i < kNumIterations; ++i) {
     int err_count = 0;
-    int k = (i / 9) % 5;
+    int k = (i / 9) % 9;
 
     // Change the maximum coeff value, to test different bit boundaries
-    if ( k == 4 && (i % 9) == 0 ) {
+    if ( k == 8 && (i % 9) == 0 ) {
       max_val >>= 1;
     }
     block_size = 16 << (i % 9);  // All block sizes from 4x4, 8x4 ..64x64
     for (int j = 0; j < block_size; j++) {
-      if (k < 4) {  // Test at maximum values
-        coeff[j]   = k % 2 ? max_val : -max_val;
-        dqcoeff[j] = (k >> 1) % 2 ? max_val : -max_val;
+      if (k < 4) {
+        // Test at positive maximum values
+        coeff[j]   = k % 2 ? max_val : 0;
+        dqcoeff[j] = (k >> 1) % 2 ? max_val : 0;
+      } else if (k < 8) {
+        // Test at negative maximum values
+        coeff[j]   = k % 2 ? -max_val : 0;
+        dqcoeff[j] = (k >> 1) % 2 ? -max_val : 0;
       } else {
-        coeff[j]   = rnd(2 << 14) - (1 << 14);
-        dqcoeff[j] = rnd(2 << 14) - (1 << 14);
+        if (rnd(2)) {
+          // Positive number
+          coeff[j]   = rnd(1 << 14);
+          dqcoeff[j] = rnd(1 << 14);
+        } else {
+          // Negative number
+          coeff[j]   = -rnd(1 << 14);
+          dqcoeff[j] = -rnd(1 << 14);
+        }
       }
     }
     ref_ret = ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz,
@@ -130,21 +153,13 @@
     err_count_total += err_count;
   }
   EXPECT_EQ(0, err_count_total)
-      << "Error: Error Block Test, C output doesn't match SSE2 output. "
+      << "Error: Error Block Test, C output doesn't match optimized output. "
       << "First failed at test case " << first_failure;
 }
 
 using std::tr1::make_tuple;
 
-#if CONFIG_USE_X86INC && HAVE_SSE2
-int64_t wrap_vp9_highbd_block_error_8bit_sse2(const tran_low_t *coeff,
-                                              const tran_low_t *dqcoeff,
-                                              intptr_t block_size,
-                                              int64_t *ssz, int bps) {
-  assert(bps == 8);
-  return vp9_highbd_block_error_8bit_sse2(coeff, dqcoeff, block_size, ssz);
-}
-
+#if CONFIG_USE_X86INC
 int64_t wrap_vp9_highbd_block_error_8bit_c(const tran_low_t *coeff,
                                            const tran_low_t *dqcoeff,
                                            intptr_t block_size,
@@ -153,6 +168,15 @@
   return vp9_highbd_block_error_8bit_c(coeff, dqcoeff, block_size, ssz);
 }
 
+#if HAVE_SSE2
+int64_t wrap_vp9_highbd_block_error_8bit_sse2(const tran_low_t *coeff,
+                                              const tran_low_t *dqcoeff,
+                                              intptr_t block_size,
+                                              int64_t *ssz, int bps) {
+  assert(bps == 8);
+  return vp9_highbd_block_error_8bit_sse2(coeff, dqcoeff, block_size, ssz);
+}
+
 INSTANTIATE_TEST_CASE_P(
     SSE2, ErrorBlockTest,
     ::testing::Values(
@@ -165,5 +189,23 @@
         make_tuple(&wrap_vp9_highbd_block_error_8bit_sse2,
                    &wrap_vp9_highbd_block_error_8bit_c, VPX_BITS_8)));
 #endif  // HAVE_SSE2
+
+#if HAVE_AVX
+int64_t wrap_vp9_highbd_block_error_8bit_avx(const tran_low_t *coeff,
+                                             const tran_low_t *dqcoeff,
+                                             intptr_t block_size,
+                                             int64_t *ssz, int bps) {
+  assert(bps == 8);
+  return vp9_highbd_block_error_8bit_avx(coeff, dqcoeff, block_size, ssz);
+}
+
+INSTANTIATE_TEST_CASE_P(
+    AVX, ErrorBlockTest,
+    ::testing::Values(
+        make_tuple(&wrap_vp9_highbd_block_error_8bit_avx,
+                   &wrap_vp9_highbd_block_error_8bit_c, VPX_BITS_8)));
+#endif  // HAVE_AVX
+
+#endif  // CONFIG_USE_X86INC
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 }  // namespace
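
For reference, the input generation introduced above pairs coeff[j] and dqcoeff[j] with a shared sign and bounds their magnitude by msb = bit_depth + 8 - 1. A self-contained sketch of that generator; tran_low_t is replaced by int32_t and the test's random helper by a plain stand-in rnd(), so these names are illustrative:

#include <cstdint>
#include <cstdlib>

// Stand-in for the test's random helper: uniform value in [0, n).
static int rnd(int n) { return std::rand() % n; }

// Fill coeff/dqcoeff so each pair shares a sign (zero counts as either),
// with magnitudes below 2^msb, e.g. msb = 12 + 8 - 1 = 19 for 12-bit input.
static void fill_sign_coherent(int32_t *coeff, int32_t *dqcoeff,
                               int block_size, int bit_depth) {
  const int msb = bit_depth + 8 - 1;
  for (int j = 0; j < block_size; ++j) {
    if (rnd(2)) {  // positive pair
      coeff[j] = rnd(1 << msb);
      dqcoeff[j] = rnd(1 << msb);
    } else {  // negative pair
      coeff[j] = -rnd(1 << msb);
      dqcoeff[j] = -rnd(1 << msb);
    }
  }
}

int main() {
  int32_t coeff[16], dqcoeff[16];
  fill_sign_coherent(coeff, dqcoeff, 16, 12);
  // Every pair now satisfies coeff[j] * dqcoeff[j] >= 0, the invariant the
  // optimized block-error kernels are allowed to exploit.
  return 0;
}
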
diff --git a/test/vp9_spatial_svc_encoder.sh b/test/vp9_spatial_svc_encoder.sh
index 6dd5f17..6503107 100755
--- a/test/vp9_spatial_svc_encoder.sh
+++ b/test/vp9_spatial_svc_encoder.sh
@@ -54,7 +54,7 @@
   if [ "$(vp9_encode_available)" = "yes" ]; then
     local readonly test_name="vp9_spatial_svc"
     for layers in $(seq 1 ${vp9_ssvc_test_layers}); do
-      vp9_spatial_svc_encoder "${test_name}" -l ${layers}
+      vp9_spatial_svc_encoder "${test_name}" -sl ${layers}
     done
   fi
 }
diff --git a/vp10/common/blockd.c b/vp10/common/blockd.c
index 5394b5e..b6f910f 100644
--- a/vp10/common/blockd.c
+++ b/vp10/common/blockd.c
@@ -66,7 +66,7 @@
   for (r = 0; r < max_blocks_high; r += (1 << tx_size)) {
     // Skip visiting the sub blocks that are wholly within the UMV.
     for (c = 0; c < max_blocks_wide; c += (1 << tx_size)) {
-      visit(plane, i, plane_bsize, tx_size, arg);
+      visit(plane, i, r, c, plane_bsize, tx_size, arg);
       i += step;
     }
     i += extra_step;
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index 6d921b5..b89d791 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -186,8 +186,6 @@
   int up_available;
   int left_available;
 
-  const vpx_prob (*partition_probs)[PARTITION_TYPES - 1];
-
   /* Distance of MB away from frame edges */
   int mb_to_left_edge;
   int mb_to_right_edge;
@@ -213,7 +211,7 @@
   int bd;
 #endif
 
-  int lossless;
+  int lossless[MAX_SEGMENTS];
   int corrupted;
 
   struct vpx_internal_error_info *error_info;
@@ -242,8 +240,8 @@
   const MODE_INFO *const mi = xd->mi[0];
   const MB_MODE_INFO *const mbmi = &mi->mbmi;
 
-  if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mbmi) ||
-      mbmi->tx_size >= TX_32X32)
+  if (plane_type != PLANE_TYPE_Y || xd->lossless[mbmi->segment_id] ||
+      is_inter_block(mbmi) || mbmi->tx_size >= TX_32X32)
     return DCT_DCT;
 
   return intra_mode_to_tx_type_lookup[get_y_mode(mi, block_idx)];
@@ -284,16 +282,8 @@
   }
 }
 
-static INLINE const vpx_prob *get_y_mode_probs(const MODE_INFO *mi,
-                                               const MODE_INFO *above_mi,
-                                               const MODE_INFO *left_mi,
-                                               int block) {
-  const PREDICTION_MODE above = vp10_above_block_mode(mi, above_mi, block);
-  const PREDICTION_MODE left = vp10_left_block_mode(mi, left_mi, block);
-  return vp10_kf_y_mode_prob[above][left];
-}
-
 typedef void (*foreach_transformed_block_visitor)(int plane, int block,
+                                                  int blk_row, int blk_col,
                                                   BLOCK_SIZE plane_bsize,
                                                   TX_SIZE tx_size,
                                                   void *arg);
@@ -307,17 +297,6 @@
     const MACROBLOCKD* const xd, BLOCK_SIZE bsize,
     foreach_transformed_block_visitor visit, void *arg);
 
-static INLINE void txfrm_block_to_raster_xy(BLOCK_SIZE plane_bsize,
-                                            TX_SIZE tx_size, int block,
-                                            int *x, int *y) {
-  const int bwl = b_width_log2_lookup[plane_bsize];
-  const int tx_cols_log2 = bwl - tx_size;
-  const int tx_cols = 1 << tx_cols_log2;
-  const int raster_mb = block >> (tx_size << 1);
-  *x = (raster_mb & (tx_cols - 1)) << tx_size;
-  *y = (raster_mb >> tx_cols_log2) << tx_size;
-}
-
 void vp10_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
                       BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob,
                       int aoff, int loff);
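
The deleted txfrm_block_to_raster_xy above is what each visitor previously used to recover its position from the linear block index; with the widened visitor signature, blockd.c now hands blk_row/blk_col straight to visit(). A standalone sketch of the arithmetic the removed helper performed, with illustrative inputs (bwl would come from b_width_log2_lookup in real use):

#include <cassert>

// Map a raster-order transform-block index to (x, y) in 4x4-block units.
// bwl is log2 of the plane width in 4x4 units; tx_size is 0 for 4x4,
// 1 for 8x8, and so on, matching the lookup-table conventions above.
static void block_to_raster_xy(int bwl, int tx_size, int block,
                               int *x, int *y) {
  const int tx_cols_log2 = bwl - tx_size;
  const int tx_cols = 1 << tx_cols_log2;
  const int raster_mb = block >> (tx_size << 1);
  *x = (raster_mb & (tx_cols - 1)) << tx_size;
  *y = (raster_mb >> tx_cols_log2) << tx_size;
}

int main() {
  int x, y;
  // 32-wide plane (bwl = 3), 8x8 transforms: block index 4 is the second
  // 8x8 block in raster order, i.e. x = 2, y = 0 in 4x4 units.
  block_to_raster_xy(3, 1, 4, &x, &y);
  assert(x == 2 && y == 0);
  return 0;
}
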
diff --git a/vp10/common/common_data.h b/vp10/common/common_data.h
index 37e5751..334489c 100644
--- a/vp10/common/common_data.h
+++ b/vp10/common/common_data.h
@@ -31,6 +31,8 @@
 // Log 2 conversion lookup tables for modeinfo width and height
 static const uint8_t mi_width_log2_lookup[BLOCK_SIZES] =
   {0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3};
+static const uint8_t mi_height_log2_lookup[BLOCK_SIZES] =
+  {0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3};
 static const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES] =
   {1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8};
 static const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] =
diff --git a/vp10/common/entropy.c b/vp10/common/entropy.c
index 56dd73a..3da08a6 100644
--- a/vp10/common/entropy.c
+++ b/vp10/common/entropy.c
@@ -403,7 +403,6 @@
   {255, 241, 243, 255, 236, 255, 252, 254},
   {255, 243, 245, 255, 237, 255, 252, 254},
   {255, 246, 247, 255, 239, 255, 253, 255},
-  {255, 246, 247, 255, 239, 255, 253, 255},
 };
 
 static const vp10_coeff_probs_model default_coef_probs_4x4[PLANE_TYPES] = {
@@ -743,7 +742,9 @@
 };
 
 static void extend_to_full_distribution(vpx_prob *probs, vpx_prob p) {
-  memcpy(probs, vp10_pareto8_full[p = 0 ? 0 : p - 1],
+  // TODO(aconverse): model[PIVOT_NODE] should never be zero.
+  // https://code.google.com/p/webm/issues/detail?id=1089
+  memcpy(probs, vp10_pareto8_full[p == 0 ? 254 : p - 1],
          MODEL_NODES * sizeof(vpx_prob));
 }
 
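The one-line fix above is worth spelling out: in the deleted line, `p = 0 ? 0 : p - 1` parses as `p = (0 ? 0 : p - 1)` because the conditional operator binds tighter than assignment, so the intended zero check was silently an unconditional `p = p - 1`; with vpx_prob being unsigned, p == 0 wraps the index to 255, which only the duplicated last table row (removed earlier in this file) kept in bounds. The fixed line compares with `==` and maps p == 0 onto row 254, matching the COEFF_PROB_MODELS change in entropy.h below. A minimal demonstration of the precedence trap:

#include <cassert>

int main() {
  int p = 5;
  // Parses as p = (0 ? 0 : p - 1): an unconditional decrement, never a
  // comparison against zero.
  int buggy = (p = 0 ? 0 : p - 1);
  assert(buggy == 4 && p == 4);

  // The fixed form compares, clamping p == 0 onto the last table row.
  p = 0;
  int fixed = (p == 0 ? 254 : p - 1);
  assert(fixed == 254);
  return 0;
}
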
diff --git a/vp10/common/entropy.h b/vp10/common/entropy.h
index fba7020..2f93cb3 100644
--- a/vp10/common/entropy.h
+++ b/vp10/common/entropy.h
@@ -153,7 +153,7 @@
 // 1, 3, 5, 7, ..., 253, 255
 // In between probabilities are interpolated linearly
 
-#define COEFF_PROB_MODELS 256
+#define COEFF_PROB_MODELS 255
 
 #define UNCONSTRAINED_NODES         3
 
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c
index 370e1c2..8098305 100644
--- a/vp10/common/entropymode.c
+++ b/vp10/common/entropymode.c
@@ -127,6 +127,7 @@
   }
 };
 
+#if !CONFIG_MISC_FIXES
 const vpx_prob vp10_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1] = {
   { 144,  11,  54, 157, 195, 130,  46,  58, 108 },  // y = dc
   { 118,  15, 123, 148, 131, 101,  44,  93, 131 },  // y = v
@@ -139,6 +140,7 @@
   { 116,  12,  64, 120, 140, 125,  49, 115, 121 },  // y = d63
   { 102,  19,  66, 162, 182, 122,  35,  59, 128 }   // y = tm
 };
+#endif
 
 static const vpx_prob default_if_y_probs[BLOCK_SIZE_GROUPS][INTRA_MODES - 1] = {
   {  65,  32,  18, 144, 162, 194,  41,  51,  98 },  // block_size < 8x8
@@ -147,7 +149,7 @@
   { 221, 135,  38, 194, 248, 121,  96,  85,  29 }   // block_size >= 32x32
 };
 
-static const vpx_prob default_if_uv_probs[INTRA_MODES][INTRA_MODES - 1] = {
+static const vpx_prob default_uv_probs[INTRA_MODES][INTRA_MODES - 1] = {
   { 120,   7,  76, 176, 208, 126,  28,  54, 103 },  // y = dc
   {  48,  12, 154, 155, 139,  90,  34, 117, 119 },  // y = v
   {  67,   6,  25, 204, 243, 158,  13,  21,  96 },  // y = h
@@ -160,6 +162,7 @@
   { 101,  21, 107, 181, 192, 103,  19,  67, 125 }   // y = tm
 };
 
+#if !CONFIG_MISC_FIXES
 const vpx_prob vp10_kf_partition_probs[PARTITION_CONTEXTS]
                                      [PARTITION_TYPES - 1] = {
   // 8x8 -> 4x4
@@ -183,6 +186,7 @@
   {  57,  15,   9 },  // l split, a not split
   {  12,   3,   3 },  // a/l both split
 };
+#endif
 
 static const vpx_prob default_partition_probs[PARTITION_CONTEXTS]
                                              [PARTITION_TYPES - 1] = {
@@ -285,7 +289,8 @@
 };
 
 // TODO(huisu): tune these probs
-const vpx_prob vp10_default_palette_y_size_prob[10][PALETTE_SIZES - 1] = {
+const vpx_prob
+vp10_default_palette_y_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1] = {
     {  96,  89, 100,  64,  77, 130},
     {  22,  15,  44,  16,  34,  82},
     {  30,  19,  57,  18,  38,  86},
@@ -298,7 +303,8 @@
     {  98, 105, 142,  63,  64, 152},
 };
 
-const vpx_prob vp10_default_palette_uv_size_prob[10][PALETTE_SIZES - 1] = {
+const vpx_prob
+vp10_default_palette_uv_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1] = {
     { 160, 196, 228, 213, 175, 230},
     {  87, 148, 208, 141, 166, 163},
     {  72, 151, 204, 139, 155, 161},
@@ -311,7 +317,9 @@
     {  72,  55,  66,  68,  79, 107},
 };
 
-const vpx_prob vp10_default_palette_y_mode_prob[10][3] = {
+const vpx_prob
+vp10_default_palette_y_mode_prob[PALETTE_BLOCK_SIZES][PALETTE_Y_MODE_CONTEXTS]
+                                                      = {
     { 240,  180,  100, },
     { 240,  180,  100, },
     { 240,  180,  100, },
@@ -746,8 +754,16 @@
   { 149, 144, },
 };
 
+#if CONFIG_MISC_FIXES
+// FIXME(someone) need real defaults here
+static const struct segmentation_probs default_seg_probs = {
+  { 128, 128, 128, 128, 128, 128, 128 },
+  { 128, 128, 128 },
+};
+#endif
+
 static void init_mode_probs(FRAME_CONTEXT *fc) {
-  vp10_copy(fc->uv_mode_prob, default_if_uv_probs);
+  vp10_copy(fc->uv_mode_prob, default_uv_probs);
   vp10_copy(fc->y_mode_prob, default_if_y_probs);
   vp10_copy(fc->switchable_interp_prob, default_switchable_interp_prob);
   vp10_copy(fc->partition_prob, default_partition_probs);
@@ -758,6 +774,10 @@
   fc->tx_probs = default_tx_probs;
   vp10_copy(fc->skip_probs, default_skip_probs);
   vp10_copy(fc->inter_mode_probs, default_inter_mode_probs);
+#if CONFIG_MISC_FIXES
+  vp10_copy(fc->seg.tree_probs, default_seg_probs.tree_probs);
+  vp10_copy(fc->seg.pred_probs, default_seg_probs.pred_probs);
+#endif
 }
 
 const vpx_tree_index vp10_switchable_interp_tree
@@ -794,6 +814,7 @@
     vpx_tree_merge_probs(vp10_intra_mode_tree, pre_fc->y_mode_prob[i],
                 counts->y_mode[i], fc->y_mode_prob[i]);
 
+#if !CONFIG_MISC_FIXES
   for (i = 0; i < INTRA_MODES; ++i)
     vpx_tree_merge_probs(vp10_intra_mode_tree, pre_fc->uv_mode_prob[i],
                          counts->uv_mode[i], fc->uv_mode_prob[i]);
@@ -801,6 +822,7 @@
   for (i = 0; i < PARTITION_CONTEXTS; i++)
     vpx_tree_merge_probs(vp10_partition_tree, pre_fc->partition_prob[i],
                          counts->partition[i], fc->partition_prob[i]);
+#endif
 
   if (cm->interp_filter == SWITCHABLE) {
     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
@@ -844,6 +866,28 @@
   for (i = 0; i < SKIP_CONTEXTS; ++i)
     fc->skip_probs[i] = mode_mv_merge_probs(
         pre_fc->skip_probs[i], counts->skip[i]);
+
+#if CONFIG_MISC_FIXES
+  if (cm->seg.temporal_update) {
+    for (i = 0; i < PREDICTION_PROBS; i++)
+      fc->seg.pred_probs[i] = mode_mv_merge_probs(pre_fc->seg.pred_probs[i],
+                                                  counts->seg.pred[i]);
+
+    vpx_tree_merge_probs(vp10_segment_tree, pre_fc->seg.tree_probs,
+                         counts->seg.tree_mispred, fc->seg.tree_probs);
+  } else {
+    vpx_tree_merge_probs(vp10_segment_tree, pre_fc->seg.tree_probs,
+                         counts->seg.tree_total, fc->seg.tree_probs);
+  }
+
+  for (i = 0; i < INTRA_MODES; ++i)
+    vpx_tree_merge_probs(vp10_intra_mode_tree, pre_fc->uv_mode_prob[i],
+                         counts->uv_mode[i], fc->uv_mode_prob[i]);
+
+  for (i = 0; i < PARTITION_CONTEXTS; i++)
+    vpx_tree_merge_probs(vp10_partition_tree, pre_fc->partition_prob[i],
+                         counts->partition[i], fc->partition_prob[i]);
+#endif
 }
 
 static void set_default_lf_deltas(struct loopfilter *lf) {
diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h
index 0981263..c9b667b 100644
--- a/vp10/common/entropymode.h
+++ b/vp10/common/entropymode.h
@@ -14,6 +14,7 @@
 #include "vp10/common/entropy.h"
 #include "vp10/common/entropymv.h"
 #include "vp10/common/filter.h"
+#include "vp10/common/seg_common.h"
 #include "vpx_dsp/vpx_filter.h"
 
 #ifdef __cplusplus
@@ -47,6 +48,12 @@
   unsigned int tx_totals[TX_SIZES];
 };
 
+struct seg_counts {
+  unsigned int tree_total[MAX_SEGMENTS];
+  unsigned int tree_mispred[MAX_SEGMENTS];
+  unsigned int pred[PREDICTION_PROBS][2];
+};
+
 typedef struct frame_contexts {
   vpx_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1];
   vpx_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
@@ -62,10 +69,14 @@
   struct tx_probs tx_probs;
   vpx_prob skip_probs[SKIP_CONTEXTS];
   nmv_context nmvc;
+#if CONFIG_MISC_FIXES
+  struct segmentation_probs seg;
+#endif
   int initialized;
 } FRAME_CONTEXT;
 
 typedef struct FRAME_COUNTS {
+  unsigned int kf_y_mode[INTRA_MODES][INTRA_MODES][INTRA_MODES];
   unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES];
   unsigned int uv_mode[INTRA_MODES][INTRA_MODES];
   unsigned int partition[PARTITION_CONTEXTS][PARTITION_TYPES];
@@ -82,13 +93,18 @@
   struct tx_counts tx;
   unsigned int skip[SKIP_CONTEXTS][2];
   nmv_context_counts mv;
+#if CONFIG_MISC_FIXES
+  struct seg_counts seg;
+#endif
 } FRAME_COUNTS;
 
-extern const vpx_prob vp10_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
 extern const vpx_prob vp10_kf_y_mode_prob[INTRA_MODES][INTRA_MODES]
                                         [INTRA_MODES - 1];
+#if !CONFIG_MISC_FIXES
+extern const vpx_prob vp10_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
 extern const vpx_prob vp10_kf_partition_probs[PARTITION_CONTEXTS]
                                             [PARTITION_TYPES - 1];
+#endif
 extern const vpx_prob
 vp10_default_palette_y_mode_prob[PALETTE_BLOCK_SIZES][PALETTE_Y_MODE_CONTEXTS];
 extern const vpx_prob
diff --git a/vp10/common/entropymv.c b/vp10/common/entropymv.c
index fd11d90..a9946ee 100644
--- a/vp10/common/entropymv.c
+++ b/vp10/common/entropymv.c
@@ -128,7 +128,7 @@
 }
 
 int vp10_use_mv_hp(const MV *ref) {
-#if CONFIG_UNIVERSAL_HP
+#if CONFIG_MISC_FIXES
   (void) ref;
   return 1;
 #else
diff --git a/vp10/common/mvref_common.c b/vp10/common/mvref_common.c
index 9bf701f..1ef80c2 100644
--- a/vp10/common/mvref_common.c
+++ b/vp10/common/mvref_common.c
@@ -27,9 +27,13 @@
   const MV_REF *const  prev_frame_mvs = cm->use_prev_frame_mvs ?
       cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL;
   const TileInfo *const tile = &xd->tile;
+  const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type] << 3;
+  const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type] << 3;
 
+#if !CONFIG_MISC_FIXES
   // Blank the reference vector list
   memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);
+#endif
 
   // The nearest 2 blocks are treated differently
   // if the size < 8x8 we get the mv from the bmi substructure,
@@ -46,10 +50,10 @@
 
       if (candidate->ref_frame[0] == ref_frame)
         ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, block),
-                        refmv_count, mv_ref_list, Done);
+                        refmv_count, mv_ref_list, bw, bh, xd, Done);
       else if (candidate->ref_frame[1] == ref_frame)
         ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, mv_ref->col, block),
-                        refmv_count, mv_ref_list, Done);
+                        refmv_count, mv_ref_list, bw, bh, xd, Done);
     }
   }
 
@@ -64,9 +68,11 @@
       different_ref_found = 1;
 
       if (candidate->ref_frame[0] == ref_frame)
-        ADD_MV_REF_LIST(candidate->mv[0], refmv_count, mv_ref_list, Done);
+        ADD_MV_REF_LIST(candidate->mv[0], refmv_count, mv_ref_list,
+                        bw, bh, xd, Done);
       else if (candidate->ref_frame[1] == ref_frame)
-        ADD_MV_REF_LIST(candidate->mv[1], refmv_count, mv_ref_list, Done);
+        ADD_MV_REF_LIST(candidate->mv[1], refmv_count, mv_ref_list,
+                        bw, bh, xd, Done);
     }
   }
 
@@ -88,9 +94,11 @@
     }
 
     if (prev_frame_mvs->ref_frame[0] == ref_frame) {
-      ADD_MV_REF_LIST(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done);
+      ADD_MV_REF_LIST(prev_frame_mvs->mv[0], refmv_count, mv_ref_list,
+                      bw, bh, xd, Done);
     } else if (prev_frame_mvs->ref_frame[1] == ref_frame) {
-      ADD_MV_REF_LIST(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, Done);
+      ADD_MV_REF_LIST(prev_frame_mvs->mv[1], refmv_count, mv_ref_list,
+                      bw, bh, xd, Done);
     }
   }
 
@@ -106,7 +114,7 @@
 
         // If the candidate is INTRA we don't want to consider its mv.
         IF_DIFF_REF_FRAME_ADD_MV(candidate, ref_frame, ref_sign_bias,
-                                 refmv_count, mv_ref_list, Done);
+                                 refmv_count, mv_ref_list, bw, bh, xd, Done);
       }
     }
   }
@@ -121,7 +129,7 @@
         mv.as_mv.row *= -1;
         mv.as_mv.col *= -1;
       }
-      ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done);
+      ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, bw, bh, xd, Done);
     }
 
     if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME &&
@@ -135,7 +143,7 @@
         mv.as_mv.row *= -1;
         mv.as_mv.col *= -1;
       }
-      ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done);
+      ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, bw, bh, xd, Done);
     }
   }
 
@@ -143,9 +151,14 @@
 
   mode_context[ref_frame] = counter_to_context[context_counter];
 
+#if CONFIG_MISC_FIXES
+  for (i = refmv_count; i < MAX_MV_REF_CANDIDATES; ++i)
+    mv_ref_list[i].as_int = 0;
+#else
   // Clamp vectors
   for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i)
-    clamp_mv_ref(&mv_ref_list[i].as_mv, xd);
+    clamp_mv_ref(&mv_ref_list[i].as_mv, bw, bh, xd);
+#endif
 }
 
 void vp10_find_mv_refs(const VP10_COMMON *cm, const MACROBLOCKD *xd,
diff --git a/vp10/common/mvref_common.h b/vp10/common/mvref_common.h
index e92ad52..0a98866 100644
--- a/vp10/common/mvref_common.h
+++ b/vp10/common/mvref_common.h
@@ -119,13 +119,26 @@
 };
 
 // clamp_mv_ref
+#if CONFIG_MISC_FIXES
+#define MV_BORDER (8 << 3)  // Allow 8 pels in 1/8th pel units
+#else
 #define MV_BORDER (16 << 3)  // Allow 16 pels in 1/8th pel units
+#endif
 
-static INLINE void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) {
+static INLINE void clamp_mv_ref(MV *mv, int bw, int bh, const MACROBLOCKD *xd) {
+#if CONFIG_MISC_FIXES
+  clamp_mv(mv, xd->mb_to_left_edge - bw * 8 - MV_BORDER,
+               xd->mb_to_right_edge + bw * 8 + MV_BORDER,
+               xd->mb_to_top_edge - bh * 8 - MV_BORDER,
+               xd->mb_to_bottom_edge + bh * 8 + MV_BORDER);
+#else
+  (void) bw;
+  (void) bh;
   clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER,
                xd->mb_to_right_edge + MV_BORDER,
                xd->mb_to_top_edge - MV_BORDER,
                xd->mb_to_bottom_edge + MV_BORDER);
+#endif
 }
 
 // This function returns either the appropriate sub block or block's mv
@@ -151,36 +164,41 @@
   return mv;
 }
 
+#if CONFIG_MISC_FIXES
+#define CLIP_IN_ADD(mv, bw, bh, xd) clamp_mv_ref(mv, bw, bh, xd)
+#else
+#define CLIP_IN_ADD(mv, bw, bh, xd) do {} while (0)
+#endif
+
 // This macro is used to add a motion vector to the mv_ref list if it isn't
 // already in the list.  If it's the second motion vector, it will also
 // skip all additional processing and jump to Done.
-#define ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done) \
+#define ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, bw, bh, xd, Done) \
   do { \
-    if (refmv_count) { \
-      if ((mv).as_int != (mv_ref_list)[0].as_int) { \
-        (mv_ref_list)[(refmv_count)] = (mv); \
+    (mv_ref_list)[(refmv_count)] = (mv); \
+    CLIP_IN_ADD(&(mv_ref_list)[(refmv_count)].as_mv, (bw), (bh), (xd)); \
+    if (refmv_count && (mv_ref_list)[1].as_int != (mv_ref_list)[0].as_int) { \
+        (refmv_count) = 2; \
         goto Done; \
-      } \
-    } else { \
-      (mv_ref_list)[(refmv_count)++] = (mv); \
     } \
+    (refmv_count) = 1; \
   } while (0)
 
 // If either reference frame is different, not INTRA, and they
 // are different from each other scale and add the mv to our list.
 #define IF_DIFF_REF_FRAME_ADD_MV(mbmi, ref_frame, ref_sign_bias, refmv_count, \
-                                 mv_ref_list, Done) \
+                                 mv_ref_list, bw, bh, xd, Done) \
   do { \
     if (is_inter_block(mbmi)) { \
       if ((mbmi)->ref_frame[0] != ref_frame) \
         ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \
-                        refmv_count, mv_ref_list, Done); \
+                        refmv_count, mv_ref_list, bw, bh, xd, Done); \
       if (has_second_ref(mbmi) && \
           (CONFIG_MISC_FIXES || \
            (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) && \
           (mbmi)->ref_frame[1] != ref_frame) \
         ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \
-                        refmv_count, mv_ref_list, Done); \
+                        refmv_count, mv_ref_list, bw, bh, xd, Done); \
     } \
   } while (0)
 
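The net effect of the macro rework above: every candidate is clamped as it is inserted (CLIP_IN_ADD), and the clamping window now scales with the block's own dimensions, so a reference vector may point at most MV_BORDER eighth-pels beyond the block's footprint outside the frame. A standalone sketch of the window arithmetic, using the mb_to_*_edge convention from the hunk (SK_ names are stand-ins; bw/bh in pixels, MVs and edge distances in 1/8-pel units):

#define SK_MV_BORDER (8 << 3)  /* matches the CONFIG_MISC_FIXES value above */

typedef struct { short row, col; } SK_MV;

static short sk_clamp(short v, int lo, int hi) {
  return (short)(v < lo ? lo : (v > hi ? hi : v));
}

/* Sketch: clamp a candidate MV for a bw x bh block.  mb_to_*_edge are the
 * codec's signed distances to the frame edges, negative when overhanging. */
static void sk_clamp_mv_ref(SK_MV *mv, int bw, int bh,
                            int mb_to_left_edge, int mb_to_right_edge,
                            int mb_to_top_edge, int mb_to_bottom_edge) {
  mv->col = sk_clamp(mv->col, mb_to_left_edge - bw * 8 - SK_MV_BORDER,
                     mb_to_right_edge + bw * 8 + SK_MV_BORDER);
  mv->row = sk_clamp(mv->row, mb_to_top_edge - bh * 8 - SK_MV_BORDER,
                     mb_to_bottom_edge + bh * 8 + SK_MV_BORDER);
}
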
diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h
index 73157b8..6814133 100644
--- a/vp10/common/onyxc_int.h
+++ b/vp10/common/onyxc_int.h
@@ -254,6 +254,9 @@
 
   struct loopfilter lf;
   struct segmentation seg;
+#if !CONFIG_MISC_FIXES
+  struct segmentation_probs segp;
+#endif
 
   int frame_parallel_decode;  // frame-based threading.
 
@@ -299,6 +302,11 @@
   PARTITION_CONTEXT *above_seg_context;
   ENTROPY_CONTEXT *above_context;
   int above_context_alloc_cols;
+
+  // Scratch memory for intra-only/keyframe forward updates from the default
+  // tables - this is intentionally not placed in FRAME_CONTEXT since it is
+  // reset on each keyframe and not used afterwards.
+  vpx_prob kf_y_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1];
 } VP10_COMMON;
 
 // TODO(hkuang): Don't need to lock the whole pool after implementing atomic
@@ -371,14 +379,6 @@
   return cm->frame_type == KEY_FRAME || cm->intra_only;
 }
 
-static INLINE void set_partition_probs(const VP10_COMMON *const cm,
-                                       MACROBLOCKD *const xd) {
-  xd->partition_probs =
-      frame_is_intra_only(cm) ?
-          &vp10_kf_partition_probs[0] :
-          (const vpx_prob (*)[PARTITION_TYPES - 1])cm->fc->partition_prob;
-}
-
 static INLINE void vp10_init_macroblockd(VP10_COMMON *cm, MACROBLOCKD *xd,
                                         tran_low_t *dqcoeff) {
   int i;
@@ -399,13 +399,6 @@
   xd->above_seg_context = cm->above_seg_context;
   xd->mi_stride = cm->mi_stride;
   xd->error_info = &cm->error;
-
-  set_partition_probs(cm, xd);
-}
-
-static INLINE const vpx_prob* get_partition_probs(const MACROBLOCKD *xd,
-                                                  int ctx) {
-  return xd->partition_probs[ctx];
 }
 
 static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) {
@@ -455,6 +448,16 @@
   }
 }
 
+static INLINE const vpx_prob *get_y_mode_probs(const VP10_COMMON *cm,
+                                               const MODE_INFO *mi,
+                                               const MODE_INFO *above_mi,
+                                               const MODE_INFO *left_mi,
+                                               int block) {
+  const PREDICTION_MODE above = vp10_above_block_mode(mi, above_mi, block);
+  const PREDICTION_MODE left = vp10_left_block_mode(mi, left_mi, block);
+  return cm->kf_y_prob[above][left];
+}
+
 static INLINE void update_partition_context(MACROBLOCKD *xd,
                                             int mi_row, int mi_col,
                                             BLOCK_SIZE subsize,
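
Usage note for the kf_y_prob scratch table above: it is reseeded from the constant default table at every intra-only frame and then, under CONFIG_MISC_FIXES, refined by forward updates read from the bitstream, which is why it need not persist in FRAME_CONTEXT. A hedged sketch of that lifecycle with local stand-in types:

#include <string.h>

#define SK_INTRA_MODES 10
typedef unsigned char sk_prob;

/* Sketch: reseed the keyframe y-mode table from the constant defaults;
 * nothing in it survives to the next frame. */
static void sk_reset_kf_y_probs(
    sk_prob dst[SK_INTRA_MODES][SK_INTRA_MODES][SK_INTRA_MODES - 1],
    const sk_prob src[SK_INTRA_MODES][SK_INTRA_MODES][SK_INTRA_MODES - 1]) {
  memcpy(dst, src, sizeof(sk_prob) * SK_INTRA_MODES * SK_INTRA_MODES *
                       (SK_INTRA_MODES - 1));
  /* ... forward updates (vp10_diff_update_prob) would refine dst here. */
}
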
diff --git a/vp10/common/pred_common.h b/vp10/common/pred_common.h
index 1b55f53..d6d7146 100644
--- a/vp10/common/pred_common.h
+++ b/vp10/common/pred_common.h
@@ -48,9 +48,9 @@
   return above_sip + left_sip;
 }
 
-static INLINE vpx_prob vp10_get_pred_prob_seg_id(const struct segmentation *seg,
-                                                const MACROBLOCKD *xd) {
-  return seg->pred_probs[vp10_get_pred_context_seg_id(xd)];
+static INLINE vpx_prob vp10_get_pred_prob_seg_id(
+    const struct segmentation_probs *segp, const MACROBLOCKD *xd) {
+  return segp->pred_probs[vp10_get_pred_context_seg_id(xd)];
 }
 
 static INLINE int vp10_get_skip_context(const MACROBLOCKD *xd) {
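
As the refactored accessor shows, the temporal segment-id predictor keys its probability on a 3-way context: how many of the above/left neighbours had their segment id predicted. In sketch form:

/* Sketch: context from the neighbours' seg_id_predicted flags (0/1 each);
 * the result indexes pred_probs[PREDICTION_PROBS]. */
static int sk_pred_context_seg_id(int above_sip, int left_sip) {
  return above_sip + left_sip;  /* 0, 1 or 2 */
}
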
diff --git a/vp10/common/reconintra.c b/vp10/common/reconintra.c
index 5dc1a82..14af7eb 100644
--- a/vp10/common/reconintra.c
+++ b/vp10/common/reconintra.c
@@ -21,6 +21,28 @@
 #include "vp10/common/reconintra.h"
 #include "vp10/common/onyxc_int.h"
 
+#if CONFIG_MISC_FIXES
+enum {
+  NEED_LEFT = 1 << 1,
+  NEED_ABOVE = 1 << 2,
+  NEED_ABOVERIGHT = 1 << 3,
+  NEED_ABOVELEFT = 1 << 4,
+  NEED_BOTTOMLEFT = 1 << 5,
+};
+
+static const uint8_t extend_modes[INTRA_MODES] = {
+  NEED_ABOVE | NEED_LEFT,                   // DC
+  NEED_ABOVE,                               // V
+  NEED_LEFT,                                // H
+  NEED_ABOVE | NEED_ABOVERIGHT,             // D45
+  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
+  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D117
+  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D153
+  NEED_LEFT | NEED_BOTTOMLEFT,              // D207
+  NEED_ABOVE | NEED_ABOVERIGHT,             // D63
+  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // TM
+};
+#else
 enum {
   NEED_LEFT = 1 << 1,
   NEED_ABOVE = 1 << 2,
@@ -39,6 +61,134 @@
   NEED_ABOVERIGHT,              // D63
   NEED_LEFT | NEED_ABOVE,       // TM
 };
+#endif
+
+#if CONFIG_MISC_FIXES
+static const uint8_t orders_64x64[1] = { 0 };
+static const uint8_t orders_64x32[2] = { 0, 1 };
+static const uint8_t orders_32x64[2] = { 0, 1 };
+static const uint8_t orders_32x32[4] = {
+  0, 1,
+  2, 3,
+};
+static const uint8_t orders_32x16[8] = {
+  0, 2,
+  1, 3,
+  4, 6,
+  5, 7,
+};
+static const uint8_t orders_16x32[8] = {
+  0, 1, 2, 3,
+  4, 5, 6, 7,
+};
+static const uint8_t orders_16x16[16] = {
+  0,   1,  4,  5,
+  2,   3,  6,  7,
+  8,   9, 12, 13,
+  10, 11, 14, 15,
+};
+static const uint8_t orders_16x8[32] = {
+  0,   2,  8, 10,
+  1,   3,  9, 11,
+  4,   6, 12, 14,
+  5,   7, 13, 15,
+  16, 18, 24, 26,
+  17, 19, 25, 27,
+  20, 22, 28, 30,
+  21, 23, 29, 31,
+};
+static const uint8_t orders_8x16[32] = {
+  0,   1,  2,  3,  8,  9, 10, 11,
+  4,   5,  6,  7, 12, 13, 14, 15,
+  16, 17, 18, 19, 24, 25, 26, 27,
+  20, 21, 22, 23, 28, 29, 30, 31,
+};
+static const uint8_t orders_8x8[64] = {
+  0,   1,  4,  5, 16, 17, 20, 21,
+  2,   3,  6,  7, 18, 19, 22, 23,
+  8,   9, 12, 13, 24, 25, 28, 29,
+  10, 11, 14, 15, 26, 27, 30, 31,
+  32, 33, 36, 37, 48, 49, 52, 53,
+  34, 35, 38, 39, 50, 51, 54, 55,
+  40, 41, 44, 45, 56, 57, 60, 61,
+  42, 43, 46, 47, 58, 59, 62, 63,
+};
+static const uint8_t *const orders[BLOCK_SIZES] = {
+  orders_8x8, orders_8x8, orders_8x8, orders_8x8,
+  orders_8x16, orders_16x8, orders_16x16,
+  orders_16x32, orders_32x16, orders_32x32,
+  orders_32x64, orders_64x32, orders_64x64,
+};
+
+static int vp10_has_right(BLOCK_SIZE bsize, int mi_row, int mi_col,
+                          int right_available,
+                          TX_SIZE txsz, int y, int x, int ss_x) {
+  if (y == 0) {
+    int wl = mi_width_log2_lookup[bsize];
+    int hl = mi_height_log2_lookup[bsize];
+    int w = 1 << (wl + 1 - ss_x);
+    int step = 1 << txsz;
+    const uint8_t *order = orders[bsize];
+    int my_order, tr_order;
+
+    if (x + step < w)
+      return 1;
+
+    mi_row = (mi_row & 7) >> hl;
+    mi_col = (mi_col & 7) >> wl;
+
+    if (mi_row == 0)
+      return right_available;
+
+    if (((mi_col + 1) << wl) >= 8)
+      return 0;
+
+    my_order = order[((mi_row + 0) << (3 - wl)) + mi_col + 0];
+    tr_order = order[((mi_row - 1) << (3 - wl)) + mi_col + 1];
+
+    return my_order > tr_order && right_available;
+  } else {
+    int wl = mi_width_log2_lookup[bsize];
+    int w = 1 << (wl + 1 - ss_x);
+    int step = 1 << txsz;
+
+    return x + step < w;
+  }
+}
+
+static int vp10_has_bottom(BLOCK_SIZE bsize, int mi_row, int mi_col,
+                           int bottom_available, TX_SIZE txsz,
+                           int y, int x, int ss_y) {
+  if (x == 0) {
+    int wl = mi_width_log2_lookup[bsize];
+    int hl = mi_height_log2_lookup[bsize];
+    int h = 1 << (hl + 1 - ss_y);
+    int step = 1 << txsz;
+    const uint8_t *order = orders[bsize];
+    int my_order, bl_order;
+
+    mi_row = (mi_row & 7) >> hl;
+    mi_col = (mi_col & 7) >> wl;
+
+    if (mi_col == 0)
+      return bottom_available &&
+             (mi_row << (hl + !ss_y)) + y + step < (8 << !ss_y);
+
+    if (((mi_row + 1) << hl) >= 8)
+      return 0;
+
+    if (y + step < h)
+      return 1;
+
+    my_order = order[((mi_row + 0) << (3 - wl)) + mi_col + 0];
+    bl_order = order[((mi_row + 1) << (3 - wl)) + mi_col - 1];
+
+    return bl_order < my_order && bottom_available;
+  } else {
+    return 0;
+  }
+}
+#endif
 
 typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
                               const uint8_t *above, const uint8_t *left);
@@ -66,14 +216,15 @@
 
   INIT_ALL_SIZES(pred[V_PRED], v);
   INIT_ALL_SIZES(pred[H_PRED], h);
-  INIT_ALL_SIZES(pred[D207_PRED], d207);
 #if CONFIG_MISC_FIXES
-  pred[D45_PRED][TX_4X4] = vpx_d45e_predictor_4x4;
-  INIT_NO_4X4(pred[D45_PRED], d45);
+  INIT_ALL_SIZES(pred[D207_PRED], d207e);
+  INIT_ALL_SIZES(pred[D45_PRED], d45e);
+  INIT_ALL_SIZES(pred[D63_PRED], d63e);
 #else
+  INIT_ALL_SIZES(pred[D207_PRED], d207);
   INIT_ALL_SIZES(pred[D45_PRED], d45);
-#endif
   INIT_ALL_SIZES(pred[D63_PRED], d63);
+#endif
   INIT_ALL_SIZES(pred[D117_PRED], d117);
   INIT_ALL_SIZES(pred[D135_PRED], d135);
   INIT_ALL_SIZES(pred[D153_PRED], d153);
@@ -87,14 +238,15 @@
 #if CONFIG_VP9_HIGHBITDEPTH
   INIT_ALL_SIZES(pred_high[V_PRED], highbd_v);
   INIT_ALL_SIZES(pred_high[H_PRED], highbd_h);
-  INIT_ALL_SIZES(pred_high[D207_PRED], highbd_d207);
 #if CONFIG_MISC_FIXES
-  pred_high[D45_PRED][TX_4X4] = vpx_highbd_d45e_predictor_4x4;
-  INIT_NO_4X4(pred_high[D45_PRED], highbd_d45);
-#else
-  INIT_ALL_SIZES(pred_high[D45_PRED], highbd_d45);
-#endif
+  INIT_ALL_SIZES(pred_high[D207_PRED], highbd_d207e);
+  INIT_ALL_SIZES(pred_high[D45_PRED], highbd_d45e);
   INIT_ALL_SIZES(pred_high[D63_PRED], highbd_d63);
+#else
+  INIT_ALL_SIZES(pred_high[D207_PRED], highbd_d207);
+  INIT_ALL_SIZES(pred_high[D45_PRED], highbd_d45);
+  INIT_ALL_SIZES(pred_high[D63_PRED], highbd_d63);
+#endif
   INIT_ALL_SIZES(pred_high[D117_PRED], highbd_d117);
   INIT_ALL_SIZES(pred_high[D135_PRED], highbd_d135);
   INIT_ALL_SIZES(pred_high[D153_PRED], highbd_d153);
@@ -109,6 +261,13 @@
 #undef intra_pred_allsizes
 }
 
+#if CONFIG_MISC_FIXES
+static INLINE void memset16(uint16_t *dst, int val, int n) {
+  while (n--)
+    *dst++ = val;
+}
+#endif
+
 #if CONFIG_VP9_HIGHBITDEPTH
 static void build_intra_predictors_high(const MACROBLOCKD *xd,
                                         const uint8_t *ref8,
@@ -117,22 +276,38 @@
                                         int dst_stride,
                                         PREDICTION_MODE mode,
                                         TX_SIZE tx_size,
+#if CONFIG_MISC_FIXES
+                                        int n_top_px, int n_topright_px,
+                                        int n_left_px, int n_bottomleft_px,
+#else
                                         int up_available,
                                         int left_available,
                                         int right_available,
+#endif
                                         int x, int y,
                                         int plane, int bd) {
   int i;
   uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
   uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+#if CONFIG_MISC_FIXES
   DECLARE_ALIGNED(16, uint16_t, left_col[32]);
+#else
+  DECLARE_ALIGNED(16, uint16_t, left_col[64]);
+#endif
   DECLARE_ALIGNED(16, uint16_t, above_data[64 + 16]);
   uint16_t *above_row = above_data + 16;
   const uint16_t *const_above_row = above_row;
   const int bs = 4 << tx_size;
+#if CONFIG_MISC_FIXES
+  const uint16_t *above_ref = ref - ref_stride;
+#else
   int frame_width, frame_height;
   int x0, y0;
   const struct macroblockd_plane *const pd = &xd->plane[plane];
+#endif
+  const int need_left = extend_modes[mode] & NEED_LEFT;
+  const int need_above = extend_modes[mode] & NEED_ABOVE;
+  const int need_aboveright = extend_modes[mode] & NEED_ABOVERIGHT;
   int base = 128 << (bd - 8);
   // 127 127 127 .. 127 127 127 127 127 127
   // 129  A   B  ..  Y   Z
@@ -140,129 +315,56 @@
   // 129  E   F  ..  U   V
   // 129  G   H  ..  S   T   T   T   T   T
 
-  // Get current frame pointer, width and height.
-  if (plane == 0) {
-    frame_width = xd->cur_buf->y_width;
-    frame_height = xd->cur_buf->y_height;
-  } else {
-    frame_width = xd->cur_buf->uv_width;
-    frame_height = xd->cur_buf->uv_height;
-  }
+#if CONFIG_MISC_FIXES
+  (void) x;
+  (void) y;
+  (void) plane;
+  (void) need_left;
+  (void) need_above;
+  (void) need_aboveright;
 
-  // Get block position in current frame.
-  x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
-  y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
-
-  // left
-  if (left_available) {
-    if (xd->mb_to_bottom_edge < 0) {
-      /* slower path if the block needs border extension */
-      if (y0 + bs <= frame_height) {
-        for (i = 0; i < bs; ++i)
-          left_col[i] = ref[i * ref_stride - 1];
-      } else {
-        const int extend_bottom = frame_height - y0;
-        for (i = 0; i < extend_bottom; ++i)
-          left_col[i] = ref[i * ref_stride - 1];
-        for (; i < bs; ++i)
-          left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1];
-      }
-    } else {
-      /* faster path if the block does not need extension */
-      for (i = 0; i < bs; ++i)
+  // NEED_LEFT
+  if (extend_modes[mode] & NEED_LEFT) {
+    const int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+    i = 0;
+    if (n_left_px > 0) {
+      for (; i < n_left_px; i++)
         left_col[i] = ref[i * ref_stride - 1];
-    }
-  } else {
-    // TODO(Peter): this value should probably change for high bitdepth
-    vpx_memset16(left_col, base + 1, bs);
-  }
-
-  // TODO(hkuang) do not extend 2*bs pixels for all modes.
-  // above
-  if (up_available) {
-    const uint16_t *above_ref = ref - ref_stride;
-    if (xd->mb_to_right_edge < 0) {
-      /* slower path if the block needs border extension */
-      if (x0 + 2 * bs <= frame_width) {
-        if (right_available && bs == 4) {
-          memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0]));
-        } else {
-          memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
-          vpx_memset16(above_row + bs, above_row[bs - 1], bs);
-        }
-      } else if (x0 + bs <= frame_width) {
-        const int r = frame_width - x0;
-        if (right_available && bs == 4) {
-          memcpy(above_row, above_ref, r * sizeof(above_row[0]));
-          vpx_memset16(above_row + r, above_row[r - 1],
-                       x0 + 2 * bs - frame_width);
-        } else {
-          memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
-          vpx_memset16(above_row + bs, above_row[bs - 1], bs);
-        }
-      } else if (x0 <= frame_width) {
-        const int r = frame_width - x0;
-        memcpy(above_row, above_ref, r * sizeof(above_row[0]));
-        vpx_memset16(above_row + r, above_row[r - 1],
-                       x0 + 2 * bs - frame_width);
+      if (need_bottom && n_bottomleft_px > 0) {
+        assert(i == bs);
+        for (; i < bs + n_bottomleft_px; i++)
+          left_col[i] = ref[i * ref_stride - 1];
       }
-      // TODO(Peter) this value should probably change for high bitdepth
-      above_row[-1] = left_available ? above_ref[-1] : (base+1);
+      if (i < (bs << need_bottom))
+        memset16(&left_col[i], left_col[i - 1], (bs << need_bottom) - i);
     } else {
-      /* faster path if the block does not need extension */
-      if (bs == 4 && right_available && left_available) {
-        const_above_row = above_ref;
-      } else {
-        memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
-        if (bs == 4 && right_available)
-          memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0]));
-        else
-          vpx_memset16(above_row + bs, above_row[bs - 1], bs);
-        // TODO(Peter): this value should probably change for high bitdepth
-        above_row[-1] = left_available ? above_ref[-1] : (base+1);
-      }
+      memset16(left_col, base + 1, bs << need_bottom);
     }
-  } else {
-    vpx_memset16(above_row, base - 1, bs * 2);
-    // TODO(Peter): this value should probably change for high bitdepth
-    above_row[-1] = base - 1;
   }
 
-  // predict
-  if (mode == DC_PRED) {
-    dc_pred_high[left_available][up_available][tx_size](dst, dst_stride,
-                                                        const_above_row,
-                                                        left_col, xd->bd);
-  } else {
-    pred_high[mode][tx_size](dst, dst_stride, const_above_row, left_col,
-                             xd->bd);
+  // NEED_ABOVE
+  if (extend_modes[mode] & NEED_ABOVE) {
+    const int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+    if (n_top_px > 0) {
+      memcpy(above_row, above_ref, n_top_px * sizeof(above_row[0]));
+      i = n_top_px;
+      if (need_right && n_topright_px > 0) {
+        assert(n_top_px == bs);
+        memcpy(above_row + bs, above_ref + bs,
+               n_topright_px * sizeof(above_row[0]));
+        i += n_topright_px;
+      }
+      if (i < (bs << need_right))
+        memset16(&above_row[i], above_row[i - 1], (bs << need_right) - i);
+    } else {
+      memset16(above_row, base - 1, bs << need_right);
+    }
   }
-}
-#endif  // CONFIG_VP9_HIGHBITDEPTH
 
-static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
-                                   int ref_stride, uint8_t *dst, int dst_stride,
-                                   PREDICTION_MODE mode, TX_SIZE tx_size,
-                                   int up_available, int left_available,
-                                   int right_available, int x, int y,
-                                   int plane) {
-  int i;
-  DECLARE_ALIGNED(16, uint8_t, left_col[32]);
-  DECLARE_ALIGNED(16, uint8_t, above_data[64 + 16]);
-  uint8_t *above_row = above_data + 16;
-  const uint8_t *const_above_row = above_row;
-  const int bs = 4 << tx_size;
-  int frame_width, frame_height;
-  int x0, y0;
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
-
-  // 127 127 127 .. 127 127 127 127 127 127
-  // 129  A   B  ..  Y   Z
-  // 129  C   D  ..  W   X
-  // 129  E   F  ..  U   V
-  // 129  G   H  ..  S   T   T   T   T   T
-  // ..
-
+  if (extend_modes[mode] & NEED_ABOVELEFT) {
+    above_row[-1] = n_top_px > 0 ?
+        (n_left_px > 0 ? above_ref[-1] : base + 1) : base - 1;
+  }
+#else
   // Get current frame pointer, width and height.
   if (plane == 0) {
     frame_width = xd->cur_buf->y_width;
@@ -277,7 +379,206 @@
   y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
 
   // NEED_LEFT
+  if (need_left) {
+    if (left_available) {
+      if (xd->mb_to_bottom_edge < 0) {
+        /* slower path if the block needs border extension */
+        if (y0 + bs <= frame_height) {
+          for (i = 0; i < bs; ++i)
+            left_col[i] = ref[i * ref_stride - 1];
+        } else {
+          const int extend_bottom = frame_height - y0;
+          for (i = 0; i < extend_bottom; ++i)
+            left_col[i] = ref[i * ref_stride - 1];
+          for (; i < bs; ++i)
+            left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1];
+        }
+      } else {
+        /* faster path if the block does not need extension */
+        for (i = 0; i < bs; ++i)
+          left_col[i] = ref[i * ref_stride - 1];
+      }
+    } else {
+      // TODO(Peter): this value should probably change for high bitdepth
+      vpx_memset16(left_col, base + 1, bs);
+    }
+  }
+
+  // NEED_ABOVE
+  if (need_above) {
+    if (up_available) {
+      const uint16_t *above_ref = ref - ref_stride;
+      if (xd->mb_to_right_edge < 0) {
+        /* slower path if the block needs border extension */
+        if (x0 + bs <= frame_width) {
+          memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
+        } else if (x0 <= frame_width) {
+          const int r = frame_width - x0;
+          memcpy(above_row, above_ref, r * sizeof(above_row[0]));
+          vpx_memset16(above_row + r, above_row[r - 1], x0 + bs - frame_width);
+        }
+      } else {
+        /* faster path if the block does not need extension */
+        if (bs == 4 && right_available && left_available) {
+          const_above_row = above_ref;
+        } else {
+          memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
+        }
+      }
+      above_row[-1] = left_available ? above_ref[-1] : (base + 1);
+    } else {
+      vpx_memset16(above_row, base - 1, bs);
+      above_row[-1] = base - 1;
+    }
+  }
+
+  // NEED_ABOVERIGHT
+  if (need_aboveright) {
+    if (up_available) {
+      const uint16_t *above_ref = ref - ref_stride;
+      if (xd->mb_to_right_edge < 0) {
+        /* slower path if the block needs border extension */
+        if (x0 + 2 * bs <= frame_width) {
+          if (right_available && bs == 4) {
+            memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0]));
+          } else {
+            memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
+            vpx_memset16(above_row + bs, above_row[bs - 1], bs);
+          }
+        } else if (x0 + bs <= frame_width) {
+          const int r = frame_width - x0;
+          if (right_available && bs == 4) {
+            memcpy(above_row, above_ref, r * sizeof(above_row[0]));
+            vpx_memset16(above_row + r, above_row[r - 1],
+                         x0 + 2 * bs - frame_width);
+          } else {
+            memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
+            vpx_memset16(above_row + bs, above_row[bs - 1], bs);
+          }
+        } else if (x0 <= frame_width) {
+          const int r = frame_width - x0;
+          memcpy(above_row, above_ref, r * sizeof(above_row[0]));
+          vpx_memset16(above_row + r, above_row[r - 1],
+                       x0 + 2 * bs - frame_width);
+        }
+        // TODO(Peter): this value should probably change for high bitdepth
+        above_row[-1] = left_available ? above_ref[-1] : (base + 1);
+      } else {
+        /* faster path if the block does not need extension */
+        if (bs == 4 && right_available && left_available) {
+          const_above_row = above_ref;
+        } else {
+          memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
+          if (bs == 4 && right_available)
+            memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0]));
+          else
+            vpx_memset16(above_row + bs, above_row[bs - 1], bs);
+          // TODO(Peter): this value should probably change for high bitdepth
+          above_row[-1] = left_available ? above_ref[-1] : (base + 1);
+        }
+      }
+    } else {
+      vpx_memset16(above_row, base - 1, bs * 2);
+      // TODO(Peter): this value should probably change for high bitdepth
+      above_row[-1] = base - 1;
+    }
+  }
+#endif
+
+  // predict
+  if (mode == DC_PRED) {
+#if CONFIG_MISC_FIXES
+    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride,
+                                                       const_above_row,
+                                                       left_col, xd->bd);
+#else
+    dc_pred_high[left_available][up_available][tx_size](dst, dst_stride,
+                                                        const_above_row,
+                                                        left_col, xd->bd);
+#endif
+  } else {
+    pred_high[mode][tx_size](dst, dst_stride, const_above_row, left_col,
+                             xd->bd);
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
+                                   int ref_stride, uint8_t *dst, int dst_stride,
+                                   PREDICTION_MODE mode, TX_SIZE tx_size,
+#if CONFIG_MISC_FIXES
+                                   int n_top_px, int n_topright_px,
+                                   int n_left_px, int n_bottomleft_px,
+#else
+                                   int up_available, int left_available,
+                                   int right_available,
+#endif
+                                   int x, int y, int plane) {
+  int i;
+#if CONFIG_MISC_FIXES
+  DECLARE_ALIGNED(16, uint8_t, left_col[64]);
+  const uint8_t *above_ref = ref - ref_stride;
+#else
+  DECLARE_ALIGNED(16, uint8_t, left_col[32]);
+  int frame_width, frame_height;
+  int x0, y0;
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+#endif
+  DECLARE_ALIGNED(16, uint8_t, above_data[64 + 16]);
+  uint8_t *above_row = above_data + 16;
+  const uint8_t *const_above_row = above_row;
+  const int bs = 4 << tx_size;
+
+  // 127 127 127 .. 127 127 127 127 127 127
+  // 129  A   B  ..  Y   Z
+  // 129  C   D  ..  W   X
+  // 129  E   F  ..  U   V
+  // 129  G   H  ..  S   T   T   T   T   T
+  // ..
+
+#if CONFIG_MISC_FIXES
+  (void) xd;
+  (void) x;
+  (void) y;
+  (void) plane;
+  assert(n_top_px >= 0);
+  assert(n_topright_px >= 0);
+  assert(n_left_px >= 0);
+  assert(n_bottomleft_px >= 0);
+#else
+  // Get current frame pointer, width and height.
+  if (plane == 0) {
+    frame_width = xd->cur_buf->y_width;
+    frame_height = xd->cur_buf->y_height;
+  } else {
+    frame_width = xd->cur_buf->uv_width;
+    frame_height = xd->cur_buf->uv_height;
+  }
+
+  // Get block position in current frame.
+  x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
+  y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
+#endif
+
+  // NEED_LEFT
   if (extend_modes[mode] & NEED_LEFT) {
+#if CONFIG_MISC_FIXES
+    const int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+    i = 0;
+    if (n_left_px > 0) {
+      for (; i < n_left_px; i++)
+        left_col[i] = ref[i * ref_stride - 1];
+      if (need_bottom && n_bottomleft_px > 0) {
+        assert(i == bs);
+        for (; i < bs + n_bottomleft_px; i++)
+          left_col[i] = ref[i * ref_stride - 1];
+      }
+      if (i < (bs << need_bottom))
+        memset(&left_col[i], left_col[i - 1], (bs << need_bottom) - i);
+    } else {
+      memset(left_col, 129, bs << need_bottom);
+    }
+#else
     if (left_available) {
       if (xd->mb_to_bottom_edge < 0) {
         /* slower path if the block needs border extension */
@@ -299,10 +600,27 @@
     } else {
       memset(left_col, 129, bs);
     }
+#endif
   }
 
   // NEED_ABOVE
   if (extend_modes[mode] & NEED_ABOVE) {
+#if CONFIG_MISC_FIXES
+    const int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+    if (n_top_px > 0) {
+      memcpy(above_row, above_ref, n_top_px);
+      i = n_top_px;
+      if (need_right && n_topright_px > 0) {
+        assert(n_top_px == bs);
+        memcpy(above_row + bs, above_ref + bs, n_topright_px);
+        i += n_topright_px;
+      }
+      if (i < (bs << need_right))
+        memset(&above_row[i], above_row[i - 1], (bs << need_right) - i);
+    } else {
+      memset(above_row, 127, bs << need_right);
+    }
+#else
     if (up_available) {
       const uint8_t *above_ref = ref - ref_stride;
       if (xd->mb_to_right_edge < 0) {
@@ -327,8 +645,14 @@
       memset(above_row, 127, bs);
       above_row[-1] = 127;
     }
+#endif
   }
 
+#if CONFIG_MISC_FIXES
+  if (extend_modes[mode] & NEED_ABOVELEFT) {
+    above_row[-1] = n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : 129) : 127;
+  }
+#else
   // NEED_ABOVERIGHT
   if (extend_modes[mode] & NEED_ABOVERIGHT) {
     if (up_available) {
@@ -374,28 +698,60 @@
       above_row[-1] = 127;
     }
   }
+#endif
 
   // predict
   if (mode == DC_PRED) {
+#if CONFIG_MISC_FIXES
+    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride,
+                                                  const_above_row, left_col);
+#else
     dc_pred[left_available][up_available][tx_size](dst, dst_stride,
                                                    const_above_row, left_col);
+#endif
   } else {
     pred[mode][tx_size](dst, dst_stride, const_above_row, left_col);
   }
 }
 
-void vp10_predict_intra_block(const MACROBLOCKD *xd, int bwl_in,
+void vp10_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int bhl_in,
                              TX_SIZE tx_size, PREDICTION_MODE mode,
                              const uint8_t *ref, int ref_stride,
                              uint8_t *dst, int dst_stride,
                              int aoff, int loff, int plane) {
-  const int bw = (1 << bwl_in);
   const int txw = (1 << tx_size);
   const int have_top = loff || xd->up_available;
   const int have_left = aoff || xd->left_available;
-  const int have_right = (aoff + txw) < bw;
   const int x = aoff * 4;
   const int y = loff * 4;
+#if CONFIG_MISC_FIXES
+  const int bw = VPXMAX(2, 1 << bwl_in);
+  const int bh = VPXMAX(2, 1 << bhl_in);
+  const int mi_row = -xd->mb_to_top_edge >> 6;
+  const int mi_col = -xd->mb_to_left_edge >> 6;
+  const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  const int right_available =
+      mi_col + (bw >> !pd->subsampling_x) < xd->tile.mi_col_end;
+  const int have_right = vp10_has_right(bsize, mi_row, mi_col,
+                                        right_available,
+                                        tx_size, loff, aoff,
+                                        pd->subsampling_x);
+  const int have_bottom = vp10_has_bottom(bsize, mi_row, mi_col,
+                                          xd->mb_to_bottom_edge > 0,
+                                          tx_size, loff, aoff,
+                                          pd->subsampling_y);
+  const int wpx = 4 * bw;
+  const int hpx = 4 * bh;
+  const int txpx = 4 * txw;
+
+  int xr = (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + (wpx - x - txpx);
+  int yd =
+      (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + (hpx - y - txpx);
+#else
+  const int bw = (1 << bwl_in);
+  const int have_right = (aoff + txw) < bw;
+#endif  // CONFIG_MISC_FIXES
 
   if (xd->mi[0]->mbmi.palette_mode_info.palette_size[plane != 0] > 0) {
     const int bs = 4 * (1 << tx_size);
@@ -431,6 +787,28 @@
     return;
   }
 
+#if CONFIG_MISC_FIXES
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode,
+                                tx_size,
+                                have_top ? VPXMIN(txpx, xr + txpx) : 0,
+                                have_top && have_right ? VPXMIN(txpx, xr) : 0,
+                                have_left ? VPXMIN(txpx, yd + txpx) : 0,
+                                have_bottom && have_left ? VPXMIN(txpx, yd) : 0,
+                                x, y, plane, xd->bd);
+    return;
+  }
+#endif
+  build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode,
+                         tx_size,
+                         have_top ? VPXMIN(txpx, xr + txpx) : 0,
+                         have_top && have_right ? VPXMIN(txpx, xr) : 0,
+                         have_left ? VPXMIN(txpx, yd + txpx) : 0,
+                         have_bottom && have_left ? VPXMIN(txpx, yd) : 0,
+                         x, y, plane);
+#else  // CONFIG_MISC_FIXES
+  (void) bhl_in;
 #if CONFIG_VP9_HIGHBITDEPTH
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode,
@@ -441,6 +819,7 @@
 #endif
   build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size,
                          have_top, have_left, have_right, x, y, plane);
+#endif  // CONFIG_MISC_FIXES
 }
 
 void vp10_init_intra_predictors(void) {
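
The orders_* tables earlier in this file record the decode order of equally sized blocks inside a 64x64 superblock; a block's above-right (or bottom-left) neighbour is usable for intra prediction only if its order index is smaller, i.e. it has already been reconstructed. A self-contained sketch of that test for the 16x16 case (the real vp10_has_right() additionally falls back to a tile-edge test on the superblock's top row):

#include <stdio.h>

/* Decode order of 16x16 blocks inside a 64x64 superblock (from the patch). */
static const unsigned char sk_orders_16x16[16] = {
  0,   1,  4,  5,
  2,   3,  6,  7,
  8,   9, 12, 13,
  10, 11, 14, 15,
};

/* Sketch: is the above-right 16x16 neighbour already decoded?  row/col are
 * 16x16-block coordinates inside the superblock (0..3). */
static int sk_has_above_right_16x16(int row, int col) {
  if (row == 0) return 0;  /* top row: the real code consults the tile edge */
  if (col == 3) return 0;  /* no block to the right inside this superblock */
  return sk_orders_16x16[(row - 1) * 4 + (col + 1)] <
         sk_orders_16x16[row * 4 + col];
}

int main(void) {
  /* Block (1,0): above-right is (0,1), order 1 < 2, so it is available. */
  printf("%d\n", sk_has_above_right_16x16(1, 0));
  return 0;
}
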
diff --git a/vp10/common/reconintra.h b/vp10/common/reconintra.h
index ef551e6..f451fb8 100644
--- a/vp10/common/reconintra.h
+++ b/vp10/common/reconintra.h
@@ -20,7 +20,7 @@
 
 void vp10_init_intra_predictors(void);
 
-void vp10_predict_intra_block(const MACROBLOCKD *xd, int bwl_in,
+void vp10_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int bhl_in,
                              TX_SIZE tx_size, PREDICTION_MODE mode,
                              const uint8_t *ref, int ref_stride,
                              uint8_t *dst, int dst_stride,
diff --git a/vp10/common/seg_common.h b/vp10/common/seg_common.h
index 97b875c..cd38e8e 100644
--- a/vp10/common/seg_common.h
+++ b/vp10/common/seg_common.h
@@ -42,13 +42,15 @@
   uint8_t abs_delta;
   uint8_t temporal_update;
 
-  vpx_prob tree_probs[SEG_TREE_PROBS];
-  vpx_prob pred_probs[PREDICTION_PROBS];
-
   int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX];
   unsigned int feature_mask[MAX_SEGMENTS];
 };
 
+struct segmentation_probs {
+  vpx_prob tree_probs[SEG_TREE_PROBS];
+  vpx_prob pred_probs[PREDICTION_PROBS];
+};
+
 static INLINE int segfeature_active(const struct segmentation *seg,
                                     int segment_id,
                                     SEG_LVL_FEATURES feature_id) {
diff --git a/vp10/common/thread_common.c b/vp10/common/thread_common.c
index bbc6d11..e83cb8e 100644
--- a/vp10/common/thread_common.c
+++ b/vp10/common/thread_common.c
@@ -434,4 +434,15 @@
     for (i = 0; i < MV_FP_SIZE; i++)
       comps->fp[i] += comps_t->fp[i];
   }
+
+#if CONFIG_MISC_FIXES
+  for (i = 0; i < PREDICTION_PROBS; i++)
+    for (j = 0; j < 2; j++)
+      cm->counts.seg.pred[i][j] += counts->seg.pred[i][j];
+
+  for (i = 0; i < MAX_SEGMENTS; i++) {
+    cm->counts.seg.tree_total[i] += counts->seg.tree_total[i];
+    cm->counts.seg.tree_mispred[i] += counts->seg.tree_mispred[i];
+  }
+#endif
 }
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index cbc70ab..5c95e16 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -47,6 +47,8 @@
 
 static int is_compound_reference_allowed(const VP10_COMMON *cm) {
   int i;
+  if (frame_is_intra_only(cm))
+    return 0;
   for (i = 1; i < REFS_PER_FRAME; ++i)
     if (cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1])
       return 1;
@@ -213,6 +215,7 @@
                                           int eob, int block) {
   struct macroblockd_plane *const pd = &xd->plane[plane];
   TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block);
+  const int seg_id = xd->mi[0]->mbmi.segment_id;
   if (eob > 0) {
     tran_low_t *const dqcoeff = pd->dqcoeff;
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -220,7 +223,7 @@
       switch (tx_size) {
         case TX_4X4:
           vp10_highbd_inv_txfm_add_4x4(dqcoeff, dst, stride, eob, xd->bd,
-                                       tx_type, xd->lossless);
+                                       tx_type, xd->lossless[seg_id]);
           break;
         case TX_8X8:
           vp10_highbd_inv_txfm_add_8x8(dqcoeff, dst, stride, eob, xd->bd,
@@ -243,7 +246,7 @@
       switch (tx_size) {
         case TX_4X4:
           vp10_inv_txfm_add_4x4(dqcoeff, dst, stride, eob, tx_type,
-                                xd->lossless);
+                                xd->lossless[seg_id]);
           break;
         case TX_8X8:
           vp10_inv_txfm_add_8x8(dqcoeff, dst, stride, eob, tx_type);
@@ -281,6 +284,7 @@
                                           uint8_t *dst, int stride,
                                           int eob) {
   struct macroblockd_plane *const pd = &xd->plane[plane];
+  const int seg_id = xd->mi[0]->mbmi.segment_id;
   if (eob > 0) {
     tran_low_t *const dqcoeff = pd->dqcoeff;
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -288,7 +292,7 @@
       switch (tx_size) {
         case TX_4X4:
           vp10_highbd_inv_txfm_add_4x4(dqcoeff, dst, stride, eob, xd->bd,
-                                       tx_type, xd->lossless);
+                                       tx_type, xd->lossless[seg_id]);
           break;
         case TX_8X8:
           vp10_highbd_inv_txfm_add_8x8(dqcoeff, dst, stride, eob, xd->bd,
@@ -311,7 +315,7 @@
       switch (tx_size) {
         case TX_4X4:
           vp10_inv_txfm_add_4x4(dqcoeff, dst, stride, eob, tx_type,
-                                xd->lossless);
+                                xd->lossless[seg_id]);
           break;
         case TX_8X8:
           vp10_inv_txfm_add_8x8(dqcoeff, dst, stride, eob, tx_type);
@@ -360,7 +364,7 @@
     if (plane == 0)
       mode = xd->mi[0]->bmi[(row << 1) + col].as_mode;
 
-  vp10_predict_intra_block(xd, pd->n4_wl, tx_size, mode,
+  vp10_predict_intra_block(xd, pd->n4_wl, pd->n4_hl, tx_size, mode,
                           dst, pd->dst.stride, dst, pd->dst.stride,
                           col, row, plane);
 
@@ -922,11 +926,11 @@
   memset(left_ctx, partition_context_lookup[subsize].left, bw);
 }
 
-static PARTITION_TYPE read_partition(MACROBLOCKD *xd, int mi_row, int mi_col,
-                                     vpx_reader *r,
+static PARTITION_TYPE read_partition(VP10_COMMON *cm, MACROBLOCKD *xd,
+                                     int mi_row, int mi_col, vpx_reader *r,
                                      int has_rows, int has_cols, int bsl) {
   const int ctx = dec_partition_plane_context(xd, mi_row, mi_col, bsl);
-  const vpx_prob *const probs = get_partition_probs(xd, ctx);
+  const vpx_prob *const probs = cm->fc->partition_prob[ctx];
   FRAME_COUNTS *counts = xd->counts;
   PARTITION_TYPE p;
 
@@ -961,7 +965,7 @@
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
 
-  partition = read_partition(xd, mi_row, mi_col, r, has_rows, has_cols,
+  partition = read_partition(cm, xd, mi_row, mi_col, r, has_rows, has_cols,
                              n8x8_l2);
   subsize = subsize_lookup[partition][bsize];  // get_subsize(bsize, partition);
   if (!hbs) {
@@ -1047,6 +1051,9 @@
 static void setup_segmentation(VP10_COMMON *const cm,
                                struct vpx_read_bit_buffer *rb) {
   struct segmentation *const seg = &cm->seg;
+#if !CONFIG_MISC_FIXES
+  struct segmentation_probs *const segp = &cm->segp;
+#endif
   int i, j;
 
   seg->update_map = 0;
@@ -1063,23 +1070,26 @@
     seg->update_map = vpx_rb_read_bit(rb);
   }
   if (seg->update_map) {
+#if !CONFIG_MISC_FIXES
     for (i = 0; i < SEG_TREE_PROBS; i++)
-      seg->tree_probs[i] = vpx_rb_read_bit(rb) ? vpx_rb_read_literal(rb, 8)
-                                               : MAX_PROB;
-
+      segp->tree_probs[i] = vpx_rb_read_bit(rb) ? vpx_rb_read_literal(rb, 8)
+                                                : MAX_PROB;
+#endif
     if (frame_is_intra_only(cm) || cm->error_resilient_mode) {
       seg->temporal_update = 0;
     } else {
       seg->temporal_update = vpx_rb_read_bit(rb);
     }
+#if !CONFIG_MISC_FIXES
     if (seg->temporal_update) {
       for (i = 0; i < PREDICTION_PROBS; i++)
-        seg->pred_probs[i] = vpx_rb_read_bit(rb) ? vpx_rb_read_literal(rb, 8)
-                                                 : MAX_PROB;
+        segp->pred_probs[i] = vpx_rb_read_bit(rb) ? vpx_rb_read_literal(rb, 8)
+                                                  : MAX_PROB;
     } else {
       for (i = 0; i < PREDICTION_PROBS; i++)
-        seg->pred_probs[i] = MAX_PROB;
+        segp->pred_probs[i] = MAX_PROB;
     }
+#endif
   }
 
   // Segmentation data update
@@ -1138,15 +1148,26 @@
 
 static void setup_quantization(VP10_COMMON *const cm, MACROBLOCKD *const xd,
                                struct vpx_read_bit_buffer *rb) {
+  int i;
+
   cm->base_qindex = vpx_rb_read_literal(rb, QINDEX_BITS);
   cm->y_dc_delta_q = read_delta_q(rb);
   cm->uv_dc_delta_q = read_delta_q(rb);
   cm->uv_ac_delta_q = read_delta_q(rb);
   cm->dequant_bit_depth = cm->bit_depth;
-  xd->lossless = cm->base_qindex == 0 &&
-                 cm->y_dc_delta_q == 0 &&
-                 cm->uv_dc_delta_q == 0 &&
-                 cm->uv_ac_delta_q == 0;
+  for (i = 0; i < (cm->seg.enabled ? MAX_SEGMENTS : 1); ++i) {
+#if CONFIG_MISC_FIXES
+    const int qindex = vp10_get_qindex(&cm->seg, i, cm->base_qindex);
+#endif
+    xd->lossless[i] = cm->y_dc_delta_q == 0 &&
+#if CONFIG_MISC_FIXES
+                      qindex == 0 &&
+#else
+                      cm->base_qindex == 0 &&
+#endif
+                      cm->uv_dc_delta_q == 0 &&
+                      cm->uv_ac_delta_q == 0;
+  }
 
 #if CONFIG_VP9_HIGHBITDEPTH
   xd->bd = (int)cm->bit_depth;
@@ -1429,9 +1450,9 @@
     if (decrypt_cb) {
       uint8_t be_data[4];
       decrypt_cb(decrypt_state, *data, be_data, tile_sz_mag + 1);
-      size = mem_get_varsize(be_data, tile_sz_mag);
+      size = mem_get_varsize(be_data, tile_sz_mag) + CONFIG_MISC_FIXES;
     } else {
-      size = mem_get_varsize(*data, tile_sz_mag);
+      size = mem_get_varsize(*data, tile_sz_mag) + CONFIG_MISC_FIXES;
     }
     *data += tile_sz_mag + 1;
 
@@ -1970,6 +1991,9 @@
       if (!vp10_read_sync_code(rb))
         vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
                            "Invalid frame sync code");
+#if CONFIG_MISC_FIXES
+      read_bitdepth_colorspace_sampling(cm, rb);
+#else
       if (cm->profile > PROFILE_0) {
         read_bitdepth_colorspace_sampling(cm, rb);
       } else {
@@ -1985,6 +2009,7 @@
         cm->use_highbitdepth = 0;
 #endif
       }
+#endif
 
       pbi->refresh_frame_flags = vpx_rb_read_literal(rb, REF_FRAMES);
       setup_frame_size(cm, rb);
@@ -2092,7 +2117,8 @@
   setup_segmentation(cm, rb);
   setup_segmentation_dequant(cm);
 #if CONFIG_MISC_FIXES
-  cm->tx_mode = xd->lossless ? ONLY_4X4 : read_tx_mode(rb);
+  cm->tx_mode = (!cm->seg.enabled && xd->lossless[0]) ? ONLY_4X4
+                                                      : read_tx_mode(rb);
   cm->reference_mode = read_frame_reference_mode(cm, rb);
 #endif
 
@@ -2114,7 +2140,7 @@
 #endif
   FRAME_CONTEXT *const fc = cm->fc;
   vpx_reader r;
-  int k;
+  int k, i, j;
 
   if (vpx_reader_init(&r, data, partition_size, pbi->decrypt_cb,
                       pbi->decrypt_state))
@@ -2122,7 +2148,7 @@
                        "Failed to allocate bool decoder 0");
 
 #if !CONFIG_MISC_FIXES
-  cm->tx_mode = xd->lossless ? ONLY_4X4 : read_tx_mode(&r);
+  cm->tx_mode = xd->lossless[0] ? ONLY_4X4 : read_tx_mode(&r);
 #endif
   if (cm->tx_mode == TX_MODE_SELECT)
     read_tx_mode_probs(&fc->tx_probs, &r);
@@ -2131,9 +2157,35 @@
   for (k = 0; k < SKIP_CONTEXTS; ++k)
     vp10_diff_update_prob(&r, &fc->skip_probs[k]);
 
-  if (!frame_is_intra_only(cm)) {
+#if CONFIG_MISC_FIXES
+  if (cm->seg.enabled) {
+    if (cm->seg.temporal_update) {
+      for (k = 0; k < PREDICTION_PROBS; k++)
+        vp10_diff_update_prob(&r, &cm->fc->seg.pred_probs[k]);
+    }
+    for (k = 0; k < MAX_SEGMENTS - 1; k++)
+      vp10_diff_update_prob(&r, &cm->fc->seg.tree_probs[k]);
+  }
+
+  for (j = 0; j < INTRA_MODES; j++)
+    for (i = 0; i < INTRA_MODES - 1; ++i)
+      vp10_diff_update_prob(&r, &fc->uv_mode_prob[j][i]);
+
+  for (j = 0; j < PARTITION_CONTEXTS; ++j)
+    for (i = 0; i < PARTITION_TYPES - 1; ++i)
+      vp10_diff_update_prob(&r, &fc->partition_prob[j][i]);
+#endif
+
+  if (frame_is_intra_only(cm)) {
+    vp10_copy(cm->kf_y_prob, vp10_kf_y_mode_prob);
+#if CONFIG_MISC_FIXES
+    for (k = 0; k < INTRA_MODES; k++)
+      for (j = 0; j < INTRA_MODES; j++)
+        for (i = 0; i < INTRA_MODES - 1; ++i)
+          vp10_diff_update_prob(&r, &cm->kf_y_prob[k][j][i]);
+#endif
+  } else {
     nmv_context *const nmvc = &fc->nmvc;
-    int i, j;
 
     read_inter_mode_probs(fc, &r);
 
@@ -2154,15 +2206,11 @@
       for (i = 0; i < INTRA_MODES - 1; ++i)
         vp10_diff_update_prob(&r, &fc->y_mode_prob[j][i]);
 
-#if CONFIG_MISC_FIXES
-    for (j = 0; j < INTRA_MODES; j++)
-      for (i = 0; i < INTRA_MODES - 1; ++i)
-        vp10_diff_update_prob(&r, &fc->uv_mode_prob[j][i]);
-#endif
-
+#if !CONFIG_MISC_FIXES
     for (j = 0; j < PARTITION_CONTEXTS; ++j)
       for (i = 0; i < PARTITION_TYPES - 1; ++i)
         vp10_diff_update_prob(&r, &fc->partition_prob[j][i]);
+#endif
 
     read_mv_probs(nmvc, cm->allow_high_precision_mv, &r);
   }
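
One decoder-side consequence worth spelling out: lossless is now a per-segment property, since a segment's ALT_Q feature can drive its effective qindex to 0 while other segments stay lossy, so xd->lossless becomes an array and TX_MODE is forced to ONLY_4X4 only when segmentation is off. A sketch of the corresponding whole-frame check, assuming the flags were filled in as in setup_quantization() above:

#define SK_MAX_SEGMENTS 8

/* Sketch: the frame may be treated as all-lossless only if every segment
 * that can occur is lossless; with segmentation off only entry 0 matters. */
static int sk_frame_is_lossless(const int lossless[SK_MAX_SEGMENTS],
                                int seg_enabled) {
  int i;
  const int n = seg_enabled ? SK_MAX_SEGMENTS : 1;
  for (i = 0; i < n; i++)
    if (!lossless[i]) return 0;
  return 1;
}
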
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index e82ddd6..38ea073 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -73,8 +73,9 @@
   return NEARESTMV + mode;
 }
 
-static int read_segment_id(vpx_reader *r, const struct segmentation *seg) {
-  return vpx_read_tree(r, vp10_segment_tree, seg->tree_probs);
+static int read_segment_id(vpx_reader *r,
+    const struct segmentation_probs *segp) {
+  return vpx_read_tree(r, vp10_segment_tree, segp->tree_probs);
 }
 
 static TX_SIZE read_selected_tx_size(VP10_COMMON *cm, MACROBLOCKD *xd,
@@ -129,18 +130,32 @@
       cm->current_frame_seg_map[mi_offset + y * cm->mi_cols + x] = segment_id;
 }
 
-static int read_intra_segment_id(VP10_COMMON *const cm, int mi_offset,
-                                 int x_mis, int y_mis,
+static int read_intra_segment_id(VP10_COMMON *const cm, MACROBLOCKD *const xd,
+                                 int mi_offset, int x_mis, int y_mis,
                                  vpx_reader *r) {
   struct segmentation *const seg = &cm->seg;
+#if CONFIG_MISC_FIXES
+  FRAME_COUNTS *counts = xd->counts;
+  struct segmentation_probs *const segp = &cm->fc->seg;
+#else
+  struct segmentation_probs *const segp = &cm->segp;
+#endif
   int segment_id;
 
+#if !CONFIG_MISC_FIXES
+  (void) xd;
+#endif
+
   if (!seg->enabled)
     return 0;  // Default for disabled segmentation
 
   assert(seg->update_map && !seg->temporal_update);
 
-  segment_id = read_segment_id(r, seg);
+  segment_id = read_segment_id(r, segp);
+#if CONFIG_MISC_FIXES
+  if (counts)
+    ++counts->seg.tree_total[segment_id];
+#endif
   set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id);
   return segment_id;
 }
@@ -160,6 +175,12 @@
 static int read_inter_segment_id(VP10_COMMON *const cm, MACROBLOCKD *const xd,
                                  int mi_row, int mi_col, vpx_reader *r) {
   struct segmentation *const seg = &cm->seg;
+#if CONFIG_MISC_FIXES
+  FRAME_COUNTS *counts = xd->counts;
+  struct segmentation_probs *const segp = &cm->fc->seg;
+#else
+  struct segmentation_probs *const segp = &cm->segp;
+#endif
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   int predicted_segment_id, segment_id;
   const int mi_offset = mi_row * cm->mi_cols + mi_col;
@@ -184,12 +205,28 @@
   }
 
   if (seg->temporal_update) {
-    const vpx_prob pred_prob = vp10_get_pred_prob_seg_id(seg, xd);
+    const int ctx = vp10_get_pred_context_seg_id(xd);
+    const vpx_prob pred_prob = segp->pred_probs[ctx];
     mbmi->seg_id_predicted = vpx_read(r, pred_prob);
-    segment_id = mbmi->seg_id_predicted ? predicted_segment_id
-                                        : read_segment_id(r, seg);
+#if CONFIG_MISC_FIXES
+    if (counts)
+      ++counts->seg.pred[ctx][mbmi->seg_id_predicted];
+#endif
+    if (mbmi->seg_id_predicted) {
+      segment_id = predicted_segment_id;
+    } else {
+      segment_id = read_segment_id(r, segp);
+#if CONFIG_MISC_FIXES
+      if (counts)
+        ++counts->seg.tree_mispred[segment_id];
+#endif
+    }
   } else {
-    segment_id = read_segment_id(r, seg);
+    segment_id = read_segment_id(r, segp);
+#if CONFIG_MISC_FIXES
+    if (counts)
+      ++counts->seg.tree_total[segment_id];
+#endif
   }
   set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id);
   return segment_id;
@@ -258,7 +295,7 @@
   const int x_mis = VPXMIN(cm->mi_cols - mi_col, bw);
   const int y_mis = VPXMIN(cm->mi_rows - mi_row, bh);
 
-  mbmi->segment_id = read_intra_segment_id(cm, mi_offset, x_mis, y_mis, r);
+  mbmi->segment_id = read_intra_segment_id(cm, xd, mi_offset, x_mis, y_mis, r);
   mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
   mbmi->tx_size = read_tx_size(cm, xd, 1, r);
   mbmi->ref_frame[0] = INTRA_FRAME;
@@ -268,27 +305,27 @@
     case BLOCK_4X4:
       for (i = 0; i < 4; ++i)
         mi->bmi[i].as_mode =
-            read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, i));
+            read_intra_mode(r, get_y_mode_probs(cm, mi, above_mi, left_mi, i));
       mbmi->mode = mi->bmi[3].as_mode;
       break;
     case BLOCK_4X8:
       mi->bmi[0].as_mode = mi->bmi[2].as_mode =
-          read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0));
+          read_intra_mode(r, get_y_mode_probs(cm, mi, above_mi, left_mi, 0));
       mi->bmi[1].as_mode = mi->bmi[3].as_mode = mbmi->mode =
-          read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 1));
+          read_intra_mode(r, get_y_mode_probs(cm, mi, above_mi, left_mi, 1));
       break;
     case BLOCK_8X4:
       mi->bmi[0].as_mode = mi->bmi[1].as_mode =
-          read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0));
+          read_intra_mode(r, get_y_mode_probs(cm, mi, above_mi, left_mi, 0));
       mi->bmi[2].as_mode = mi->bmi[3].as_mode = mbmi->mode =
-          read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 2));
+          read_intra_mode(r, get_y_mode_probs(cm, mi, above_mi, left_mi, 2));
       break;
     default:
       mbmi->mode = read_intra_mode(r,
-                                   get_y_mode_probs(mi, above_mi, left_mi, 0));
+          get_y_mode_probs(cm, mi, above_mi, left_mi, 0));
   }
 
-  mbmi->uv_mode = read_intra_mode(r, vp10_kf_uv_mode_prob[mbmi->mode]);
+  mbmi->uv_mode = read_intra_mode_uv(cm, xd, r, mbmi->mode);
 
   mbmi->palette_mode_info.palette_size[0] = 0;
   mbmi->palette_mode_info.palette_size[1] = 0;
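
A note on the sub-8x8 cases above: bmi[] holds one entry per 4x4 unit of the 8x8 MI in raster order, and for the rectangular sub-8x8 sizes two units share each coded luma mode. A sketch of the index mapping implied by those assignments (enum names are local stand-ins):

/* Sketch: which bmi[] slot carries the coded luma mode for the 4x4 unit at
 * (row, col) of an 8x8 MI. */
enum { SK_BLOCK_4X4, SK_BLOCK_4X8, SK_BLOCK_8X4, SK_BLOCK_8X8_UP };

static int sk_y_mode_slot(int bsize, int row, int col) {
  switch (bsize) {
    case SK_BLOCK_4X4: return (row << 1) + col;  /* four independent modes */
    case SK_BLOCK_4X8: return col;       /* bmi[0]==bmi[2], bmi[1]==bmi[3] */
    case SK_BLOCK_8X4: return row << 1;  /* bmi[0]==bmi[1], bmi[2]==bmi[3] */
    default:           return 0;         /* one mode for 8x8 and larger */
  }
}
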
diff --git a/vp10/decoder/decoder.c b/vp10/decoder/decoder.c
index 03a81f5..d8864d2 100644
--- a/vp10/decoder/decoder.c
+++ b/vp10/decoder/decoder.c
@@ -506,6 +506,7 @@
 
         for (j = 0; j < mag; ++j)
           this_sz |= (*x++) << (j * 8);
+        this_sz += CONFIG_MISC_FIXES;
         sizes[i] = this_sz;
 #if CONFIG_MISC_FIXES
         frame_sz_sum += this_sz;
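
The CONFIG_MISC_FIXES addend above, like the tile-size read in decodeframe.c, reflects a small bitstream change: with the fix, size fields are written minus one, so a coded zero means one byte and no value of the field is wasted; the decoder adds the one back. In sketch form:

#include <stdint.h>

/* Sketch: size fields are coded minus one when the fix is on
 * (a real size is always >= 1). */
static uint32_t sk_code_size(uint32_t actual) { return actual - 1; }
static uint32_t sk_read_size(uint32_t coded)  { return coded + 1; }
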
diff --git a/vp10/decoder/dsubexp.c b/vp10/decoder/dsubexp.c
index dbcdb49..36c1917 100644
--- a/vp10/decoder/dsubexp.c
+++ b/vp10/decoder/dsubexp.c
@@ -23,13 +23,13 @@
 
 static int decode_uniform(vpx_reader *r) {
   const int l = 8;
-  const int m = (1 << l) - 191;
+  const int m = (1 << l) - 191 + CONFIG_MISC_FIXES;
   const int v = vpx_read_literal(r, l - 1);
   return v < m ?  v : (v << 1) - m + vpx_read_bit(r);
 }
 
 static int inv_remap_prob(int v, int m) {
-  static int inv_map_table[MAX_PROB] = {
+  static const uint8_t inv_map_table[MAX_PROB - CONFIG_MISC_FIXES] = {
       7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176, 189,
     202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,  10,  11,
      12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,  25,  26,  27,
@@ -46,7 +46,10 @@
     191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
     207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221, 222,
     223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238,
-    239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 253
+    239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253,
+#if !CONFIG_MISC_FIXES
+    253
+#endif
   };
   assert(v < (int)(sizeof(inv_map_table) / sizeof(inv_map_table[0])));
   v = inv_map_table[v];
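
The table shrink pairs with the decode_uniform() change: with the fix, MAX_PROB - 1 = 254 distinct remapped values are coded, the duplicated trailing 253 entry disappears, and the uniform-code cutoff m grows by one to match. The decoded value is then recentered around the previous probability; the usual inverse recentering looks like this (a sketch matching the common vpx formulation, offered as an assumption rather than a verbatim excerpt):

/* Sketch: map a nonnegative coded delta v back to a value near the previous
 * one m (inverse of the encoder's recentering). */
static int sk_inv_recenter_nonneg(int v, int m) {
  if (v > 2 * m) return v;              /* beyond the folded zone: identity */
  return (v & 1) ? m - ((v + 1) >> 1)   /* odd codes land below m */
                 : m + (v >> 1);        /* even codes land at or above m */
}
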
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index ca9b17b..d9b4be4 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -48,7 +48,7 @@
     {0, 1}, {2, 2}, {6, 3}, {14, 4}, {30, 5}, {62, 6}, {63, 6},
 };
 static const struct vp10_token
-palette_color_encodings[PALETTE_MAX_SIZE - 1][8] = {
+palette_color_encodings[PALETTE_MAX_SIZE - 1][PALETTE_MAX_SIZE] = {
     {{0, 1}, {1, 1}},  // 2 colors
     {{0, 1}, {2, 2}, {3, 2}},  // 3 colors
     {{0, 1}, {2, 2}, {6, 3}, {7, 3}},  // 4 colors
@@ -251,9 +251,10 @@
 }
 
 static void write_segment_id(vpx_writer *w, const struct segmentation *seg,
+                             const struct segmentation_probs *segp,
                              int segment_id) {
   if (seg->enabled && seg->update_map)
-    vp10_write_tree(w, vp10_segment_tree, seg->tree_probs, segment_id, 3, 0);
+    vp10_write_tree(w, vp10_segment_tree, segp->tree_probs, segment_id, 3, 0);
 }
 
 // This function encodes the reference frame
@@ -299,6 +300,11 @@
   const MACROBLOCK *const x = &cpi->td.mb;
   const MACROBLOCKD *const xd = &x->e_mbd;
   const struct segmentation *const seg = &cm->seg;
+#if CONFIG_MISC_FIXES
+  const struct segmentation_probs *const segp = &cm->fc->seg;
+#else
+  const struct segmentation_probs *const segp = &cm->segp;
+#endif
   const MB_MODE_INFO *const mbmi = &mi->mbmi;
   const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
   const PREDICTION_MODE mode = mbmi->mode;
@@ -312,12 +318,12 @@
   if (seg->update_map) {
     if (seg->temporal_update) {
       const int pred_flag = mbmi->seg_id_predicted;
-      vpx_prob pred_prob = vp10_get_pred_prob_seg_id(seg, xd);
+      vpx_prob pred_prob = vp10_get_pred_prob_seg_id(segp, xd);
       vpx_write(w, pred_flag, pred_prob);
       if (!pred_flag)
-        write_segment_id(w, seg, segment_id);
+        write_segment_id(w, seg, segp, segment_id);
     } else {
-      write_segment_id(w, seg, segment_id);
+      write_segment_id(w, seg, segp, segment_id);
     }
   }
 
@@ -429,6 +435,11 @@
 static void write_mb_modes_kf(const VP10_COMMON *cm, const MACROBLOCKD *xd,
                               MODE_INFO **mi_8x8, vpx_writer *w) {
   const struct segmentation *const seg = &cm->seg;
+#if CONFIG_MISC_FIXES
+  const struct segmentation_probs *const segp = &cm->fc->seg;
+#else
+  const struct segmentation_probs *const segp = &cm->segp;
+#endif
   const MODE_INFO *const mi = mi_8x8[0];
   const MODE_INFO *const above_mi = xd->above_mi;
   const MODE_INFO *const left_mi = xd->left_mi;
@@ -436,7 +447,7 @@
   const BLOCK_SIZE bsize = mbmi->sb_type;
 
   if (seg->update_map)
-    write_segment_id(w, seg, mbmi->segment_id);
+    write_segment_id(w, seg, segp, mbmi->segment_id);
 
   write_skip(cm, xd, mbmi->segment_id, mi, w);
 
@@ -444,7 +455,8 @@
     write_selected_tx_size(cm, xd, w);
 
   if (bsize >= BLOCK_8X8) {
-    write_intra_mode(w, mbmi->mode, get_y_mode_probs(mi, above_mi, left_mi, 0));
+    write_intra_mode(w, mbmi->mode,
+                     get_y_mode_probs(cm, mi, above_mi, left_mi, 0));
   } else {
     const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
     const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
@@ -454,12 +466,12 @@
       for (idx = 0; idx < 2; idx += num_4x4_w) {
         const int block = idy * 2 + idx;
         write_intra_mode(w, mi->bmi[block].as_mode,
-                         get_y_mode_probs(mi, above_mi, left_mi, block));
+                         get_y_mode_probs(cm, mi, above_mi, left_mi, block));
       }
     }
   }
 
-  write_intra_mode(w, mbmi->uv_mode, vp10_kf_uv_mode_prob[mbmi->mode]);
+  write_intra_mode(w, mbmi->uv_mode, cm->fc->uv_mode_prob[mbmi->mode]);
 
   if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools &&
       mbmi->mode == DC_PRED)
@@ -514,7 +526,7 @@
                             int hbs, int mi_row, int mi_col,
                             PARTITION_TYPE p, BLOCK_SIZE bsize, vpx_writer *w) {
   const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
-  const vpx_prob *const probs = xd->partition_probs[ctx];
+  const vpx_prob *const probs = cm->fc->partition_prob[ctx];
   const int has_rows = (mi_row + hbs) < cm->mi_rows;
   const int has_cols = (mi_col + hbs) < cm->mi_cols;
 
@@ -592,12 +604,9 @@
 static void write_modes(VP10_COMP *cpi,
                         const TileInfo *const tile, vpx_writer *w,
                         TOKENEXTRA **tok, const TOKENEXTRA *const tok_end) {
-  const VP10_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
   int mi_row, mi_col;
 
-  set_partition_probs(cm, xd);
-
   for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
        mi_row += MI_BLOCK_SIZE) {
     vp10_zero(xd->left_seg_context);
@@ -859,6 +868,9 @@
   int i, j;
 
   const struct segmentation *seg = &cm->seg;
+#if !CONFIG_MISC_FIXES
+  const struct segmentation_probs *segp = &cm->segp;
+#endif
 
   vpx_wb_write_bit(wb, seg->enabled);
   if (!seg->enabled)
@@ -873,14 +885,16 @@
   if (seg->update_map) {
     // Select the coding strategy (temporal or spatial)
     vp10_choose_segmap_coding_method(cm, xd);
+#if !CONFIG_MISC_FIXES
     // Write out probabilities used to decode unpredicted macro-block segments
     for (i = 0; i < SEG_TREE_PROBS; i++) {
-      const int prob = seg->tree_probs[i];
+      const int prob = segp->tree_probs[i];
       const int update = prob != MAX_PROB;
       vpx_wb_write_bit(wb, update);
       if (update)
         vpx_wb_write_literal(wb, prob, 8);
     }
+#endif
 
     // Write out the chosen coding method.
     if (!frame_is_intra_only(cm) && !cm->error_resilient_mode) {
@@ -888,15 +902,18 @@
     } else {
       assert(seg->temporal_update == 0);
     }
+
+#if !CONFIG_MISC_FIXES
     if (seg->temporal_update) {
       for (i = 0; i < PREDICTION_PROBS; i++) {
-        const int prob = seg->pred_probs[i];
+        const int prob = segp->pred_probs[i];
         const int update = prob != MAX_PROB;
         vpx_wb_write_bit(wb, update);
         if (update)
           vpx_wb_write_literal(wb, prob, 8);
       }
     }
+#endif
   }
 
   // Segmentation data
@@ -925,6 +942,27 @@
 }
 
 #if CONFIG_MISC_FIXES
+static void update_seg_probs(VP10_COMP *cpi, vpx_writer *w) {
+  VP10_COMMON *cm = &cpi->common;
+
+  if (!cpi->common.seg.enabled)
+    return;
+
+  if (cpi->common.seg.temporal_update) {
+    int i;
+
+    for (i = 0; i < PREDICTION_PROBS; i++)
+      vp10_cond_prob_diff_update(w, &cm->fc->seg.pred_probs[i],
+          cm->counts.seg.pred[i]);
+
+    prob_diff_update(vp10_segment_tree, cm->fc->seg.tree_probs,
+        cm->counts.seg.tree_mispred, MAX_SEGMENTS, w);
+  } else {
+    prob_diff_update(vp10_segment_tree, cm->fc->seg.tree_probs,
+        cm->counts.seg.tree_total, MAX_SEGMENTS, w);
+  }
+}
+
 static void write_txfm_mode(TX_MODE mode, struct vpx_write_bit_buffer *wb) {
   vpx_wb_write_bit(wb, mode == TX_MODE_SELECT);
   if (mode != TX_MODE_SELECT)
@@ -1079,9 +1117,13 @@
       assert(tok == tok_end);
       vpx_stop_encode(&residual_bc);
       if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1) {
+        unsigned int tile_sz;
+
         // size of this tile
-        mem_put_le32(data_ptr + total_size, residual_bc.pos);
-        max_tile = max_tile > residual_bc.pos ? max_tile : residual_bc.pos;
+        assert(residual_bc.pos > 0);
+        tile_sz = residual_bc.pos - CONFIG_MISC_FIXES;
+        mem_put_le32(data_ptr + total_size, tile_sz);
+        max_tile = max_tile > tile_sz ? max_tile : tile_sz;
         total_size += 4;
       }
 
@@ -1246,10 +1288,14 @@
     if (cm->intra_only) {
       write_sync_code(wb);
 
+#if CONFIG_MISC_FIXES
+      write_bitdepth_colorspace_sampling(cm, wb);
+#else
       // Note for profile 0, 420 8bpp is assumed.
       if (cm->profile > PROFILE_0) {
         write_bitdepth_colorspace_sampling(cm, wb);
       }
+#endif
 
       vpx_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
       write_frame_size(cm, wb);
@@ -1288,7 +1334,7 @@
   encode_quantization(cm, wb);
   encode_segmentation(cm, xd, wb);
 #if CONFIG_MISC_FIXES
-  if (xd->lossless)
+  if (!cm->seg.enabled && xd->lossless[0])
     cm->tx_mode = TX_4X4;
   else
     write_txfm_mode(cm->tx_mode, wb);
@@ -1310,11 +1356,15 @@
   FRAME_CONTEXT *const fc = cm->fc;
   FRAME_COUNTS *counts = cpi->td.counts;
   vpx_writer header_bc;
+  int i;
+#if CONFIG_MISC_FIXES
+  int j;
+#endif
 
   vpx_start_encode(&header_bc, data);
 
 #if !CONFIG_MISC_FIXES
-  if (cpi->td.mb.e_mbd.lossless)
+  if (cpi->td.mb.e_mbd.lossless[0])
     cm->tx_mode = TX_4X4;
   else
     update_txfm_probs(cm, &header_bc, counts);
@@ -1323,10 +1373,27 @@
 #endif
   update_coef_probs(cpi, &header_bc);
   update_skip_probs(cm, &header_bc, counts);
+#if CONFIG_MISC_FIXES
+  update_seg_probs(cpi, &header_bc);
 
-  if (!frame_is_intra_only(cm)) {
-    int i;
+  for (i = 0; i < INTRA_MODES; ++i)
+    prob_diff_update(vp10_intra_mode_tree, fc->uv_mode_prob[i],
+                     counts->uv_mode[i], INTRA_MODES, &header_bc);
 
+  for (i = 0; i < PARTITION_CONTEXTS; ++i)
+    prob_diff_update(vp10_partition_tree, fc->partition_prob[i],
+                     counts->partition[i], PARTITION_TYPES, &header_bc);
+#endif
+
+  if (frame_is_intra_only(cm)) {
+    vp10_copy(cm->kf_y_prob, vp10_kf_y_mode_prob);
+#if CONFIG_MISC_FIXES
+    for (i = 0; i < INTRA_MODES; ++i)
+      for (j = 0; j < INTRA_MODES; ++j)
+        prob_diff_update(vp10_intra_mode_tree, cm->kf_y_prob[i][j],
+                         counts->kf_y_mode[i][j], INTRA_MODES, &header_bc);
+#endif
+  } else {
     for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
       prob_diff_update(vp10_inter_mode_tree, cm->fc->inter_mode_probs[i],
                        counts->inter_mode[i], INTER_MODES, &header_bc);
@@ -1377,15 +1444,11 @@
       prob_diff_update(vp10_intra_mode_tree, cm->fc->y_mode_prob[i],
                        counts->y_mode[i], INTRA_MODES, &header_bc);
 
-#if CONFIG_MISC_FIXES
-    for (i = 0; i < INTRA_MODES; ++i)
-      prob_diff_update(vp10_intra_mode_tree, cm->fc->uv_mode_prob[i],
-                       counts->uv_mode[i], INTRA_MODES, &header_bc);
-#endif
-
+#if !CONFIG_MISC_FIXES
     for (i = 0; i < PARTITION_CONTEXTS; ++i)
       prob_diff_update(vp10_partition_tree, fc->partition_prob[i],
                        counts->partition[i], PARTITION_TYPES, &header_bc);
+#endif
 
     vp10_write_nmv_probs(cm, cm->allow_high_precision_mv, &header_bc,
                         &counts->mv);
@@ -1408,17 +1471,17 @@
     if (n == n_tiles - 1) {
       tile_sz = sz - rpos;
     } else {
-      tile_sz = mem_get_le32(&dest[rpos]);
+      tile_sz = mem_get_le32(&dest[rpos]) + 1;
       rpos += 4;
       switch (mag) {
         case 0:
-          dest[wpos] = tile_sz;
+          dest[wpos] = tile_sz - 1;
           break;
         case 1:
-          mem_put_le16(&dest[wpos], tile_sz);
+          mem_put_le16(&dest[wpos], tile_sz - 1);
           break;
         case 2:
-          mem_put_le24(&dest[wpos], tile_sz);
+          mem_put_le24(&dest[wpos], tile_sz - 1);
           break;
         case 3:  // remuxing should only happen if mag < 3
         default:
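
The final hunk above is the tile-size remux: tiles are first written with fixed
4-byte little-endian size fields, and once the largest size (and hence the
magnitude mag) is known, the fields are squeezed down to mag + 1 bytes each, now
read and re-written under the same minus-one convention. A compact sketch of
that repacking pass, assuming (as the diff implies) that the last tile carries
no explicit size; helper names are ours:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void toy_put_le(uint8_t *p, uint32_t v, int bytes) {
  int i;
  for (i = 0; i < bytes; ++i) p[i] = (uint8_t)((v >> (8 * i)) & 0xff);
}

static uint32_t toy_get_le(const uint8_t *p, int bytes) {
  uint32_t v = 0;
  int i;
  for (i = 0; i < bytes; ++i) v |= (uint32_t)p[i] << (8 * i);
  return v;
}

/* In-place repack: the write position never passes the read position,
   so a single forward pass with memmove() is safe. Returns the new
   total size. */
static size_t toy_remux(uint8_t *dest, size_t sz, int n_tiles, int mag) {
  size_t rpos = 0, wpos = 0;
  int n;
  for (n = 0; n < n_tiles; ++n) {
    uint32_t tile_sz;
    if (n == n_tiles - 1) {
      tile_sz = (uint32_t)(sz - rpos);          /* implicit last size */
    } else {
      tile_sz = toy_get_le(&dest[rpos], 4) + 1; /* stored as size - 1 */
      rpos += 4;
      toy_put_le(&dest[wpos], tile_sz - 1, mag + 1);
      wpos += (size_t)(mag + 1);
    }
    memmove(&dest[wpos], &dest[rpos], tile_sz);
    wpos += tile_sz;
    rpos += tile_sz;
  }
  return wpos;
}
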
diff --git a/vp10/encoder/context_tree.c b/vp10/encoder/context_tree.c
index 532e82c..6c056d2 100644
--- a/vp10/encoder/context_tree.c
+++ b/vp10/encoder/context_tree.c
@@ -30,13 +30,13 @@
   for (i = 0; i < MAX_MB_PLANE; ++i) {
     for (k = 0; k < 3; ++k) {
       CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
-                      vpx_memalign(16, num_pix * sizeof(*ctx->coeff[i][k])));
+                      vpx_memalign(32, num_pix * sizeof(*ctx->coeff[i][k])));
       CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
-                      vpx_memalign(16, num_pix * sizeof(*ctx->qcoeff[i][k])));
+                      vpx_memalign(32, num_pix * sizeof(*ctx->qcoeff[i][k])));
       CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k],
-                      vpx_memalign(16, num_pix * sizeof(*ctx->dqcoeff[i][k])));
+                      vpx_memalign(32, num_pix * sizeof(*ctx->dqcoeff[i][k])));
       CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
-                      vpx_memalign(16, num_blk * sizeof(*ctx->eobs[i][k])));
+                      vpx_memalign(32, num_blk * sizeof(*ctx->eobs[i][k])));
       ctx->coeff_pbuf[i][k]   = ctx->coeff[i][k];
       ctx->qcoeff_pbuf[i][k]  = ctx->qcoeff[i][k];
       ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k];
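
context_tree.c widens the coefficient, quantized-coefficient and eob buffers
from 16- to 32-byte alignment, presumably so 256-bit SIMD (AVX2) loads and
stores can be used on them. A portable stand-in for a 32-byte vpx_memalign()
using C11 aligned_alloc(), which requires the size to be a multiple of the
alignment:

#include <stdlib.h>

static void *toy_memalign32(size_t size) {
  const size_t align = 32;
  const size_t rounded = (size + align - 1) & ~(align - 1);
  /* The caller must still check for NULL, as CHECK_MEM_ERROR does. */
  return aligned_alloc(align, rounded);
}
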
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index bb16c10..f303b01 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -2203,7 +2203,7 @@
         // terminated for current branch of the partition search tree.
         // The dist & rate thresholds are set to 0 at speed 0 to disable the
         // early termination at that speed.
-        if (!x->e_mbd.lossless &&
+        if (!x->e_mbd.lossless[xd->mi[0]->mbmi.segment_id] &&
             (ctx->skippable && best_rdc.dist < dist_breakout_thr &&
             best_rdc.rate < rate_breakout_thr)) {
           do_split = 0;
@@ -2588,7 +2588,7 @@
 }
 
 static TX_MODE select_tx_mode(const VP10_COMP *cpi, MACROBLOCKD *const xd) {
-  if (xd->lossless)
+  if (!cpi->common.seg.enabled && xd->lossless[0])
     return ONLY_4X4;
   if (cpi->sf.tx_size_search_method == USE_LARGESTALL)
     return ALLOW_32X32;
@@ -2695,6 +2695,7 @@
   VP10_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
   RD_COUNTS *const rdc = &cpi->td.rd_counts;
+  int i;
 
   xd->mi = cm->mi_grid_visible;
   xd->mi[0] = cm->mi;
@@ -2704,12 +2705,21 @@
   vp10_zero(rdc->comp_pred_diff);
   vp10_zero(rdc->filter_diff);
 
-  xd->lossless = cm->base_qindex == 0 &&
-                 cm->y_dc_delta_q == 0 &&
-                 cm->uv_dc_delta_q == 0 &&
-                 cm->uv_ac_delta_q == 0;
+  for (i = 0; i < (cm->seg.enabled ? MAX_SEGMENTS : 1); ++i) {
+#if CONFIG_MISC_FIXES
+    const int qindex = vp10_get_qindex(&cm->seg, i, cm->base_qindex);
+#endif
+    xd->lossless[i] = cm->y_dc_delta_q == 0 &&
+#if CONFIG_MISC_FIXES
+                      qindex == 0 &&
+#else
+                      cm->base_qindex == 0 &&
+#endif
+                      cm->uv_dc_delta_q == 0 &&
+                      cm->uv_ac_delta_q == 0;
+  }
 
-  if (xd->lossless)
+  if (!cm->seg.enabled && xd->lossless[0])
     x->optimize = 0;
 
   cm->tx_mode = select_tx_mode(cpi, xd);
@@ -2797,6 +2807,8 @@
       cm->comp_var_ref[0] = LAST_FRAME;
       cm->comp_var_ref[1] = GOLDEN_FRAME;
     }
+  } else {
+    cpi->allow_comp_inter_inter = 0;
   }
 
   if (cpi->sf.frame_parameter_update) {
@@ -2900,7 +2912,9 @@
   }
 }
 
-static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) {
+static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi,
+                            const MODE_INFO *above_mi, const MODE_INFO *left_mi,
+                            const int intraonly) {
   const PREDICTION_MODE y_mode = mi->mbmi.mode;
   const PREDICTION_MODE uv_mode = mi->mbmi.uv_mode;
   const BLOCK_SIZE bsize = mi->mbmi.sb_type;
@@ -2910,10 +2924,25 @@
     const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
     const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
     for (idy = 0; idy < 2; idy += num_4x4_h)
-      for (idx = 0; idx < 2; idx += num_4x4_w)
-        ++counts->y_mode[0][mi->bmi[idy * 2 + idx].as_mode];
+      for (idx = 0; idx < 2; idx += num_4x4_w) {
+        const int bidx = idy * 2 + idx;
+        const PREDICTION_MODE bmode = mi->bmi[bidx].as_mode;
+        if (intraonly) {
+          const PREDICTION_MODE a = vp10_above_block_mode(mi, above_mi, bidx);
+          const PREDICTION_MODE l = vp10_left_block_mode(mi, left_mi, bidx);
+          ++counts->kf_y_mode[a][l][bmode];
+        } else {
+          ++counts->y_mode[0][bmode];
+        }
+      }
   } else {
-    ++counts->y_mode[size_group_lookup[bsize]][y_mode];
+    if (intraonly) {
+      const PREDICTION_MODE above = vp10_above_block_mode(mi, above_mi, 0);
+      const PREDICTION_MODE left = vp10_left_block_mode(mi, left_mi, 0);
+      ++counts->kf_y_mode[above][left][y_mode];
+    } else {
+      ++counts->y_mode[size_group_lookup[bsize]][y_mode];
+    }
   }
 
   ++counts->uv_mode[y_mode][uv_mode];
@@ -2953,7 +2982,8 @@
     for (plane = 0; plane < MAX_MB_PLANE; ++plane)
       vp10_encode_intra_block_plane(x, VPXMAX(bsize, BLOCK_8X8), plane);
     if (output_enabled)
-      sum_intra_stats(td->counts, mi);
+      sum_intra_stats(td->counts, mi, xd->above_mi, xd->left_mi,
+                      frame_is_intra_only(cm));
 
     if (bsize >= BLOCK_8X8 && output_enabled) {
       if (mbmi->palette_mode_info.palette_size[0] > 0) {
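
The encodeframe.c hunks convert the single xd->lossless flag into a per-segment
array: each active segment is lossless exactly when its resolved qindex and all
three delta-Q values are zero, and whole-frame shortcuts (select_tx_mode, the
x->optimize path) now also require that segmentation be off. A minimal sketch
of the flag computation, with a stand-in struct for VP10_COMMON:

#define TOY_MAX_SEGMENTS 8

typedef struct {
  int seg_enabled;
  int base_qindex;
  int y_dc_delta_q, uv_dc_delta_q, uv_ac_delta_q;
  int seg_qindex[TOY_MAX_SEGMENTS];  /* per-segment Q after overrides,
                                        as vp10_get_qindex() resolves it */
} ToyCommonQ;

static void toy_set_lossless(const ToyCommonQ *cm,
                             int lossless[TOY_MAX_SEGMENTS]) {
  const int nseg = cm->seg_enabled ? TOY_MAX_SEGMENTS : 1;
  int i;
  for (i = 0; i < nseg; ++i) {
    const int qindex = cm->seg_enabled ? cm->seg_qindex[i]
                                       : cm->base_qindex;
    lossless[i] = qindex == 0 && cm->y_dc_delta_q == 0 &&
                  cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
  }
}
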
diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c
index 3307720..92ba4dd 100644
--- a/vp10/encoder/encodemb.c
+++ b/vp10/encoder/encodemb.c
@@ -323,195 +323,6 @@
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block,
-                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const struct macroblock_plane *const p = &x->plane[plane];
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
-  PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
-  TX_TYPE tx_type = get_tx_type(plane_type, xd, block);
-  const scan_order *const scan_order = get_scan(tx_size, tx_type);
-  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
-  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
-  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
-  uint16_t *const eob = &p->eobs[block];
-  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
-  int i, j;
-  const int16_t *src_diff;
-  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
-  src_diff = &p->src_diff[4 * (j * diff_stride + i)];
-
-#if CONFIG_VP9_HIGHBITDEPTH
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    switch (tx_size) {
-      case TX_32X32:
-        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
-        vp10_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
-                                     p->round_fp, p->quant_fp, p->quant_shift,
-                                     qcoeff, dqcoeff, pd->dequant,
-                                     eob, scan_order->scan,
-                                     scan_order->iscan);
-        break;
-      case TX_16X16:
-        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
-        vp10_highbd_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
-                               p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
-                               pd->dequant, eob,
-                               scan_order->scan, scan_order->iscan);
-        break;
-      case TX_8X8:
-        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
-        vp10_highbd_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
-                               p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
-                               pd->dequant, eob,
-                               scan_order->scan, scan_order->iscan);
-        break;
-      case TX_4X4:
-        if (xd->lossless) {
-          vp10_highbd_fwht4x4(src_diff, coeff, diff_stride);
-        } else {
-          vpx_highbd_fdct4x4(src_diff, coeff, diff_stride);
-        }
-        vp10_highbd_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
-                               p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
-                               pd->dequant, eob,
-                               scan_order->scan, scan_order->iscan);
-        break;
-      default:
-        assert(0);
-    }
-    return;
-  }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-
-  switch (tx_size) {
-    case TX_32X32:
-      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
-      vp10_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp,
-                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
-                            pd->dequant, eob, scan_order->scan,
-                            scan_order->iscan);
-      break;
-    case TX_16X16:
-      vpx_fdct16x16(src_diff, coeff, diff_stride);
-      vp10_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
-                      p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
-                      pd->dequant, eob,
-                      scan_order->scan, scan_order->iscan);
-      break;
-    case TX_8X8:
-      vp10_fdct8x8_quant(src_diff, diff_stride, coeff, 64,
-                        x->skip_block, p->zbin, p->round_fp,
-                        p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
-                        pd->dequant, eob,
-                        scan_order->scan, scan_order->iscan);
-      break;
-    case TX_4X4:
-      if (xd->lossless) {
-        vp10_fwht4x4(src_diff, coeff, diff_stride);
-      } else {
-        vpx_fdct4x4(src_diff, coeff, diff_stride);
-      }
-      vp10_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
-                      p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
-                      pd->dequant, eob,
-                      scan_order->scan, scan_order->iscan);
-      break;
-    default:
-      assert(0);
-      break;
-  }
-}
-
-void vp10_xform_quant_dc(MACROBLOCK *x, int plane, int block,
-                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const struct macroblock_plane *const p = &x->plane[plane];
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
-  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
-  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
-  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
-  uint16_t *const eob = &p->eobs[block];
-  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
-  int i, j;
-  const int16_t *src_diff;
-
-  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
-  src_diff = &p->src_diff[4 * (j * diff_stride + i)];
-
-#if CONFIG_VP9_HIGHBITDEPTH
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    switch (tx_size) {
-      case TX_32X32:
-        vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
-        vpx_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round,
-                                     p->quant_fp[0], qcoeff, dqcoeff,
-                                     pd->dequant[0], eob);
-        break;
-      case TX_16X16:
-        vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
-        vpx_highbd_quantize_dc(coeff, 256, x->skip_block, p->round,
-                               p->quant_fp[0], qcoeff, dqcoeff,
-                               pd->dequant[0], eob);
-        break;
-      case TX_8X8:
-        vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
-        vpx_highbd_quantize_dc(coeff, 64, x->skip_block, p->round,
-                               p->quant_fp[0], qcoeff, dqcoeff,
-                               pd->dequant[0], eob);
-        break;
-      case TX_4X4:
-        if (xd->lossless) {
-          vp10_highbd_fwht4x4(src_diff, coeff, diff_stride);
-        } else {
-          vpx_highbd_fdct4x4(src_diff, coeff, diff_stride);
-        }
-        vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round,
-                               p->quant_fp[0], qcoeff, dqcoeff,
-                               pd->dequant[0], eob);
-        break;
-      default:
-        assert(0);
-    }
-    return;
-  }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-
-  switch (tx_size) {
-    case TX_32X32:
-      vpx_fdct32x32_1(src_diff, coeff, diff_stride);
-      vpx_quantize_dc_32x32(coeff, x->skip_block, p->round,
-                            p->quant_fp[0], qcoeff, dqcoeff,
-                            pd->dequant[0], eob);
-      break;
-    case TX_16X16:
-      vpx_fdct16x16_1(src_diff, coeff, diff_stride);
-      vpx_quantize_dc(coeff, 256, x->skip_block, p->round,
-                     p->quant_fp[0], qcoeff, dqcoeff,
-                     pd->dequant[0], eob);
-      break;
-    case TX_8X8:
-      vpx_fdct8x8_1(src_diff, coeff, diff_stride);
-      vpx_quantize_dc(coeff, 64, x->skip_block, p->round,
-                      p->quant_fp[0], qcoeff, dqcoeff,
-                      pd->dequant[0], eob);
-      break;
-    case TX_4X4:
-      if (xd->lossless) {
-        vp10_fwht4x4(src_diff, coeff, diff_stride);
-      } else {
-        vpx_fdct4x4(src_diff, coeff, diff_stride);
-      }
-      vpx_quantize_dc(coeff, 16, x->skip_block, p->round,
-                      p->quant_fp[0], qcoeff, dqcoeff,
-                      pd->dequant[0], eob);
-      break;
-    default:
-      assert(0);
-      break;
-  }
-}
-
 void vp10_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
                        int diff_stride, TX_TYPE tx_type, int lossless) {
   if (lossless) {
@@ -657,8 +468,9 @@
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-void vp10_xform_quant(MACROBLOCK *x, int plane, int block,
-                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
+void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block,
+                         int blk_row, int blk_col,
+                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
   MACROBLOCKD *const xd = &x->e_mbd;
   const struct macroblock_plane *const p = &x->plane[plane];
   const struct macroblockd_plane *const pd = &xd->plane[plane];
@@ -670,10 +482,196 @@
   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   uint16_t *const eob = &p->eobs[block];
   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
-  int i, j;
   const int16_t *src_diff;
-  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
-  src_diff = &p->src_diff[4 * (j * diff_stride + i)];
+  src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    switch (tx_size) {
+      case TX_32X32:
+        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
+        vp10_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
+                                     p->round_fp, p->quant_fp, p->quant_shift,
+                                     qcoeff, dqcoeff, pd->dequant,
+                                     eob, scan_order->scan,
+                                     scan_order->iscan);
+        break;
+      case TX_16X16:
+        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
+        vp10_highbd_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
+                               p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+                               pd->dequant, eob,
+                               scan_order->scan, scan_order->iscan);
+        break;
+      case TX_8X8:
+        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
+        vp10_highbd_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
+                               p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+                               pd->dequant, eob,
+                               scan_order->scan, scan_order->iscan);
+        break;
+      case TX_4X4:
+        if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+          vp10_highbd_fwht4x4(src_diff, coeff, diff_stride);
+        } else {
+          vpx_highbd_fdct4x4(src_diff, coeff, diff_stride);
+        }
+        vp10_highbd_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
+                               p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+                               pd->dequant, eob,
+                               scan_order->scan, scan_order->iscan);
+        break;
+      default:
+        assert(0);
+    }
+    return;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  switch (tx_size) {
+    case TX_32X32:
+      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
+      vp10_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp,
+                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+                            pd->dequant, eob, scan_order->scan,
+                            scan_order->iscan);
+      break;
+    case TX_16X16:
+      vpx_fdct16x16(src_diff, coeff, diff_stride);
+      vp10_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
+                      p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+                      pd->dequant, eob,
+                      scan_order->scan, scan_order->iscan);
+      break;
+    case TX_8X8:
+      vp10_fdct8x8_quant(src_diff, diff_stride, coeff, 64,
+                        x->skip_block, p->zbin, p->round_fp,
+                        p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+                        pd->dequant, eob,
+                        scan_order->scan, scan_order->iscan);
+      break;
+    case TX_4X4:
+      if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+        vp10_fwht4x4(src_diff, coeff, diff_stride);
+      } else {
+        vpx_fdct4x4(src_diff, coeff, diff_stride);
+      }
+      vp10_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
+                      p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+                      pd->dequant, eob,
+                      scan_order->scan, scan_order->iscan);
+      break;
+    default:
+      assert(0);
+      break;
+  }
+}
+
+void vp10_xform_quant_dc(MACROBLOCK *x, int plane, int block,
+                         int blk_row, int blk_col,
+                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const struct macroblock_plane *const p = &x->plane[plane];
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  uint16_t *const eob = &p->eobs[block];
+  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+  const int16_t *src_diff;
+  src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    switch (tx_size) {
+      case TX_32X32:
+        vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
+        vpx_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round,
+                                     p->quant_fp[0], qcoeff, dqcoeff,
+                                     pd->dequant[0], eob);
+        break;
+      case TX_16X16:
+        vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
+        vpx_highbd_quantize_dc(coeff, 256, x->skip_block, p->round,
+                               p->quant_fp[0], qcoeff, dqcoeff,
+                               pd->dequant[0], eob);
+        break;
+      case TX_8X8:
+        vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
+        vpx_highbd_quantize_dc(coeff, 64, x->skip_block, p->round,
+                               p->quant_fp[0], qcoeff, dqcoeff,
+                               pd->dequant[0], eob);
+        break;
+      case TX_4X4:
+        if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+          vp10_highbd_fwht4x4(src_diff, coeff, diff_stride);
+        } else {
+          vpx_highbd_fdct4x4(src_diff, coeff, diff_stride);
+        }
+        vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round,
+                               p->quant_fp[0], qcoeff, dqcoeff,
+                               pd->dequant[0], eob);
+        break;
+      default:
+        assert(0);
+    }
+    return;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  switch (tx_size) {
+    case TX_32X32:
+      vpx_fdct32x32_1(src_diff, coeff, diff_stride);
+      vpx_quantize_dc_32x32(coeff, x->skip_block, p->round,
+                            p->quant_fp[0], qcoeff, dqcoeff,
+                            pd->dequant[0], eob);
+      break;
+    case TX_16X16:
+      vpx_fdct16x16_1(src_diff, coeff, diff_stride);
+      vpx_quantize_dc(coeff, 256, x->skip_block, p->round,
+                     p->quant_fp[0], qcoeff, dqcoeff,
+                     pd->dequant[0], eob);
+      break;
+    case TX_8X8:
+      vpx_fdct8x8_1(src_diff, coeff, diff_stride);
+      vpx_quantize_dc(coeff, 64, x->skip_block, p->round,
+                      p->quant_fp[0], qcoeff, dqcoeff,
+                      pd->dequant[0], eob);
+      break;
+    case TX_4X4:
+      if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+        vp10_fwht4x4(src_diff, coeff, diff_stride);
+      } else {
+        vpx_fdct4x4(src_diff, coeff, diff_stride);
+      }
+      vpx_quantize_dc(coeff, 16, x->skip_block, p->round,
+                      p->quant_fp[0], qcoeff, dqcoeff,
+                      pd->dequant[0], eob);
+      break;
+    default:
+      assert(0);
+      break;
+  }
+}
+
+void vp10_xform_quant(MACROBLOCK *x, int plane, int block,
+                      int blk_row, int blk_col,
+                      BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const struct macroblock_plane *const p = &x->plane[plane];
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+  TX_TYPE tx_type = get_tx_type(plane_type, xd, block);
+  const scan_order *const scan_order = get_scan(tx_size, tx_type);
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  uint16_t *const eob = &p->eobs[block];
+  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+  const int16_t *src_diff;
+  src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
 
 #if CONFIG_VP9_HIGHBITDEPTH
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -702,7 +700,7 @@
         break;
       case TX_4X4:
         vp10_highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type,
-                                 xd->lossless);
+                                 xd->lossless[xd->mi[0]->mbmi.segment_id]);
         vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                               p->quant, p->quant_shift, qcoeff, dqcoeff,
                               pd->dequant, eob,
@@ -738,7 +736,8 @@
                      scan_order->scan, scan_order->iscan);
       break;
     case TX_4X4:
-      vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, xd->lossless);
+      vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type,
+                        xd->lossless[xd->mi[0]->mbmi.segment_id]);
       vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                      p->quant, p->quant_shift, qcoeff, dqcoeff,
                      pd->dequant, eob,
@@ -750,7 +749,8 @@
   }
 }
 
-static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
+static void encode_block(int plane, int block, int blk_row, int blk_col,
+                         BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
   struct encode_b_args *const args = arg;
   MACROBLOCK *const x = args->x;
@@ -759,14 +759,12 @@
   struct macroblock_plane *const p = &x->plane[plane];
   struct macroblockd_plane *const pd = &xd->plane[plane];
   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
-  int i, j;
   uint8_t *dst;
   ENTROPY_CONTEXT *a, *l;
   TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block);
-  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
-  dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
-  a = &ctx->ta[plane][i];
-  l = &ctx->tl[plane][j];
+  dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
+  a = &ctx->ta[plane][blk_col];
+  l = &ctx->tl[plane][blk_row];
 
   // TODO(jingning): per transformed block zero forcing only enabled for
   // luma component. will integrate chroma components as well.
@@ -785,17 +783,20 @@
         *a = *l = 0;
         return;
       } else {
-        vp10_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
+        vp10_xform_quant_fp(x, plane, block, blk_row, blk_col,
+                            plane_bsize, tx_size);
       }
     } else {
       if (max_txsize_lookup[plane_bsize] == tx_size) {
         int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
         if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
           // full forward transform and quantization
-          vp10_xform_quant(x, plane, block, plane_bsize, tx_size);
+          vp10_xform_quant(x, plane, block, blk_row, blk_col,
+                           plane_bsize, tx_size);
         } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
           // fast path forward transform and quantization
-          vp10_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
+          vp10_xform_quant_dc(x, plane, block, blk_row, blk_col,
+                              plane_bsize, tx_size);
         } else {
           // skip forward transform
           p->eobs[block] = 0;
@@ -803,7 +804,8 @@
           return;
         }
       } else {
-        vp10_xform_quant(x, plane, block, plane_bsize, tx_size);
+        vp10_xform_quant(x, plane, block, blk_row, blk_col,
+                         plane_bsize, tx_size);
       }
     }
   }
@@ -841,7 +843,7 @@
         // case.
         vp10_highbd_inv_txfm_add_4x4(dqcoeff, dst, pd->dst.stride,
                                      p->eobs[block], xd->bd, tx_type,
-                                     xd->lossless);
+                                     xd->lossless[xd->mi[0]->mbmi.segment_id]);
         break;
       default:
         assert(0 && "Invalid transform size");
@@ -870,7 +872,7 @@
       // which is significant (not just an optimization) for the lossless
       // case.
       vp10_inv_txfm_add_4x4(dqcoeff, dst, pd->dst.stride, p->eobs[block],
-                            tx_type, xd->lossless);
+                            tx_type, xd->lossless[xd->mi[0]->mbmi.segment_id]);
       break;
     default:
       assert(0 && "Invalid transform size");
@@ -878,24 +880,23 @@
   }
 }
 
-static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
+static void encode_block_pass1(int plane, int block, int blk_row, int blk_col,
+                               BLOCK_SIZE plane_bsize,
                                TX_SIZE tx_size, void *arg) {
   MACROBLOCK *const x = (MACROBLOCK *)arg;
   MACROBLOCKD *const xd = &x->e_mbd;
   struct macroblock_plane *const p = &x->plane[plane];
   struct macroblockd_plane *const pd = &xd->plane[plane];
   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
-  int i, j;
   uint8_t *dst;
-  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
-  dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
+  dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
 
-  vp10_xform_quant(x, plane, block, plane_bsize, tx_size);
+  vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
 
   if (p->eobs[block] > 0) {
 #if CONFIG_VP9_HIGHBITDEPTH
     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-      if (xd->lossless) {
+      if (xd->lossless[0]) {
         vp10_highbd_iwht4x4_add(dqcoeff, dst, pd->dst.stride,
                                 p->eobs[block], xd->bd);
       } else {
@@ -905,7 +906,7 @@
       return;
     }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-    if (xd->lossless) {
+    if (xd->lossless[0]) {
       vp10_iwht4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
     } else {
       vp10_idct4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
@@ -947,8 +948,9 @@
   }
 }
 
-void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
-                            TX_SIZE tx_size, void *arg) {
+void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col,
+                             BLOCK_SIZE plane_bsize,
+                             TX_SIZE tx_size, void *arg) {
   struct encode_b_args* const args = arg;
   MACROBLOCK *const x = args->x;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -963,21 +965,20 @@
   const scan_order *const scan_order = get_scan(tx_size, tx_type);
   PREDICTION_MODE mode;
   const int bwl = b_width_log2_lookup[plane_bsize];
+  const int bhl = b_height_log2_lookup[plane_bsize];
   const int diff_stride = 4 * (1 << bwl);
   uint8_t *src, *dst;
   int16_t *src_diff;
   uint16_t *eob = &p->eobs[block];
   const int src_stride = p->src.stride;
   const int dst_stride = pd->dst.stride;
-  int i, j;
-  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
-  dst = &pd->dst.buf[4 * (j * dst_stride + i)];
-  src = &p->src.buf[4 * (j * src_stride + i)];
-  src_diff = &p->src_diff[4 * (j * diff_stride + i)];
+  dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)];
+  src = &p->src.buf[4 * (blk_row * src_stride + blk_col)];
+  src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
 
   mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
-  vp10_predict_intra_block(xd, bwl, tx_size, mode, dst, dst_stride,
-                          dst, dst_stride, i, j, plane);
+  vp10_predict_intra_block(xd, bwl, bhl, tx_size, mode, dst, dst_stride,
+                          dst, dst_stride, blk_col, blk_row, plane);
 
 #if CONFIG_VP9_HIGHBITDEPTH
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -1030,7 +1031,7 @@
           vpx_highbd_subtract_block(4, 4, src_diff, diff_stride,
                                     src, src_stride, dst, dst_stride, xd->bd);
           vp10_highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type,
-                                   xd->lossless);
+                                   xd->lossless[mbmi->segment_id]);
           vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                                 p->quant, p->quant_shift, qcoeff, dqcoeff,
                                 pd->dequant, eob,
@@ -1042,7 +1043,7 @@
           // eob<=1 which is significant (not just an optimization) for the
           // lossless case.
           vp10_highbd_inv_txfm_add_4x4(dqcoeff, dst, dst_stride, *eob, xd->bd,
-                                       tx_type, xd->lossless);
+                                       tx_type, xd->lossless[mbmi->segment_id]);
         break;
       default:
         assert(0);
@@ -1099,7 +1100,8 @@
       if (!x->skip_recode) {
         vpx_subtract_block(4, 4, src_diff, diff_stride,
                            src, src_stride, dst, dst_stride);
-        vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, xd->lossless);
+        vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type,
+                          xd->lossless[mbmi->segment_id]);
         vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
                        p->quant_shift, qcoeff, dqcoeff,
                        pd->dequant, eob, scan_order->scan,
@@ -1111,7 +1113,7 @@
         // which is significant (not just an optimization) for the lossless
         // case.
         vp10_inv_txfm_add_4x4(dqcoeff, dst, dst_stride, *eob, tx_type,
-                              xd->lossless);
+                              xd->lossless[mbmi->segment_id]);
       }
       break;
     default:
@@ -1127,5 +1129,5 @@
   struct encode_b_args arg = {x, NULL, &xd->mi[0]->mbmi.skip};
 
   vp10_foreach_transformed_block_in_plane(xd, bsize, plane,
-                                         vp10_encode_block_intra, &arg);
+                                          vp10_encode_block_intra, &arg);
 }
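
Throughout encodemb.c the per-block visitors gain explicit blk_row and blk_col
arguments, so each visitor stops re-deriving its raster position from the
linear block index. For reference, this is roughly what the removed
txfrm_block_to_raster_xy() calls computed (positions in 4x4-sample units;
names here are illustrative):

static void toy_block_to_raster_xy(int bwl,     /* log2 width, 4x4 units */
                                   int tx_size, /* 0=4x4 .. 3=32x32 */
                                   int block, int *blk_col, int *blk_row) {
  const int tx_cols_log2 = bwl - tx_size;
  const int raster_idx = block >> (tx_size << 1);
  *blk_col = (raster_idx & ((1 << tx_cols_log2) - 1)) << tx_size;
  *blk_row = (raster_idx >> tx_cols_log2) << tx_size;
}
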
diff --git a/vp10/encoder/encodemb.h b/vp10/encoder/encodemb.h
index 62a7db4..2e6516e 100644
--- a/vp10/encoder/encodemb.h
+++ b/vp10/encoder/encodemb.h
@@ -26,16 +26,20 @@
 void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize);
 void vp10_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize);
 void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block,
-                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
+                         int blk_row, int blk_col,
+                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
 void vp10_xform_quant_dc(MACROBLOCK *x, int plane, int block,
-                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
+                         int blk_row, int blk_col,
+                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
 void vp10_xform_quant(MACROBLOCK *x, int plane, int block,
-                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
+                      int blk_row, int blk_col,
+                      BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
 
 void vp10_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
 
-void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
-                            TX_SIZE tx_size, void *arg);
+void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col,
+                             BLOCK_SIZE plane_bsize,
+                             TX_SIZE tx_size, void *arg);
 
 void vp10_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
 
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index f3d7877..5b646a2 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -418,7 +418,9 @@
   memcpy(cc->nmvcosts_hp[1], cpi->nmvcosts_hp[1],
          MV_VALS * sizeof(*cpi->nmvcosts_hp[1]));
 
-  vp10_copy(cc->segment_pred_probs, cm->seg.pred_probs);
+#if !CONFIG_MISC_FIXES
+  vp10_copy(cc->segment_pred_probs, cm->segp.pred_probs);
+#endif
 
   memcpy(cpi->coding_context.last_frame_seg_map_copy,
          cm->last_frame_seg_map, (cm->mi_rows * cm->mi_cols));
@@ -444,7 +446,9 @@
   memcpy(cpi->nmvcosts_hp[1], cc->nmvcosts_hp[1],
          MV_VALS * sizeof(*cc->nmvcosts_hp[1]));
 
-  vp10_copy(cm->seg.pred_probs, cc->segment_pred_probs);
+#if !CONFIG_MISC_FIXES
+  vp10_copy(cm->segp.pred_probs, cc->segment_pred_probs);
+#endif
 
   memcpy(cm->last_frame_seg_map,
          cpi->coding_context.last_frame_seg_map_copy,
@@ -1440,7 +1444,7 @@
     }
   }
 
-  vp10_reset_segment_features(&cm->seg);
+  vp10_reset_segment_features(cm);
   vp10_set_high_precision_mv(cpi, 0);
 
   {
@@ -2659,7 +2663,7 @@
 static void loopfilter_frame(VP10_COMP *cpi, VP10_COMMON *cm) {
   MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
   struct loopfilter *lf = &cm->lf;
-  if (xd->lossless) {
+  if (is_lossless_requested(&cpi->oxcf)) {
       lf->filter_level = 0;
   } else {
     struct vpx_usec_timer timer;
@@ -3561,7 +3565,7 @@
   // Set various flags etc to special state if it is a key frame.
   if (frame_is_intra_only(cm)) {
     // Reset the loop filter deltas and segmentation map.
-    vp10_reset_segment_features(&cm->seg);
+    vp10_reset_segment_features(cm);
 
     // If segmentation is enabled force a map update for key frames.
     if (seg->enabled) {
@@ -4115,7 +4119,7 @@
   }
 
   if (oxcf->pass == 1) {
-    cpi->td.mb.e_mbd.lossless = is_lossless_requested(oxcf);
+    cpi->td.mb.e_mbd.lossless[0] = is_lossless_requested(oxcf);
     vp10_first_pass(cpi, source);
   } else if (oxcf->pass == 2) {
     Pass2Encode(cpi, size, dest, frame_flags);
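
In encoder.c the loop-filter shortcut changes meaning: with lossless now
tracked per segment, xd->lossless[0] alone no longer proves the whole frame is
lossless, so the filter is only skipped when lossless coding was requested in
the configuration. A sketch of that predicate, assuming the usual "minimum and
maximum allowed Q are both zero" definition of a lossless request (stub config
type, not the library's):

typedef struct { int best_allowed_q, worst_allowed_q; } ToyEncCfg;

static int toy_is_lossless_requested(const ToyEncCfg *oxcf) {
  /* Lossless only if no frame may ever be quantized. */
  return oxcf->best_allowed_q == 0 && oxcf->worst_allowed_q == 0;
}
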
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h
index a957b63..7028803 100644
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@@ -55,7 +55,9 @@
   int nmvcosts[2][MV_VALS];
   int nmvcosts_hp[2][MV_VALS];
 
+#if !CONFIG_MISC_FIXES
   vpx_prob segment_pred_probs[PREDICTION_PROBS];
+#endif
 
   unsigned char *last_frame_seg_map_copy;
 
@@ -454,12 +456,12 @@
 
   int mbmode_cost[INTRA_MODES];
   unsigned int inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES];
-  int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES];
+  int intra_uv_mode_cost[INTRA_MODES][INTRA_MODES];
   int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
   int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
   int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES];
-  int palette_y_size_cost[10][PALETTE_SIZES];
-  int palette_uv_size_cost[10][PALETTE_SIZES];
+  int palette_y_size_cost[PALETTE_BLOCK_SIZES][PALETTE_SIZES];
+  int palette_uv_size_cost[PALETTE_BLOCK_SIZES][PALETTE_SIZES];
   int palette_y_color_cost[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS]
                                                  [PALETTE_COLORS];
   int palette_uv_color_cost[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS]
diff --git a/vp10/encoder/firstpass.c b/vp10/encoder/firstpass.c
index d155316..c41fa3e 100644
--- a/vp10/encoder/firstpass.c
+++ b/vp10/encoder/firstpass.c
@@ -614,6 +614,7 @@
                      cm->mi_rows, cm->mi_cols);
 
       // Do intra 16x16 prediction.
+      xd->mi[0]->mbmi.segment_id = 0;
       xd->mi[0]->mbmi.mode = DC_PRED;
       xd->mi[0]->mbmi.tx_size = use_dc_pred ?
          (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
diff --git a/vp10/encoder/mbgraph.c b/vp10/encoder/mbgraph.c
index 1ba6e10..ed0f539 100644
--- a/vp10/encoder/mbgraph.c
+++ b/vp10/encoder/mbgraph.c
@@ -146,7 +146,7 @@
     unsigned int err;
 
     xd->mi[0]->mbmi.mode = mode;
-    vp10_predict_intra_block(xd, 2, TX_16X16, mode,
+    vp10_predict_intra_block(xd, 2, 2, TX_16X16, mode,
                             x->plane[0].src.buf, x->plane[0].src.stride,
                             xd->plane[0].dst.buf, xd->plane[0].dst.stride,
                             0, 0, 0);
diff --git a/vp10/encoder/palette.c b/vp10/encoder/palette.c
index 04e9118..522e185 100644
--- a/vp10/encoder/palette.c
+++ b/vp10/encoder/palette.c
@@ -8,14 +8,15 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <math.h>
 #include "vp10/encoder/palette.h"
 
 static double calc_dist(const double *p1, const double *p2, int dim) {
   double dist = 0;
   int i = 0;
 
-  for (i = 0; i < dim; i++) {
-    dist = dist + (p1[i] - p2[i]) * (p1[i] - p2[i]);
+  for (i = 0; i < dim; ++i) {
+    dist = dist + (p1[i] - round(p2[i])) * (p1[i] - round(p2[i]));
   }
   return dist;
 }
@@ -25,10 +26,10 @@
   int i, j;
   double min_dist, this_dist;
 
-  for (i = 0; i < n; i++) {
+  for (i = 0; i < n; ++i) {
     min_dist = calc_dist(data + i * dim, centroids, dim);
     indices[i] = 0;
-    for (j = 1; j < k; j++) {
+    for (j = 1; j < k; ++j) {
       this_dist = calc_dist(data + i * dim, centroids + j * dim, dim);
       if (this_dist < min_dist) {
         min_dist = this_dist;
@@ -38,32 +39,39 @@
   }
 }
 
+// Generate a random number in the range [0, 32768).
+static unsigned int lcg_rand16(unsigned int *state) {
+  *state = *state * 1103515245 + 12345;
+  return *state / 65536 % 32768;
+}
+
 static void calc_centroids(const double *data, double *centroids,
                            const uint8_t *indices, int n, int k, int dim) {
   int i, j, index;
   int count[PALETTE_MAX_SIZE];
+  unsigned int rand_state = (unsigned int)data[0];
 
-  srand((unsigned int) data[0]);
+  assert(n <= 32768);
+
   memset(count, 0, sizeof(count[0]) * k);
   memset(centroids, 0, sizeof(centroids[0]) * k * dim);
 
-  for (i = 0; i < n; i++) {
+  for (i = 0; i < n; ++i) {
     index = indices[i];
     assert(index < k);
-    count[index]++;
-    for (j = 0; j < dim; j++) {
+    ++count[index];
+    for (j = 0; j < dim; ++j) {
       centroids[index * dim + j] += data[i * dim + j];
     }
   }
 
-  for (i = 0; i < k; i++) {
+  for (i = 0; i < k; ++i) {
     if (count[i] == 0) {
-      // TODO(huisu): replace rand() with something else.
-      memcpy(centroids + i * dim, data + (rand() % n) * dim,
+      memcpy(centroids + i * dim, data + (lcg_rand16(&rand_state) % n) * dim,
                  sizeof(centroids[0]) * dim);
     } else {
       const double norm = 1.0 / count[i];
-      for (j = 0; j < dim; j++)
+      for (j = 0; j < dim; ++j)
         centroids[i * dim + j] *= norm;
     }
   }
@@ -75,7 +83,7 @@
   int i;
   (void) k;
 
-  for (i = 0; i < n; i++)
+  for (i = 0; i < n; ++i)
     dist += calc_dist(data + i * dim, centroids + indices[i] * dim, dim);
 
   return dist;
@@ -107,7 +115,7 @@
     memcpy(pre_centroids, centroids, sizeof(pre_centroids[0]) * k * dim);
     memcpy(pre_indices, indices, sizeof(pre_indices[0]) * n);
     pre_dist = this_dist;
-    i++;
+    ++i;
   }
 
   return i;
@@ -169,7 +177,7 @@
   for (r = 0; r < rows; ++r) {
     for (c = 0; c < cols; ++c) {
       val = src[r * stride + c];
-      val_count[val]++;
+      ++val_count[val];
     }
   }
 
diff --git a/vp10/encoder/rd.c b/vp10/encoder/rd.c
index 806b854..4ed1ae2 100644
--- a/vp10/encoder/rd.c
+++ b/vp10/encoder/rd.c
@@ -76,16 +76,15 @@
                       vp10_intra_mode_tree);
 
   vp10_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp10_intra_mode_tree);
-  vp10_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME],
-                  vp10_kf_uv_mode_prob[TM_PRED], vp10_intra_mode_tree);
-  vp10_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME],
-                  fc->uv_mode_prob[TM_PRED], vp10_intra_mode_tree);
+  for (i = 0; i < INTRA_MODES; ++i)
+    vp10_cost_tokens(cpi->intra_uv_mode_cost[i],
+                     fc->uv_mode_prob[i], vp10_intra_mode_tree);
 
   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
     vp10_cost_tokens(cpi->switchable_interp_costs[i],
                     fc->switchable_interp_prob[i], vp10_switchable_interp_tree);
 
-  for (i = 0; i < 10; ++i) {
+  for (i = 0; i < PALETTE_BLOCK_SIZES; ++i) {
     vp10_cost_tokens(cpi->palette_y_size_cost[i],
                      vp10_default_palette_y_size_prob[i],
                      vp10_palette_size_tree);
@@ -287,7 +286,6 @@
 void vp10_initialize_rd_consts(VP10_COMP *cpi) {
   VP10_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->td.mb;
-  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
   RD_OPT *const rd = &cpi->rd;
   int i;
 
@@ -303,14 +301,13 @@
                        cm->frame_type != KEY_FRAME) ? 0 : 1;
 
   set_block_thresholds(cm, rd);
-  set_partition_probs(cm, xd);
 
   fill_token_costs(x->token_costs, cm->fc->coef_probs);
 
   if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
       cm->frame_type == KEY_FRAME) {
     for (i = 0; i < PARTITION_CONTEXTS; ++i)
-      vp10_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i),
+      vp10_cost_tokens(cpi->partition_cost[i], cm->fc->partition_prob[i],
                       vp10_partition_tree);
   }
 
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index c9ad945..8ba2110 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -451,18 +451,16 @@
   *out_sse = this_sse >> shift;
 }
 
-static int rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
+static int rate_block(int plane, int block, int blk_row, int blk_col,
                       TX_SIZE tx_size, struct rdcost_block_args* args) {
-  int x_idx, y_idx;
-  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
-
-  return cost_coeffs(args->x, plane, block, args->t_above + x_idx,
-                     args->t_left + y_idx, tx_size,
+  return cost_coeffs(args->x, plane, block, args->t_above + blk_col,
+                     args->t_left + blk_row, tx_size,
                      args->so->scan, args->so->neighbors,
                      args->use_fast_coef_costing);
 }
 
-static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
+static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
+                          BLOCK_SIZE plane_bsize,
                           TX_SIZE tx_size, void *arg) {
   struct rdcost_block_args *args = arg;
   MACROBLOCK *const x = args->x;
@@ -478,20 +476,23 @@
 
   if (!is_inter_block(mbmi)) {
     struct encode_b_args arg = {x, NULL, &mbmi->skip};
-    vp10_encode_block_intra(plane, block, plane_bsize, tx_size, &arg);
+    vp10_encode_block_intra(plane, block, blk_row, blk_col,
+                            plane_bsize, tx_size, &arg);
     dist_block(x, plane, block, tx_size, &dist, &sse);
   } else if (max_txsize_lookup[plane_bsize] == tx_size) {
     if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
         SKIP_TXFM_NONE) {
       // full forward transform and quantization
-      vp10_xform_quant(x, plane, block, plane_bsize, tx_size);
+      vp10_xform_quant(x, plane, block, blk_row, blk_col,
+                       plane_bsize, tx_size);
       dist_block(x, plane, block, tx_size, &dist, &sse);
     } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
                SKIP_TXFM_AC_ONLY) {
       // compute DC coefficient
       tran_low_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
       tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
-      vp10_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
+      vp10_xform_quant_dc(x, plane, block, blk_row, blk_col,
+                          plane_bsize, tx_size);
       sse  = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
       dist = sse;
       if (x->plane[plane].eobs[block]) {
@@ -515,7 +516,7 @@
     }
   } else {
     // full forward transform and quantization
-    vp10_xform_quant(x, plane, block, plane_bsize, tx_size);
+    vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
     dist_block(x, plane, block, tx_size, &dist, &sse);
   }
 
@@ -525,7 +526,7 @@
     return;
   }
 
-  rate = rate_block(plane, block, plane_bsize, tx_size, args);
+  rate = rate_block(plane, block, blk_row, blk_col, tx_size, args);
   rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist);
   rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse);
 
@@ -533,7 +534,7 @@
   rd = VPXMIN(rd1, rd2);
   if (plane == 0)
     x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
-                                    (rd1 > rd2 && !xd->lossless);
+        (rd1 > rd2 && !xd->lossless[mbmi->segment_id]);
 
   args->this_rate += rate;
   args->this_dist += dist;
@@ -605,6 +606,21 @@
                    mbmi->tx_size, cpi->sf.use_fast_coef_costing);
 }
 
+static void choose_smallest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
+                                    int *rate, int64_t *distortion,
+                                    int *skip, int64_t *sse,
+                                    int64_t ref_best_rd,
+                                    BLOCK_SIZE bs) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+
+  mbmi->tx_size = TX_4X4;
+
+  txfm_rd_in_plane(x, rate, distortion, skip,
+                   sse, ref_best_rd, 0, bs,
+                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);
+}
+
 static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x,
                                    int *rate,
                                    int64_t *distortion,
@@ -674,7 +690,8 @@
       rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
     }
 
-    if (is_inter_block(mbmi) && !xd->lossless && !s[n] && sse[n] != INT64_MAX) {
+    if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] &&
+        !s[n] && sse[n] != INT64_MAX) {
       rd[n][0] = VPXMIN(rd[n][0], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
       rd[n][1] = VPXMIN(rd[n][1], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
     }
@@ -709,7 +726,11 @@
 
   assert(bs == xd->mi[0]->mbmi.sb_type);
 
-  if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) {
+  if (CONFIG_MISC_FIXES && xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+    choose_smallest_tx_size(cpi, x, rate, distortion, skip, ret_sse,
+                            ref_best_rd, bs);
+  } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
+             xd->lossless[xd->mi[0]->mbmi.segment_id]) {
     choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
                            bs);
   } else {
@@ -821,7 +842,8 @@
       vp10_k_means(data, centroids, indices, pre_indices, rows * cols,
                    n, 1, max_itr);
       vp10_insertion_sort(centroids, n);
-
+      for (i = 0; i < n; ++i)
+        centroids[i] = round(centroids[i]);
       // remove duplicates
       i = 1;
       k = n;
@@ -958,12 +980,12 @@
                                                                   p->src_diff);
           tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
           xd->mi[0]->bmi[block].as_mode = mode;
-          vp10_predict_intra_block(xd, 1, TX_4X4, mode, dst, dst_stride,
+          vp10_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride,
                                   dst, dst_stride,
                                   col + idx, row + idy, 0);
           vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
                                     dst, dst_stride, xd->bd);
-          if (xd->lossless) {
+          if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
             TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
             const scan_order *so = get_scan(TX_4X4, tx_type);
             vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
@@ -1058,11 +1080,11 @@
             vp10_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
         tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
         xd->mi[0]->bmi[block].as_mode = mode;
-        vp10_predict_intra_block(xd, 1, TX_4X4, mode, dst, dst_stride,
+        vp10_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride,
                                 dst, dst_stride, col + idx, row + idy, 0);
         vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
 
-        if (xd->lossless) {
+        if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
           TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
           const scan_order *so = get_scan(TX_4X4, tx_type);
           vp10_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
@@ -1348,7 +1370,7 @@
                           &this_distortion, &s, &this_sse, bsize, best_rd))
       continue;
     this_rate = this_rate_tokenonly +
-                cpi->intra_uv_mode_cost[cpi->common.frame_type][mode];
+        cpi->intra_uv_mode_cost[xd->mi[0]->mbmi.mode][mode];
     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
 
     if (this_rd < best_rd) {
@@ -1371,14 +1393,14 @@
                               int *rate, int *rate_tokenonly,
                               int64_t *distortion, int *skippable,
                               BLOCK_SIZE bsize) {
-  const VP10_COMMON *cm = &cpi->common;
   int64_t unused;
 
   x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED;
   memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
   super_block_uvrd(cpi, x, rate_tokenonly, distortion,
                    skippable, &unused, bsize, INT64_MAX);
-  *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED];
+  *rate = *rate_tokenonly +
+      cpi->intra_uv_mode_cost[x->e_mbd.mi[0]->mbmi.mode][DC_PRED];
   return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
 }
 
@@ -1499,12 +1521,13 @@
 
 #if CONFIG_VP9_HIGHBITDEPTH
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    fwd_txm4x4 = xd->lossless ? vp10_highbd_fwht4x4 : vpx_highbd_fdct4x4;
+    fwd_txm4x4 = xd->lossless[mi->mbmi.segment_id] ? vp10_highbd_fwht4x4
+                                                   : vpx_highbd_fdct4x4;
   } else {
-    fwd_txm4x4 = xd->lossless ? vp10_fwht4x4 : vpx_fdct4x4;
+    fwd_txm4x4 = xd->lossless[mi->mbmi.segment_id] ? vp10_fwht4x4 : vpx_fdct4x4;
   }
 #else
-  fwd_txm4x4 = xd->lossless ? vp10_fwht4x4 : vpx_fdct4x4;
+  fwd_txm4x4 = xd->lossless[mi->mbmi.segment_id] ? vp10_fwht4x4 : vpx_fdct4x4;
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
 #if CONFIG_VP9_HIGHBITDEPTH
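
Lossless coding is now a per-segment property (xd->lossless[] indexed by segment_id) rather than a single frame-wide flag, and a lossless segment is pinned to the 4x4 Walsh-Hadamard transform, which is what choose_smallest_tx_size() above enforces. A standalone sketch of the per-segment dispatch, with the codec's transform functions replaced by stubs:

    #include <stdio.h>

    typedef void (*fwd_txfm_fn)(const short *input, int *output, int stride);

    static void fwht4x4_stub(const short *in, int *out, int stride) {
      (void)in; (void)stride; out[0] = 0; printf("WHT (lossless)\n");
    }
    static void fdct4x4_stub(const short *in, int *out, int stride) {
      (void)in; (void)stride; out[0] = 0; printf("DCT\n");
    }

    int main(void) {
      const int lossless[8] = { 1, 0, 0, 0, 0, 0, 0, 0 };  /* per segment */
      short input[16] = { 0 };
      int output[16];
      int segment_id;
      for (segment_id = 0; segment_id < 2; ++segment_id) {
        /* Mirrors the new lookup: transform choice hangs off the block's
         * segment id instead of one frame-wide flag. */
        const fwd_txfm_fn fwd_txm4x4 =
            lossless[segment_id] ? fwht4x4_stub : fdct4x4_stub;
        fwd_txm4x4(input, output, 4);
      }
      return 0;
    }
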
@@ -3462,7 +3485,7 @@
 
         // Cost the skip mb case
         rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
-      } else if (ref_frame != INTRA_FRAME && !xd->lossless) {
+      } else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
         if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
             RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
           // Add in the cost of the no skip flag.
@@ -4217,7 +4240,7 @@
       // Skip is never coded at the segment level for sub8x8 blocks and instead
       // always coded in the bitstream at the mode info level.
 
-      if (ref_frame != INTRA_FRAME && !xd->lossless) {
+      if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
         if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
             RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
           // Add in the cost of the no skip flag.
diff --git a/vp10/encoder/segmentation.c b/vp10/encoder/segmentation.c
index e5d827b..6a20ee4 100644
--- a/vp10/encoder/segmentation.c
+++ b/vp10/encoder/segmentation.c
@@ -18,6 +18,7 @@
 
 #include "vp10/encoder/cost.h"
 #include "vp10/encoder/segmentation.h"
+#include "vp10/encoder/subexp.h"
 
 void vp10_enable_segmentation(struct segmentation *seg) {
   seg->enabled = 1;
@@ -49,24 +50,40 @@
 }
 
 // Based on set of segment counts calculate a probability tree
-static void calc_segtree_probs(int *segcounts, vpx_prob *segment_tree_probs) {
+static void calc_segtree_probs(unsigned *segcounts,
+    vpx_prob *segment_tree_probs, const vpx_prob *cur_tree_probs) {
   // Work out probabilities of each segment
-  const int c01 = segcounts[0] + segcounts[1];
-  const int c23 = segcounts[2] + segcounts[3];
-  const int c45 = segcounts[4] + segcounts[5];
-  const int c67 = segcounts[6] + segcounts[7];
+  const unsigned cc[4] = {
+    segcounts[0] + segcounts[1], segcounts[2] + segcounts[3],
+    segcounts[4] + segcounts[5], segcounts[6] + segcounts[7]
+  };
+  const unsigned ccc[2] = { cc[0] + cc[1], cc[2] + cc[3] };
+#if CONFIG_MISC_FIXES
+  int i;
+#endif
 
-  segment_tree_probs[0] = get_binary_prob(c01 + c23, c45 + c67);
-  segment_tree_probs[1] = get_binary_prob(c01, c23);
-  segment_tree_probs[2] = get_binary_prob(c45, c67);
+  segment_tree_probs[0] = get_binary_prob(ccc[0], ccc[1]);
+  segment_tree_probs[1] = get_binary_prob(cc[0], cc[1]);
+  segment_tree_probs[2] = get_binary_prob(cc[2], cc[3]);
   segment_tree_probs[3] = get_binary_prob(segcounts[0], segcounts[1]);
   segment_tree_probs[4] = get_binary_prob(segcounts[2], segcounts[3]);
   segment_tree_probs[5] = get_binary_prob(segcounts[4], segcounts[5]);
   segment_tree_probs[6] = get_binary_prob(segcounts[6], segcounts[7]);
+
+#if CONFIG_MISC_FIXES
+  for (i = 0; i < 7; i++) {
+    const unsigned *ct = i == 0 ? ccc : i < 3 ? cc + (i & 2)
+        : segcounts + (i - 3) * 2;
+    vp10_prob_diff_update_savings_search(ct,
+        cur_tree_probs[i], &segment_tree_probs[i], DIFF_UPDATE_PROB);
+  }
+#else
+  (void) cur_tree_probs;
+#endif
 }
 
 // Based on set of segment counts and probabilities calculate a cost estimate
-static int cost_segmap(int *segcounts, vpx_prob *probs) {
+static int cost_segmap(unsigned *segcounts, vpx_prob *probs) {
   const int c01 = segcounts[0] + segcounts[1];
   const int c23 = segcounts[2] + segcounts[3];
   const int c45 = segcounts[4] + segcounts[5];
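
The seven tree probabilities pair the eight segment counts level by level: the leaves split adjacent segment IDs, the middle level splits the four pair sums, and the root splits the two half sums; the CONFIG_MISC_FIXES branch then searches for a cheaper differential update against cur_tree_probs. A standalone sketch of the count pairing, with get_binary_prob() re-implemented for illustration (the exact rounding and clamping are assumptions):

    #include <stdio.h>

    typedef unsigned char vpx_prob;

    /* Illustrative: probability of taking the "0" branch given counts
     * n0 and n1, clamped to the legal range [1, 255]. */
    static vpx_prob binary_prob(unsigned n0, unsigned n1) {
      const unsigned den = n0 + n1;
      unsigned long long p;
      if (den == 0) return 128;
      p = ((unsigned long long)n0 * 256 + den / 2) / den;
      if (p < 1) p = 1;
      if (p > 255) p = 255;
      return (vpx_prob)p;
    }

    int main(void) {
      /* Hypothetical counts for the eight segment IDs. */
      const unsigned seg[8] = { 40, 10, 5, 5, 20, 0, 0, 0 };
      const unsigned cc[4] = { seg[0] + seg[1], seg[2] + seg[3],
                               seg[4] + seg[5], seg[6] + seg[7] };
      const unsigned ccc[2] = { cc[0] + cc[1], cc[2] + cc[3] };
      vpx_prob t[7];
      int i;
      t[0] = binary_prob(ccc[0], ccc[1]);  /* segments 0-3 vs 4-7 */
      t[1] = binary_prob(cc[0], cc[1]);    /* 0-1 vs 2-3 */
      t[2] = binary_prob(cc[2], cc[3]);    /* 4-5 vs 6-7 */
      for (i = 0; i < 4; ++i)              /* leaves: 0v1, 2v3, 4v5, 6v7 */
        t[3 + i] = binary_prob(seg[2 * i], seg[2 * i + 1]);
      for (i = 0; i < 7; ++i) printf("tree_prob[%d] = %u\n", i, t[i]);
      return 0;
    }
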
@@ -108,9 +125,9 @@
 
 static void count_segs(const VP10_COMMON *cm, MACROBLOCKD *xd,
                        const TileInfo *tile, MODE_INFO **mi,
-                       int *no_pred_segcounts,
-                       int (*temporal_predictor_count)[2],
-                       int *t_unpred_seg_counts,
+                       unsigned *no_pred_segcounts,
+                       unsigned (*temporal_predictor_count)[2],
+                       unsigned *t_unpred_seg_counts,
                        int bw, int bh, int mi_row, int mi_col) {
   int segment_id;
 
@@ -147,9 +164,9 @@
 
 static void count_segs_sb(const VP10_COMMON *cm, MACROBLOCKD *xd,
                           const TileInfo *tile, MODE_INFO **mi,
-                          int *no_pred_segcounts,
-                          int (*temporal_predictor_count)[2],
-                          int *t_unpred_seg_counts,
+                          unsigned *no_pred_segcounts,
+                          unsigned (*temporal_predictor_count)[2],
+                          unsigned *t_unpred_seg_counts,
                           int mi_row, int mi_col,
                           BLOCK_SIZE bsize) {
   const int mis = cm->mi_stride;
@@ -197,24 +214,39 @@
 
 void vp10_choose_segmap_coding_method(VP10_COMMON *cm, MACROBLOCKD *xd) {
   struct segmentation *seg = &cm->seg;
+#if CONFIG_MISC_FIXES
+  struct segmentation_probs *segp = &cm->fc->seg;
+#else
+  struct segmentation_probs *segp = &cm->segp;
+#endif
 
   int no_pred_cost;
   int t_pred_cost = INT_MAX;
 
   int i, tile_col, mi_row, mi_col;
 
-  int temporal_predictor_count[PREDICTION_PROBS][2] = { { 0 } };
-  int no_pred_segcounts[MAX_SEGMENTS] = { 0 };
-  int t_unpred_seg_counts[MAX_SEGMENTS] = { 0 };
+#if CONFIG_MISC_FIXES
+  unsigned (*temporal_predictor_count)[2] = cm->counts.seg.pred;
+  unsigned *no_pred_segcounts = cm->counts.seg.tree_total;
+  unsigned *t_unpred_seg_counts = cm->counts.seg.tree_mispred;
+#else
+  unsigned temporal_predictor_count[PREDICTION_PROBS][2] = { { 0 } };
+  unsigned no_pred_segcounts[MAX_SEGMENTS] = { 0 };
+  unsigned t_unpred_seg_counts[MAX_SEGMENTS] = { 0 };
+#endif
 
   vpx_prob no_pred_tree[SEG_TREE_PROBS];
   vpx_prob t_pred_tree[SEG_TREE_PROBS];
   vpx_prob t_nopred_prob[PREDICTION_PROBS];
 
+#if CONFIG_MISC_FIXES
+  (void) xd;
+#else
   // Set default state for the segment tree probabilities and the
   // temporal coding probabilities
-  memset(seg->tree_probs, 255, sizeof(seg->tree_probs));
-  memset(seg->pred_probs, 255, sizeof(seg->pred_probs));
+  memset(segp->tree_probs, 255, sizeof(segp->tree_probs));
+  memset(segp->pred_probs, 255, sizeof(segp->pred_probs));
+#endif
 
   // First of all generate stats regarding how well the last segment map
   // predicts this one
@@ -237,14 +269,14 @@
 
   // Work out probability tree for coding segments without prediction
   // and the cost.
-  calc_segtree_probs(no_pred_segcounts, no_pred_tree);
+  calc_segtree_probs(no_pred_segcounts, no_pred_tree, segp->tree_probs);
   no_pred_cost = cost_segmap(no_pred_segcounts, no_pred_tree);
 
   // Key frames cannot use temporal prediction
   if (!frame_is_intra_only(cm)) {
     // Work out probability tree for coding those segments not
     // predicted using the temporal method and the cost.
-    calc_segtree_probs(t_unpred_seg_counts, t_pred_tree);
+    calc_segtree_probs(t_unpred_seg_counts, t_pred_tree, segp->tree_probs);
     t_pred_cost = cost_segmap(t_unpred_seg_counts, t_pred_tree);
 
     // Add in the cost of the signaling for each prediction context.
@@ -252,7 +284,13 @@
       const int count0 = temporal_predictor_count[i][0];
       const int count1 = temporal_predictor_count[i][1];
 
+#if CONFIG_MISC_FIXES
+      vp10_prob_diff_update_savings_search(temporal_predictor_count[i],
+                                           segp->pred_probs[i],
+                                           &t_nopred_prob[i], DIFF_UPDATE_PROB);
+#else
       t_nopred_prob[i] = get_binary_prob(count0, count1);
+#endif
 
       // Add in the predictor signaling cost
       t_pred_cost += count0 * vp10_cost_zero(t_nopred_prob[i]) +
@@ -263,19 +301,30 @@
   // Now choose which coding method to use.
   if (t_pred_cost < no_pred_cost) {
     seg->temporal_update = 1;
-    memcpy(seg->tree_probs, t_pred_tree, sizeof(t_pred_tree));
-    memcpy(seg->pred_probs, t_nopred_prob, sizeof(t_nopred_prob));
+#if !CONFIG_MISC_FIXES
+    memcpy(segp->tree_probs, t_pred_tree, sizeof(t_pred_tree));
+    memcpy(segp->pred_probs, t_nopred_prob, sizeof(t_nopred_prob));
+#endif
   } else {
     seg->temporal_update = 0;
-    memcpy(seg->tree_probs, no_pred_tree, sizeof(no_pred_tree));
+#if !CONFIG_MISC_FIXES
+    memcpy(segp->tree_probs, no_pred_tree, sizeof(no_pred_tree));
+#endif
   }
 }
 
-void vp10_reset_segment_features(struct segmentation *seg) {
+void vp10_reset_segment_features(VP10_COMMON *cm) {
+  struct segmentation *seg = &cm->seg;
+#if !CONFIG_MISC_FIXES
+  struct segmentation_probs *segp = &cm->segp;
+#endif
+
   // Set up default state for MB feature flags
   seg->enabled = 0;
   seg->update_map = 0;
   seg->update_data = 0;
-  memset(seg->tree_probs, 255, sizeof(seg->tree_probs));
+#if !CONFIG_MISC_FIXES
+  memset(segp->tree_probs, 255, sizeof(segp->tree_probs));
+#endif
   vp10_clearall_segfeatures(seg);
 }
diff --git a/vp10/encoder/segmentation.h b/vp10/encoder/segmentation.h
index 91a9937..b8e6c06 100644
--- a/vp10/encoder/segmentation.h
+++ b/vp10/encoder/segmentation.h
@@ -44,7 +44,7 @@
 
 void vp10_choose_segmap_coding_method(VP10_COMMON *cm, MACROBLOCKD *xd);
 
-void vp10_reset_segment_features(struct segmentation *seg);
+void vp10_reset_segment_features(VP10_COMMON *cm);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/vp10/encoder/subexp.c b/vp10/encoder/subexp.c
index fd0b09b..67e820b 100644
--- a/vp10/encoder/subexp.c
+++ b/vp10/encoder/subexp.c
@@ -16,7 +16,7 @@
 
 #define vp10_cost_upd256  ((int)(vp10_cost_one(upd) - vp10_cost_zero(upd)))
 
-static const int update_bits[255] = {
+static const uint8_t update_bits[255] = {
    5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
    6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
    8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
@@ -25,7 +25,8 @@
   10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
   10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
   10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-  10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  10, 11 - CONFIG_MISC_FIXES,
+          11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
   11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
   11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
   11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
@@ -46,7 +47,7 @@
 
 static int remap_prob(int v, int m) {
   int i;
-  static const int map_table[MAX_PROB - 1] = {
+  static const uint8_t map_table[MAX_PROB - 1] = {
     // generated by:
     //   map_table[j] = split_index(j, MAX_PROB - 1, MODULUS_PARAM);
      20,  21,  22,  23,  24,  25,   0,  26,  27,  28,  29,  30,  31,  32,  33,
@@ -85,7 +86,7 @@
 
 static void encode_uniform(vpx_writer *w, int v) {
   const int l = 8;
-  const int m = (1 << l) - 191;
+  const int m = (1 << l) - 191 + CONFIG_MISC_FIXES;
   if (v < m) {
     vpx_write_literal(w, v, l - 1);
   } else {
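
encode_uniform() writes a shortened near-uniform code: values below the modulus m cost l - 1 bits, everything else costs a full l bits (an l - 1 bit codeword plus a parity bit), and the CONFIG_MISC_FIXES bump widens the cheap range by one value. A minimal sketch with the arithmetic coder stubbed out as plain bit printing:

    #include <stdio.h>

    /* Stand-in for vpx_write_literal(): print bits instead of coding them. */
    static void write_literal(int v, int n) {
      int i;
      for (i = n - 1; i >= 0; --i) putchar('0' + ((v >> i) & 1));
    }

    static void encode_uniform(int v, int m, int l) {
      if (v < m) {
        write_literal(v, l - 1);                /* cheap: l - 1 bits */
      } else {
        write_literal(m + ((v - m) >> 1), l - 1);
        write_literal((v - m) & 1, 1);          /* full l bits in total */
      }
      putchar('\n');
    }

    int main(void) {
      const int l = 8;
      const int m = (1 << l) - 191;  /* 65; the fix above makes this 66 */
      encode_uniform(10, m, l);      /* 7 bits */
      encode_uniform(100, m, l);     /* 8 bits */
      return 0;
    }
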
diff --git a/vp10/encoder/tokenize.c b/vp10/encoder/tokenize.c
index 2c9998b..e568c0b 100644
--- a/vp10/encoder/tokenize.c
+++ b/vp10/encoder/tokenize.c
@@ -443,7 +443,9 @@
   TOKENEXTRA **tp;
 };
 
-static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize,
+static void set_entropy_context_b(int plane, int block,
+                                  int blk_row, int blk_col,
+                                  BLOCK_SIZE plane_bsize,
                                   TX_SIZE tx_size, void *arg) {
   struct tokenize_b_args* const args = arg;
   ThreadData *const td = args->td;
@@ -451,10 +453,8 @@
   MACROBLOCKD *const xd = &x->e_mbd;
   struct macroblock_plane *p = &x->plane[plane];
   struct macroblockd_plane *pd = &xd->plane[plane];
-  int aoff, loff;
-  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &aoff, &loff);
   vp10_set_contexts(xd, pd, plane_bsize, tx_size, p->eobs[block] > 0,
-                   aoff, loff);
+                    blk_col, blk_row);
 }
 
 static INLINE void add_token(TOKENEXTRA **t, const vpx_prob *context_tree,
@@ -520,7 +520,8 @@
   }
 }
 
-static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
+static void tokenize_b(int plane, int block, int blk_row, int blk_col,
+                       BLOCK_SIZE plane_bsize,
                        TX_SIZE tx_size, void *arg) {
   struct tokenize_b_args* const args = arg;
   VP10_COMP *cpi = args->cpi;
@@ -553,11 +554,8 @@
   const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size);
   int16_t token;
   EXTRABIT extra;
-  int aoff, loff;
-  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &aoff, &loff);
-
-  pt = get_entropy_context(tx_size, pd->above_context + aoff,
-                           pd->left_context + loff);
+  pt = get_entropy_context(tx_size, pd->above_context + blk_col,
+                           pd->left_context + blk_row);
   scan = so->scan;
   nb = so->neighbors;
   c = 0;
@@ -597,20 +595,22 @@
 
   *tp = t;
 
-  vp10_set_contexts(xd, pd, plane_bsize, tx_size, c > 0, aoff, loff);
+  vp10_set_contexts(xd, pd, plane_bsize, tx_size, c > 0, blk_col, blk_row);
 }
 
 struct is_skippable_args {
   uint16_t *eobs;
   int *skippable;
 };
-static void is_skippable(int plane, int block,
+static void is_skippable(int plane, int block, int blk_row, int blk_col,
                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                          void *argv) {
   struct is_skippable_args *args = argv;
   (void)plane;
   (void)plane_bsize;
   (void)tx_size;
+  (void)blk_row;
+  (void)blk_col;
   args->skippable[0] &= (!args->eobs[block]);
 }
 
@@ -624,13 +624,15 @@
   return result;
 }
 
-static void has_high_freq_coeff(int plane, int block,
+static void has_high_freq_coeff(int plane, int block, int blk_row, int blk_col,
                                 BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                                 void *argv) {
   struct is_skippable_args *args = argv;
   int eobs = (tx_size == TX_4X4) ? 3 : 10;
   (void) plane;
   (void) plane_bsize;
+  (void) blk_row;
+  (void) blk_col;
 
   *(args->skippable) |= (args->eobs[block] > eobs);
 }
diff --git a/vp10/vp10_cx_iface.c b/vp10/vp10_cx_iface.c
index 409ed1c..21c9c03 100644
--- a/vp10/vp10_cx_iface.c
+++ b/vp10/vp10_cx_iface.c
@@ -795,7 +795,7 @@
   marker |= ctx->pending_frame_count - 1;
 #if CONFIG_MISC_FIXES
   for (i = 0; i < ctx->pending_frame_count - 1; i++) {
-    const size_t frame_sz = (unsigned int) ctx->pending_frame_sizes[i];
+    const size_t frame_sz = (unsigned int) ctx->pending_frame_sizes[i] - 1;
     max_frame_sz = frame_sz > max_frame_sz ? frame_sz : max_frame_sz;
   }
 #endif
@@ -836,8 +836,10 @@
 
     *x++ = marker;
     for (i = 0; i < ctx->pending_frame_count - CONFIG_MISC_FIXES; i++) {
-      unsigned int this_sz = (unsigned int)ctx->pending_frame_sizes[i];
+      unsigned int this_sz;
 
+      assert(ctx->pending_frame_sizes[i] > 0);
+      this_sz = (unsigned int)ctx->pending_frame_sizes[i] - CONFIG_MISC_FIXES;
       for (j = 0; j <= mag; j++) {
         *x++ = this_sz & 0xff;
         this_sz >>= 8;
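
With CONFIG_MISC_FIXES every size field in the superframe index is stored minus one, hence the new assert that no frame is empty, and the magnitude (bytes per field) is computed over size - 1 as well. A standalone sketch of packing one little-endian size field under both conventions:

    #include <assert.h>
    #include <stdio.h>

    /* Pack one superframe size field little-endian into mag + 1 bytes;
     * minus_one mirrors CONFIG_MISC_FIXES (sizes stored biased by -1). */
    static unsigned char *pack_size(unsigned char *x, unsigned int frame_size,
                                    int mag, int minus_one) {
      unsigned int this_sz;
      int j;
      assert(frame_size > 0);
      this_sz = frame_size - (minus_one ? 1u : 0u);
      for (j = 0; j <= mag; j++) {
        *x++ = (unsigned char)(this_sz & 0xff);
        this_sz >>= 8;
      }
      return x;
    }

    int main(void) {
      unsigned char buf[4];
      unsigned char *end = pack_size(buf, 256, 1, 1);  /* stored as 255 */
      int i;
      for (i = 0; i < (int)(end - buf); ++i) printf("%02x ", buf[i]);
      printf("\n");
      return 0;
    }
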
@@ -1154,30 +1156,6 @@
   }
 }
 
-static vpx_codec_err_t ctrl_update_entropy(vpx_codec_alg_priv_t *ctx,
-                                           va_list args) {
-  const int update = va_arg(args, int);
-
-  vp10_update_entropy(ctx->cpi, update);
-  return VPX_CODEC_OK;
-}
-
-static vpx_codec_err_t ctrl_update_reference(vpx_codec_alg_priv_t *ctx,
-                                             va_list args) {
-  const int ref_frame_flags = va_arg(args, int);
-
-  vp10_update_reference(ctx->cpi, ref_frame_flags);
-  return VPX_CODEC_OK;
-}
-
-static vpx_codec_err_t ctrl_use_reference(vpx_codec_alg_priv_t *ctx,
-                                          va_list args) {
-  const int reference_flag = va_arg(args, int);
-
-  vp10_use_as_reference(ctx->cpi, reference_flag);
-  return VPX_CODEC_OK;
-}
-
 static vpx_codec_err_t ctrl_set_roi_map(vpx_codec_alg_priv_t *ctx,
                                         va_list args) {
   (void)ctx;
@@ -1274,9 +1252,6 @@
 
 static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
   {VP8_COPY_REFERENCE,                ctrl_copy_reference},
-  {VP8E_UPD_ENTROPY,                  ctrl_update_entropy},
-  {VP8E_UPD_REFERENCE,                ctrl_update_reference},
-  {VP8E_USE_REFERENCE,                ctrl_use_reference},
 
   // Setters
   {VP8_SET_REFERENCE,                 ctrl_set_reference},
diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c
index 2a50e59..35ad891 100644
--- a/vp8/common/reconintra4x4.c
+++ b/vp8/common/reconintra4x4.c
@@ -29,7 +29,7 @@
     pred[B_LD_PRED] = vpx_d45e_predictor_4x4;
     pred[B_RD_PRED] = vpx_d135_predictor_4x4;
     pred[B_VR_PRED] = vpx_d117_predictor_4x4;
-    pred[B_VL_PRED] = vpx_d63e_predictor_4x4;
+    pred[B_VL_PRED] = vpx_d63f_predictor_4x4;
     pred[B_HD_PRED] = vpx_d153_predictor_4x4;
     pred[B_HU_PRED] = vpx_d207_predictor_4x4;
 }
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 80ea6b4..c125ae8 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -1277,9 +1277,6 @@
     {VP8_SET_REFERENCE,                 vp8e_set_reference},
     {VP8_COPY_REFERENCE,                vp8e_get_reference},
     {VP8_SET_POSTPROC,                  vp8e_set_previewpp},
-    {VP8E_UPD_ENTROPY,                  vp8e_update_entropy},
-    {VP8E_UPD_REFERENCE,                vp8e_update_reference},
-    {VP8E_USE_REFERENCE,                vp8e_use_reference},
     {VP8E_SET_FRAME_FLAGS,              vp8e_set_frame_flags},
     {VP8E_SET_TEMPORAL_LAYER_ID,        vp8e_set_temporal_layer_id},
     {VP8E_SET_ROI_MAP,                  vp8e_set_roi_map},
diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c
index 579857b..1c81581 100644
--- a/vp9/common/vp9_entropy.c
+++ b/vp9/common/vp9_entropy.c
@@ -403,7 +403,6 @@
   {255, 241, 243, 255, 236, 255, 252, 254},
   {255, 243, 245, 255, 237, 255, 252, 254},
   {255, 246, 247, 255, 239, 255, 253, 255},
-  {255, 246, 247, 255, 239, 255, 253, 255},
 };
 
 static const vp9_coeff_probs_model default_coef_probs_4x4[PLANE_TYPES] = {
@@ -743,7 +742,9 @@
 };
 
 static void extend_to_full_distribution(vpx_prob *probs, vpx_prob p) {
-  memcpy(probs, vp9_pareto8_full[p = 0 ? 0 : p - 1],
+  // TODO(aconverse): model[PIVOT_NODE] should never be zero.
+  // https://code.google.com/p/webm/issues/detail?id=1089
+  memcpy(probs, vp9_pareto8_full[p == 0 ? 254 : p - 1],
          MODEL_NODES * sizeof(vpx_prob));
 }
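
The removed line held a precedence bug: p = 0 ? 0 : p - 1 parses as p = (0 ? 0 : p - 1), so the zero guard never fired and p == 0 indexed the table at -1. The fix clamps p == 0 to the last row, consistent with COEFF_PROB_MODELS dropping to 255 below (the table has one row per probability 1..255). A small demonstration of the parse:

    #include <stdio.h>

    int main(void) {
      int p, idx_old, idx_new;
      /* The conditional operator binds tighter than assignment, so the
       * old expression always computed p - 1, even for p == 0. */
      p = 0;
      idx_old = (p = 0 ? 0 : p - 1);     /* parses as p = p - 1: yields -1 */
      p = 0;
      idx_new = (p == 0 ? 254 : p - 1);  /* fixed: p == 0 maps to row 254 */
      printf("old index: %d, fixed index: %d\n", idx_old, idx_new);
      return 0;
    }
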
 
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h
index 21611ed..63b3bff 100644
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -138,7 +138,7 @@
 // 1, 3, 5, 7, ..., 253, 255
 // In between probabilities are interpolated linearly
 
-#define COEFF_PROB_MODELS 256
+#define COEFF_PROB_MODELS 255
 
 #define UNCONSTRAINED_NODES         3
 
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 90bde55..ceffded 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -112,7 +112,7 @@
 typedef struct VP9Common {
   struct vpx_internal_error_info  error;
   vpx_color_space_t color_space;
-  int color_range;
+  vpx_color_range_t color_range;
   int width;
   int height;
   int render_width;
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
index 3d84a28..13a95ae 100644
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -133,6 +133,9 @@
   int frame_width, frame_height;
   int x0, y0;
   const struct macroblockd_plane *const pd = &xd->plane[plane];
+  const int need_left = extend_modes[mode] & NEED_LEFT;
+  const int need_above = extend_modes[mode] & NEED_ABOVE;
+  const int need_aboveright = extend_modes[mode] & NEED_ABOVERIGHT;
   int base = 128 << (bd - 8);
   // 127 127 127 .. 127 127 127 127 127 127
   // 129  A   B  ..  Y   Z
@@ -153,79 +156,110 @@
   x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
   y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
 
-  // left
-  if (left_available) {
-    if (xd->mb_to_bottom_edge < 0) {
-      /* slower path if the block needs border extension */
-      if (y0 + bs <= frame_height) {
+  // NEED_LEFT
+  if (need_left) {
+    if (left_available) {
+      if (xd->mb_to_bottom_edge < 0) {
+        /* slower path if the block needs border extension */
+        if (y0 + bs <= frame_height) {
+          for (i = 0; i < bs; ++i)
+            left_col[i] = ref[i * ref_stride - 1];
+        } else {
+          const int extend_bottom = frame_height - y0;
+          for (i = 0; i < extend_bottom; ++i)
+            left_col[i] = ref[i * ref_stride - 1];
+          for (; i < bs; ++i)
+            left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1];
+        }
+      } else {
+        /* faster path if the block does not need extension */
         for (i = 0; i < bs; ++i)
           left_col[i] = ref[i * ref_stride - 1];
-      } else {
-        const int extend_bottom = frame_height - y0;
-        for (i = 0; i < extend_bottom; ++i)
-          left_col[i] = ref[i * ref_stride - 1];
-        for (; i < bs; ++i)
-          left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1];
       }
     } else {
-      /* faster path if the block does not need extension */
-      for (i = 0; i < bs; ++i)
-        left_col[i] = ref[i * ref_stride - 1];
+      // TODO(Peter): this value should probably change for high bitdepth
+      vpx_memset16(left_col, base + 1, bs);
     }
-  } else {
-    // TODO(Peter): this value should probably change for high bitdepth
-    vpx_memset16(left_col, base + 1, bs);
   }
 
-  // TODO(hkuang) do not extend 2*bs pixels for all modes.
-  // above
-  if (up_available) {
-    const uint16_t *above_ref = ref - ref_stride;
-    if (xd->mb_to_right_edge < 0) {
-      /* slower path if the block needs border extension */
-      if (x0 + 2 * bs <= frame_width) {
-        if (right_available && bs == 4) {
-          memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0]));
+  // NEED_ABOVE
+  if (need_above) {
+    if (up_available) {
+      const uint16_t *above_ref = ref - ref_stride;
+      if (xd->mb_to_right_edge < 0) {
+        /* slower path if the block needs border extension */
+        if (x0 + bs <= frame_width) {
+          memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
+        } else if (x0 <= frame_width) {
+          const int r = frame_width - x0;
+          memcpy(above_row, above_ref, r * sizeof(above_row[0]));
+          vpx_memset16(above_row + r, above_row[r - 1], x0 + bs - frame_width);
+        }
+      } else {
+        /* faster path if the block does not need extension */
+        if (bs == 4 && right_available && left_available) {
+          const_above_row = above_ref;
         } else {
           memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
-          vpx_memset16(above_row + bs, above_row[bs - 1], bs);
         }
-      } else if (x0 + bs <= frame_width) {
-        const int r = frame_width - x0;
-        if (right_available && bs == 4) {
+      }
+      above_row[-1] = left_available ? above_ref[-1] : (base + 1);
+    } else {
+      vpx_memset16(above_row, base - 1, bs);
+      above_row[-1] = base - 1;
+    }
+  }
+
+  // NEED_ABOVERIGHT
+  if (need_aboveright) {
+    if (up_available) {
+      const uint16_t *above_ref = ref - ref_stride;
+      if (xd->mb_to_right_edge < 0) {
+        /* slower path if the block needs border extension */
+        if (x0 + 2 * bs <= frame_width) {
+          if (right_available && bs == 4) {
+            memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0]));
+          } else {
+            memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
+            vpx_memset16(above_row + bs, above_row[bs - 1], bs);
+          }
+        } else if (x0 + bs <= frame_width) {
+          const int r = frame_width - x0;
+          if (right_available && bs == 4) {
+            memcpy(above_row, above_ref, r * sizeof(above_row[0]));
+            vpx_memset16(above_row + r, above_row[r - 1],
+                         x0 + 2 * bs - frame_width);
+          } else {
+            memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
+            vpx_memset16(above_row + bs, above_row[bs - 1], bs);
+          }
+        } else if (x0 <= frame_width) {
+          const int r = frame_width - x0;
           memcpy(above_row, above_ref, r * sizeof(above_row[0]));
           vpx_memset16(above_row + r, above_row[r - 1],
                        x0 + 2 * bs - frame_width);
+        }
+        // TODO(Peter) this value should probably change for high bitdepth
+        above_row[-1] = left_available ? above_ref[-1] : (base + 1);
+      } else {
+        /* faster path if the block does not need extension */
+        if (bs == 4 && right_available && left_available) {
+          const_above_row = above_ref;
         } else {
           memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
-          vpx_memset16(above_row + bs, above_row[bs - 1], bs);
+          if (bs == 4 && right_available)
+            memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0]));
+          else
+            vpx_memset16(above_row + bs, above_row[bs - 1], bs);
+          // TODO(Peter): this value should probably change for high bitdepth
+          above_row[-1] = left_available ? above_ref[-1] : (base + 1);
         }
-      } else if (x0 <= frame_width) {
-        const int r = frame_width - x0;
-        memcpy(above_row, above_ref, r * sizeof(above_row[0]));
-        vpx_memset16(above_row + r, above_row[r - 1],
-                       x0 + 2 * bs - frame_width);
       }
-      // TODO(Peter) this value should probably change for high bitdepth
-      above_row[-1] = left_available ? above_ref[-1] : (base+1);
     } else {
-      /* faster path if the block does not need extension */
-      if (bs == 4 && right_available && left_available) {
-        const_above_row = above_ref;
-      } else {
-        memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
-        if (bs == 4 && right_available)
-          memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0]));
-        else
-          vpx_memset16(above_row + bs, above_row[bs - 1], bs);
-        // TODO(Peter): this value should probably change for high bitdepth
-        above_row[-1] = left_available ? above_ref[-1] : (base+1);
-      }
+      vpx_memset16(above_row, base - 1, bs * 2);
+      // TODO(Peter): this value should probably change for high bitdepth
+      above_row[-1] = base - 1;
     }
-  } else {
-    vpx_memset16(above_row, base - 1, bs * 2);
-    // TODO(Peter): this value should probably change for high bitdepth
-    above_row[-1] = base - 1;
   }
 
   // predict
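
The restructure gates each border fetch on the prediction mode's need flags, so a mode that only uses the above row no longer pays for the left column, and only modes that extrapolate toward the upper right extend a full 2 * bs pixels above. A sketch of the flag gating; the flag values and the four-entry mode table are illustrative assumptions, the real extend_modes table lives in the codec:

    #include <stdio.h>

    enum { NEED_LEFT = 1 << 1, NEED_ABOVE = 1 << 2, NEED_ABOVERIGHT = 1 << 3 };

    /* Hypothetical per-mode requirements. */
    static const int extend_modes_sketch[4] = {
      NEED_ABOVE | NEED_LEFT,  /* DC-style: both edges */
      NEED_ABOVE,              /* V-style: above row only */
      NEED_LEFT,               /* H-style: left column only */
      NEED_ABOVERIGHT,         /* D45-style: above plus above-right */
    };

    int main(void) {
      int mode;
      for (mode = 0; mode < 4; ++mode) {
        const int f = extend_modes_sketch[mode];
        printf("mode %d fetches:%s%s%s\n", mode,
               (f & NEED_LEFT) ? " left" : "",
               (f & NEED_ABOVE) ? " above" : "",
               (f & NEED_ABOVERIGHT) ? " above+above-right" : "");
      }
      return 0;
    }
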
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index ed5f4ca..5bf71ef 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -248,7 +248,7 @@
   specialize qw/vp9_highbd_block_error/, "$sse2_x86inc";
 
   add_proto qw/int64_t vp9_highbd_block_error_8bit/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
-  specialize qw/vp9_highbd_block_error_8bit/, "$sse2_x86inc";
+  specialize qw/vp9_highbd_block_error_8bit/, "$sse2_x86inc", "$avx_x86inc";
 
   add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
   specialize qw/vp9_quantize_fp/;
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 8f20686..89f8a16 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -1795,8 +1795,7 @@
   }
   cm->color_space = vpx_rb_read_literal(rb, 3);
   if (cm->color_space != VPX_CS_SRGB) {
-    // [16,235] (including xvycc) vs [0,255] range
-    cm->color_range = vpx_rb_read_bit(rb);
+    cm->color_range = (vpx_color_range_t)vpx_rb_read_bit(rb);
     if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
       cm->subsampling_x = vpx_rb_read_bit(rb);
       cm->subsampling_y = vpx_rb_read_bit(rb);
@@ -1810,7 +1809,7 @@
       cm->subsampling_y = cm->subsampling_x = 1;
     }
   } else {
-    cm->color_range = 1;
+    cm->color_range = VPX_CR_FULL_RANGE;
     if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
       // Note if colorspace is SRGB then 4:4:4 chroma sampling is assumed.
       // 4:2:2 or 4:4:0 chroma sampling is not allowed.
@@ -1916,7 +1915,7 @@
         // specifies that the default color format should be YUV 4:2:0 in this
         // case (normative).
         cm->color_space = VPX_CS_BT_601;
-        cm->color_range = 0;
+        cm->color_range = VPX_CR_STUDIO_RANGE;
         cm->subsampling_y = cm->subsampling_x = 1;
         cm->bit_depth = VPX_BITS_8;
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -1980,6 +1979,8 @@
   if (!cm->error_resilient_mode) {
     cm->refresh_frame_context = vpx_rb_read_bit(rb);
     cm->frame_parallel_decoding_mode = vpx_rb_read_bit(rb);
+    if (!cm->frame_parallel_decoding_mode)
+      vp9_zero(cm->counts);
   } else {
     cm->refresh_frame_context = 0;
     cm->frame_parallel_decoding_mode = 1;
@@ -2203,8 +2204,6 @@
     vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
                        "Uninitialized entropy context.");
 
-  vp9_zero(cm->counts);
-
   xd->corrupted = 0;
   new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size);
   if (new_fb->corrupted)
diff --git a/vp9/decoder/vp9_decodeframe.h b/vp9/decoder/vp9_decodeframe.h
index 05af706..ce33cbd 100644
--- a/vp9/decoder/vp9_decodeframe.h
+++ b/vp9/decoder/vp9_decodeframe.h
@@ -16,6 +16,8 @@
 extern "C" {
 #endif
 
+#include "vp9/common/vp9_enums.h"
+
 struct VP9Decoder;
 struct vpx_read_bit_buffer;
 
diff --git a/vp9/decoder/vp9_dsubexp.c b/vp9/decoder/vp9_dsubexp.c
index 4fbc6db..05b3853 100644
--- a/vp9/decoder/vp9_dsubexp.c
+++ b/vp9/decoder/vp9_dsubexp.c
@@ -29,7 +29,7 @@
 }
 
 static int inv_remap_prob(int v, int m) {
-  static int inv_map_table[MAX_PROB] = {
+  static uint8_t inv_map_table[MAX_PROB] = {
       7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176, 189,
     202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,  10,  11,
      12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,  25,  26,  27,
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index 2cd89c0..f221cb3 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -486,10 +486,14 @@
   // Account for larger interval on base layer for temporal layers.
   if (cr->percent_refresh > 0 &&
       rc->frames_since_key <  (4 * cpi->svc.number_temporal_layers) *
-      (100 / cr->percent_refresh))
+      (100 / cr->percent_refresh)) {
     cr->rate_ratio_qdelta = 3.0;
-  else
+  } else {
     cr->rate_ratio_qdelta = 2.0;
+    if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium)
+      // Reduce the delta-qp if the estimated source noise is above threshold.
+      cr->rate_ratio_qdelta = 1.5;
+  }
   // Adjust some parameters for low resolutions at low bitrates.
   if (cm->width <= 352 &&
       cm->height <= 288 &&
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 4615554..1a14ea9 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -123,72 +123,66 @@
 static void pack_mb_tokens(vpx_writer *w,
                            TOKENEXTRA **tp, const TOKENEXTRA *const stop,
                            vpx_bit_depth_t bit_depth) {
-  TOKENEXTRA *p = *tp;
-
-  while (p < stop && p->token != EOSB_TOKEN) {
-    const int t = p->token;
-    const struct vp9_token *const a = &vp9_coef_encodings[t];
-    int i = 0;
-    int v = a->value;
-    int n = a->len;
+  const TOKENEXTRA *p;
+  const vp9_extra_bit *const extra_bits =
 #if CONFIG_VP9_HIGHBITDEPTH
-    const vp9_extra_bit *b;
-    if (bit_depth == VPX_BITS_12)
-      b = &vp9_extra_bits_high12[t];
-    else if (bit_depth == VPX_BITS_10)
-      b = &vp9_extra_bits_high10[t];
-    else
-      b = &vp9_extra_bits[t];
+    (bit_depth == VPX_BITS_12) ? vp9_extra_bits_high12 :
+    (bit_depth == VPX_BITS_10) ? vp9_extra_bits_high10 :
+    vp9_extra_bits;
 #else
-    const vp9_extra_bit *const b = &vp9_extra_bits[t];
+    vp9_extra_bits;
     (void) bit_depth;
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-    /* skip one or two nodes */
-    if (p->skip_eob_node) {
-      n -= p->skip_eob_node;
-      i = 2 * p->skip_eob_node;
+  for (p = *tp; p < stop && p->token != EOSB_TOKEN; ++p) {
+    if (p->token == EOB_TOKEN) {
+      vpx_write(w, 0, p->context_tree[0]);
+      continue;
     }
-
-    // TODO(jbb): expanding this can lead to big gains.  It allows
-    // much better branch prediction and would enable us to avoid numerous
-    // lookups and compares.
-
-    // If we have a token that's in the constrained set, the coefficient tree
-    // is split into two treed writes.  The first treed write takes care of the
-    // unconstrained nodes.  The second treed write takes care of the
-    // constrained nodes.
-    if (t >= TWO_TOKEN && t < EOB_TOKEN) {
-      int len = UNCONSTRAINED_NODES - p->skip_eob_node;
-      int bits = v >> (n - len);
-      vp9_write_tree(w, vp9_coef_tree, p->context_tree, bits, len, i);
-      vp9_write_tree(w, vp9_coef_con_tree,
-                     vp9_pareto8_full[p->context_tree[PIVOT_NODE] - 1],
-                     v, n - len, 0);
-    } else {
-      vp9_write_tree(w, vp9_coef_tree, p->context_tree, v, n, i);
-    }
-
-    if (b->base_val) {
-      const int e = p->extra, l = b->len;
-
-      if (l) {
-        const unsigned char *pb = b->prob;
-        int v = e >> 1;
-        int n = l;              /* number of bits in v, assumed nonzero */
-
-        do {
-          const int bb = (v >> --n) & 1;
-          vpx_write(w, bb, *pb++);
-        } while (n);
+    vpx_write(w, 1, p->context_tree[0]);
+    while (p->token == ZERO_TOKEN) {
+      vpx_write(w, 0, p->context_tree[1]);
+      ++p;
+      if (p == stop || p->token == EOSB_TOKEN) {
+        *tp = (TOKENEXTRA*)(uintptr_t)p + (p->token == EOSB_TOKEN);
+        return;
       }
-
-      vpx_write_bit(w, e & 1);
     }
-    ++p;
-  }
 
-  *tp = p + (p->token == EOSB_TOKEN);
+    {
+      const int t = p->token;
+      const vpx_prob *const context_tree = p->context_tree;
+      assert(t != ZERO_TOKEN);
+      assert(t != EOB_TOKEN);
+      assert(t != EOSB_TOKEN);
+      vpx_write(w, 1, context_tree[1]);
+      if (t == ONE_TOKEN) {
+        vpx_write(w, 0, context_tree[2]);
+        vpx_write_bit(w, p->extra & 1);
+      } else {  // t >= TWO_TOKEN && t < EOB_TOKEN
+        const struct vp9_token *const a = &vp9_coef_encodings[t];
+        const int v = a->value;
+        const int n = a->len;
+        const int e = p->extra;
+        vpx_write(w, 1, context_tree[2]);
+        vp9_write_tree(w, vp9_coef_con_tree,
+                       vp9_pareto8_full[context_tree[PIVOT_NODE] - 1], v,
+                       n - UNCONSTRAINED_NODES, 0);
+        if (t >= CATEGORY1_TOKEN) {
+          const vp9_extra_bit *const b = &extra_bits[t];
+          const unsigned char *pb = b->prob;
+          int v = e >> 1;
+          int n = b->len;  // number of bits in v, assumed nonzero
+          do {
+            const int bb = (v >> --n) & 1;
+            vpx_write(w, bb, *pb++);
+          } while (n);
+        }
+        vpx_write_bit(w, e & 1);
+      }
+    }
+  }
+  *tp = (TOKENEXTRA*)(uintptr_t)p + (p->token == EOSB_TOKEN);
 }
 
 static void write_segment_id(vpx_writer *w, const struct segmentation *seg,
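
The rewrite unrolls the head of the coefficient token tree into explicit binary writes (EOB versus more coefficients, zero versus nonzero, one versus larger) and falls back to vp9_write_tree() only for the constrained tail, which is the branch-prediction win the old TODO asked for. A sketch of the unrolled head with the arithmetic writer stubbed out and illustrative token IDs:

    #include <stdio.h>

    /* Stand-in for vpx_write(): record a binary decision and the index of
     * the context-tree probability that would code it. */
    static void write_bit(int bit, int prob_idx) {
      printf("  bit %d with context_tree[%d]\n", bit, prob_idx);
    }

    static void pack_token_head(int token) {
      enum { EOB = 0, ZERO = 1, ONE = 2, BIGGER = 3 };  /* illustrative */
      if (token == EOB) { write_bit(0, 0); return; }
      write_bit(1, 0);              /* more coefficients follow */
      if (token == ZERO) { write_bit(0, 1); return; }
      write_bit(1, 1);              /* nonzero coefficient */
      if (token == ONE) { write_bit(0, 2); return; }
      write_bit(1, 2);              /* magnitude > 1: constrained tree next */
    }

    int main(void) {
      int t;
      for (t = 0; t < 4; ++t) {
        printf("token %d:\n", t);
        pack_token_head(t);
      }
      return 0;
    }
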
diff --git a/vp9/encoder/vp9_context_tree.c b/vp9/encoder/vp9_context_tree.c
index e87cccb..396ed3f 100644
--- a/vp9/encoder/vp9_context_tree.c
+++ b/vp9/encoder/vp9_context_tree.c
@@ -30,13 +30,13 @@
   for (i = 0; i < MAX_MB_PLANE; ++i) {
     for (k = 0; k < 3; ++k) {
       CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
-                      vpx_memalign(16, num_pix * sizeof(*ctx->coeff[i][k])));
+                      vpx_memalign(32, num_pix * sizeof(*ctx->coeff[i][k])));
       CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
-                      vpx_memalign(16, num_pix * sizeof(*ctx->qcoeff[i][k])));
+                      vpx_memalign(32, num_pix * sizeof(*ctx->qcoeff[i][k])));
       CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k],
-                      vpx_memalign(16, num_pix * sizeof(*ctx->dqcoeff[i][k])));
+                      vpx_memalign(32, num_pix * sizeof(*ctx->dqcoeff[i][k])));
       CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
-                      vpx_memalign(16, num_blk * sizeof(*ctx->eobs[i][k])));
+                      vpx_memalign(32, num_blk * sizeof(*ctx->eobs[i][k])));
       ctx->coeff_pbuf[i][k]   = ctx->coeff[i][k];
       ctx->qcoeff_pbuf[i][k]  = ctx->qcoeff[i][k];
       ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k];
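
The coefficient buffers move from 16- to 32-byte alignment. A plausible reading, given the AVX specialization of vp9_highbd_block_error_8bit enabled in vp9_rtcd_defs.pl above, is that this permits aligned 256-bit loads over these buffers. A sketch of the allocation contract, with C11 aligned_alloc standing in for vpx_memalign:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(void) {
      /* 64 * 64 coefficients, as for one plane of a 64x64 superblock; the
       * element type is illustrative (tran_low_t may be wider). */
      const size_t num_pix = 64 * 64;
      int16_t *coeff = aligned_alloc(32, num_pix * sizeof(*coeff));
      if (coeff == NULL) return 1;
      printf("32-byte aligned: %s\n",
             ((uintptr_t)coeff % 32 == 0) ? "yes" : "no");
      free(coeff);
      return 0;
    }
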
diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c
index 678e312..c592832 100644
--- a/vp9/encoder/vp9_denoiser.c
+++ b/vp9/encoder/vp9_denoiser.c
@@ -10,6 +10,8 @@
 
 #include <assert.h>
 #include <limits.h>
+#include <math.h>
+
 #include "./vpx_dsp_rtcd.h"
 #include "vpx_dsp/vpx_dsp_common.h"
 #include "vpx_scale/yv12config.h"
@@ -17,13 +19,9 @@
 #include "vp9/common/vp9_reconinter.h"
 #include "vp9/encoder/vp9_context_tree.h"
 #include "vp9/encoder/vp9_denoiser.h"
+#include "vp9/encoder/vp9_encoder.h"
 
-/* The VP9 denoiser is a work-in-progress. It currently is only designed to work
- * with speed 6, though it (inexplicably) seems to also work with speed 5 (one
- * would need to modify the source code in vp9_pickmode.c and vp9_encoder.c to
- * make the calls to the vp9_denoiser_* functions when in speed 5).
- *
- * The implementation is very similar to that of the VP8 denoiser. While
+/* The VP9 denoiser is similar to that of the VP8 denoiser. While
  * choosing the motion vectors / reference frames, the denoiser is run, and if
  * it did not modify the signal to much, the denoised block is copied to the
  * signal.
@@ -195,7 +193,7 @@
                                                          int mi_row,
                                                          int mi_col,
                                                          PICK_MODE_CONTEXT *ctx,
-                                                         int *motion_magnitude,
+                                                         int motion_magnitude,
                                                          int is_skin) {
   int mv_col, mv_row;
   int sse_diff = ctx->zeromv_sse - ctx->newmv_sse;
@@ -209,18 +207,17 @@
 
   mv_col = ctx->best_sse_mv.as_mv.col;
   mv_row = ctx->best_sse_mv.as_mv.row;
-  *motion_magnitude = mv_row * mv_row + mv_col * mv_col;
   frame = ctx->best_reference_frame;
 
   saved_mbmi = *mbmi;
 
-  if (is_skin && *motion_magnitude > 16)
+  if (is_skin && motion_magnitude > 16)
     return COPY_BLOCK;
 
   // If the best reference frame uses inter-prediction and there is enough of a
   // difference in sum-squared-error, use it.
   if (frame != INTRA_FRAME &&
-      sse_diff > sse_diff_thresh(bs, increase_denoising, *motion_magnitude)) {
+      sse_diff > sse_diff_thresh(bs, increase_denoising, motion_magnitude)) {
     mbmi->ref_frame[0] = ctx->best_reference_frame;
     mbmi->mode = ctx->best_sse_inter_mode;
     mbmi->mv[0] = ctx->best_sse_mv;
@@ -242,7 +239,7 @@
     *mbmi = saved_mbmi;
     return COPY_BLOCK;
   }
-  if (*motion_magnitude >
+  if (motion_magnitude >
      (noise_motion_thresh(bs, increase_denoising) << 3)) {
     // Restore everything to its original state
     *mbmi = saved_mbmi;
@@ -315,6 +312,7 @@
 void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
                           int mi_row, int mi_col, BLOCK_SIZE bs,
                           PICK_MODE_CONTEXT *ctx) {
+  int mv_col, mv_row;
   int motion_magnitude = 0;
   VP9_DENOISER_DECISION decision = COPY_BLOCK;
   YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME];
@@ -325,7 +323,7 @@
   struct buf_2d src = mb->plane[0].src;
   int is_skin = 0;
 
-  if (bs <= BLOCK_16X16) {
+  if (bs <= BLOCK_16X16 && denoiser->denoising_level >= kDenMedium) {
     // Take center pixel in block to determine is_skin.
     const int y_width_shift = (4 << b_width_log2_lookup[bs]) >> 1;
     const int y_height_shift = (4 << b_height_log2_lookup[bs]) >> 1;
@@ -342,17 +340,28 @@
     is_skin = vp9_skin_pixel(ysource, usource, vsource);
   }
 
-  decision = perform_motion_compensation(denoiser, mb, bs,
-                                         denoiser->increase_denoising,
-                                         mi_row, mi_col, ctx,
-                                         &motion_magnitude,
-                                         is_skin);
+  mv_col = ctx->best_sse_mv.as_mv.col;
+  mv_row = ctx->best_sse_mv.as_mv.row;
+  motion_magnitude = mv_row * mv_row + mv_col * mv_col;
+  if (denoiser->denoising_level == kDenHigh && motion_magnitude < 16) {
+    denoiser->increase_denoising = 1;
+  } else {
+    denoiser->increase_denoising = 0;
+  }
+
+  if (denoiser->denoising_level >= kDenMedium)
+    decision = perform_motion_compensation(denoiser, mb, bs,
+                                           denoiser->increase_denoising,
+                                           mi_row, mi_col, ctx,
+                                           motion_magnitude,
+                                           is_skin);
 
   if (decision == FILTER_BLOCK) {
     decision = vp9_denoiser_filter(src.buf, src.stride,
                                  mc_avg_start, mc_avg.y_stride,
                                  avg_start, avg.y_stride,
-                                 0, bs, motion_magnitude);
+                                 denoiser->increase_denoising,
+                                 bs, motion_magnitude);
   }
 
   if (decision == FILTER_BLOCK) {
@@ -499,6 +508,17 @@
     vp9_denoiser_free(denoiser);
     return 1;
   }
+
+  fail = vpx_alloc_frame_buffer(&denoiser->last_source, width, height,
+                                ssx, ssy,
+#if CONFIG_VP9_HIGHBITDEPTH
+                                use_highbitdepth,
+#endif
+                                border, legacy_byte_alignment);
+  if (fail) {
+    vp9_denoiser_free(denoiser);
+    return 1;
+  }
 #ifdef OUTPUT_YUV_DENOISED
   make_grayscale(&denoiser->running_avg_y[i]);
 #endif
@@ -517,6 +537,12 @@
     vpx_free_frame_buffer(&denoiser->running_avg_y[i]);
   }
   vpx_free_frame_buffer(&denoiser->mc_running_avg_y);
+  vpx_free_frame_buffer(&denoiser->last_source);
+}
+
+void vp9_denoiser_set_noise_level(VP9_DENOISER *denoiser,
+                                  int noise_level) {
+  denoiser->denoising_level = noise_level;
 }
 
 #ifdef OUTPUT_YUV_DENOISED
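
Motion magnitude is now computed once in vp9_denoiser_denoise() from the best-SSE motion vector and passed down by value; the new denoising level controls both whether motion compensation runs at all (kDenMedium and up) and whether the filter strengthens on near-static blocks (kDenHigh only). A sketch of that gating:

    #include <stdio.h>

    typedef enum { kDenLow, kDenMedium, kDenHigh } VP9_DENOISER_LEVEL;

    static void denoise_gating(VP9_DENOISER_LEVEL level,
                               int mv_row, int mv_col) {
      const int motion_magnitude = mv_row * mv_row + mv_col * mv_col;
      const int aggressive = (level == kDenHigh && motion_magnitude < 16);
      const int run_mc = (level >= kDenMedium);
      printf("level %d, |mv|^2 = %3d: mc=%d aggressive=%d\n",
             level, motion_magnitude, run_mc, aggressive);
    }

    int main(void) {
      denoise_gating(kDenLow, 1, 2);     /* motion compensation skipped */
      denoise_gating(kDenMedium, 1, 2);  /* mc runs, normal strength */
      denoise_gating(kDenHigh, 1, 2);    /* near-static: stronger filter */
      denoise_gating(kDenHigh, 8, 3);    /* moving: normal strength */
      return 0;
    }
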
diff --git a/vp9/encoder/vp9_denoiser.h b/vp9/encoder/vp9_denoiser.h
index ec0b25e..8bed9e8 100644
--- a/vp9/encoder/vp9_denoiser.h
+++ b/vp9/encoder/vp9_denoiser.h
@@ -26,13 +26,23 @@
   FILTER_BLOCK
 } VP9_DENOISER_DECISION;
 
+typedef enum vp9_denoiser_level {
+  kDenLow,
+  kDenMedium,
+  kDenHigh
+} VP9_DENOISER_LEVEL;
+
 typedef struct vp9_denoiser {
   YV12_BUFFER_CONFIG running_avg_y[MAX_REF_FRAMES];
   YV12_BUFFER_CONFIG mc_running_avg_y;
+  YV12_BUFFER_CONFIG last_source;
   int increase_denoising;
   int frame_buffer_initialized;
+  VP9_DENOISER_LEVEL denoising_level;
 } VP9_DENOISER;
 
+struct VP9_COMP;
+
 void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser,
                                     YV12_BUFFER_CONFIG src,
                                     FRAME_TYPE frame_type,
@@ -69,6 +79,9 @@
 
 void vp9_denoiser_free(VP9_DENOISER *denoiser);
 
+void vp9_denoiser_set_noise_level(VP9_DENOISER *denoiser,
+                                  int noise_level);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 2333a13..9d66839 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -481,7 +481,7 @@
   VP9_COMMON *const cm = &cpi->common;
   const int is_key_frame = (cm->frame_type == KEY_FRAME);
   const int threshold_multiplier = is_key_frame ? 20 : 1;
-  const int64_t threshold_base = (int64_t)(threshold_multiplier *
+  int64_t threshold_base = (int64_t)(threshold_multiplier *
       cpi->y_dequant[q][1]);
   if (is_key_frame) {
     thresholds[0] = threshold_base;
@@ -489,6 +489,14 @@
     thresholds[2] = threshold_base >> 2;
     thresholds[3] = threshold_base << 2;
   } else {
+    // Increase base variance threshold if estimated noise level is high.
+    if (cpi->noise_estimate.enabled) {
+      if (cpi->noise_estimate.level == kHigh)
+        threshold_base = threshold_base << 2;
+      else
+        if (cpi->noise_estimate.level == kMedium)
+          threshold_base = threshold_base << 1;
+    }
     thresholds[1] = threshold_base;
     if (cm->width <= 352 && cm->height <= 288) {
       thresholds[0] = threshold_base >> 2;
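
The base threshold of the variance-based partitioner is scaled 4x under high estimated noise and 2x under medium, so noisy content needs more variance before a block splits. A sketch of the scaling; the level names follow the diff's usage and the enum's exact shape is an assumption:

    #include <stdio.h>

    typedef enum { kLowLow, kLow, kMedium, kHigh } NOISE_LEVEL;  /* assumed */

    static long long scale_threshold(long long base, int enabled,
                                     NOISE_LEVEL level) {
      if (!enabled) return base;
      if (level == kHigh) return base << 2;    /* 4x for high noise */
      if (level == kMedium) return base << 1;  /* 2x for medium noise */
      return base;
    }

    int main(void) {
      printf("clean source: %lld\n", scale_threshold(1000, 1, kLow));
      printf("medium noise: %lld\n", scale_threshold(1000, 1, kMedium));
      printf("high noise:   %lld\n", scale_threshold(1000, 1, kHigh));
      return 0;
    }
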
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 5b75d67..d86a7a7 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -47,6 +47,7 @@
 #include "vp9/encoder/vp9_ethread.h"
 #include "vp9/encoder/vp9_firstpass.h"
 #include "vp9/encoder/vp9_mbgraph.h"
+#include "vp9/encoder/vp9_noise_estimate.h"
 #include "vp9/encoder/vp9_picklpf.h"
 #include "vp9/encoder/vp9_ratectrl.h"
 #include "vp9/encoder/vp9_rd.h"
@@ -805,6 +806,8 @@
   cpi->ref_frame_flags = 0;
 
   init_buffer_indices(cpi);
+
+  vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
 }
 
 static void set_rc_buffer_sizes(RATE_CONTROL *rc,
@@ -3160,6 +3163,7 @@
     // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
     set_mv_search_params(cpi);
 
+    vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
 #if CONFIG_VP9_TEMPORAL_DENOISING
     // Reset the denoiser on the resized frame.
     if (cpi->oxcf.noise_sensitivity > 0) {
@@ -3241,15 +3245,19 @@
 
   // Avoid scaling last_source unless it's needed.
   // Last source is currently only used for screen-content mode,
-  // or if partition_search_type == SOURCE_VAR_BASED_PARTITION.
+  // if partition_search_type == SOURCE_VAR_BASED_PARTITION, or if noise
+  // estimation is enabled.
   if (cpi->unscaled_last_source != NULL &&
       (cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
-      cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION))
+      cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION ||
+      cpi->noise_estimate.enabled))
     cpi->Last_Source = vp9_scale_if_required(cm,
                                              cpi->unscaled_last_source,
                                              &cpi->scaled_last_source,
                                              (cpi->oxcf.pass == 0));
 
+  vp9_update_noise_estimate(cpi);
+
   if (cpi->oxcf.pass == 0 &&
       cpi->oxcf.rc_mode == VPX_CBR &&
       cpi->resize_state == 0 &&
@@ -3790,14 +3798,17 @@
   }
 
   // For 1 pass CBR, check if we are dropping this frame.
-  // Never drop on key frame.
+  // For spatial layers, for now only check for frame-dropping on the first
+  // spatial layer; if the decision is to drop, we drop the whole super-frame.
   if (oxcf->pass == 0 &&
       oxcf->rc_mode == VPX_CBR &&
       cm->frame_type != KEY_FRAME) {
-    if (vp9_rc_drop_frame(cpi)) {
+    if (vp9_rc_drop_frame(cpi) ||
+        (is_one_pass_cbr_svc(cpi) && cpi->svc.rc_drop_superframe == 1)) {
       vp9_rc_postencode_update_drop_frame(cpi);
       ++cm->current_video_frame;
       cpi->ext_refresh_frame_flags_pending = 0;
+      cpi->svc.rc_drop_superframe = 1;
       return;
     }
   }
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 174e2b4..975d9f4 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -35,6 +35,7 @@
 #include "vp9/encoder/vp9_lookahead.h"
 #include "vp9/encoder/vp9_mbgraph.h"
 #include "vp9/encoder/vp9_mcomp.h"
+#include "vp9/encoder/vp9_noise_estimate.h"
 #include "vp9/encoder/vp9_quantize.h"
 #include "vp9/encoder/vp9_ratectrl.h"
 #include "vp9/encoder/vp9_rd.h"
@@ -238,7 +239,7 @@
   int use_highbitdepth;
 #endif
   vpx_color_space_t color_space;
-  int color_range;
+  vpx_color_range_t color_range;
   int render_width;
   int render_height;
   VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode;
@@ -469,7 +470,7 @@
 
   int mbmode_cost[INTRA_MODES];
   unsigned int inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES];
-  int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES];
+  int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES][INTRA_MODES];
   int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
   int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
   int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES];
@@ -490,6 +491,8 @@
   int resize_buffer_underflow;
   int resize_count;
 
+  NOISE_ESTIMATE noise_estimate;
+
   // VAR_BASED_PARTITION thresholds
   // 0 - threshold_64x64; 1 - threshold_32x32;
   // 2 - threshold_16x16; 3 - vbp_threshold_8x8;
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index a6b5ebb..30738b5 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -1183,10 +1183,13 @@
                                      double group_weight_factor) {
   const RATE_CONTROL *const rc = &cpi->rc;
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
+  // Clamp the target rate to VBR min / max limits.
+  const int target_rate =
+      vp9_rc_clamp_pframe_target_size(cpi, section_target_bandwidth);
 
   inactive_zone = fclamp(inactive_zone, 0.0, 1.0);
 
-  if (section_target_bandwidth <= 0) {
+  if (target_rate <= 0) {
     return rc->worst_quality;  // Highest value allowed
   } else {
     const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE)
@@ -1195,7 +1198,7 @@
     const double av_err_per_mb = section_err / active_mbs;
     const double speed_term = 1.0 + 0.04 * oxcf->speed;
     const double ediv_size_correction = (double)num_mbs / EDIV_SIZE_FACTOR;
-    const int target_norm_bits_per_mb = ((uint64_t)section_target_bandwidth <<
+    const int target_norm_bits_per_mb = ((uint64_t)target_rate <<
                                          BPER_MB_NORMBITS) / active_mbs;
 
     int q;
@@ -2444,7 +2447,7 @@
     if ((i <= rc->max_gf_interval) ||
         ((i <= (rc->max_gf_interval * 4)) && (decay_accumulator > 0.5))) {
       const double frame_boost =
-        calc_frame_boost(cpi, this_frame, 0, KF_MAX_BOOST);
+        calc_frame_boost(cpi, &next_frame, 0, KF_MAX_BOOST);
 
       // How fast is prediction quality decaying.
       if (!detect_flash(twopass, 0)) {
@@ -2737,11 +2740,6 @@
   }
 
   target_rate = gf_group->bit_allocation[gf_group->index];
-  if (cpi->common.frame_type == KEY_FRAME)
-    target_rate = vp9_rc_clamp_iframe_target_size(cpi, target_rate);
-  else
-    target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate);
-
   rc->base_frame_target = target_rate;
 
   {
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index be8f57f..9744e43 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -103,17 +103,17 @@
 void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
   int len, ss_count = 1;
 
-  cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
-  cfg->ss[0].offset = 0;
+  cfg->ss_mv[0].col = 0;
+  cfg->ss_mv[0].row = 0;
+  cfg->ss_os[0] = 0;
 
   for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
     // Generate offsets for 4 search sites per step.
     const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}};
     int i;
-    for (i = 0; i < 4; ++i) {
-      search_site *const ss = &cfg->ss[ss_count++];
-      ss->mv = ss_mvs[i];
-      ss->offset = ss->mv.row * stride + ss->mv.col;
+    for (i = 0; i < 4; ++i, ++ss_count) {
+      cfg->ss_mv[ss_count] = ss_mvs[i];
+      cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col;
     }
   }
 
@@ -124,8 +124,9 @@
 void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
   int len, ss_count = 1;
 
-  cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
-  cfg->ss[0].offset = 0;
+  cfg->ss_mv[0].col = 0;
+  cfg->ss_mv[0].row = 0;
+  cfg->ss_os[0] = 0;
 
   for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
     // Generate offsets for 8 search sites per step.
@@ -134,10 +135,9 @@
       {-len, -len}, {-len, len}, {len,  -len}, {len,  len}
     };
     int i;
-    for (i = 0; i < 8; ++i) {
-      search_site *const ss = &cfg->ss[ss_count++];
-      ss->mv = ss_mvs[i];
-      ss->offset = ss->mv.row * stride + ss->mv.col;
+    for (i = 0; i < 8; ++i, ++ss_count) {
+      cfg->ss_mv[ss_count] = ss_mvs[i];
+      cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col;
     }
   }
 
@@ -1623,7 +1623,9 @@
   // 0 = initial step (MAX_FIRST_STEP) pel
   // 1 = (MAX_FIRST_STEP/2) pel,
   // 2 = (MAX_FIRST_STEP/4) pel...
-  const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
+//  const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
+  const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step];
+  const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step];
   const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
 
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
@@ -1649,10 +1651,10 @@
 
     // All_in is true if every one of the points we are checking are within
     // the bounds of the image.
-    all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_row_min);
-    all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_row_max);
-    all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_col_min);
-    all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_col_max);
+    all_in &= ((best_mv->row + ss_mv[i].row) > x->mv_row_min);
+    all_in &= ((best_mv->row + ss_mv[i + 1].row) < x->mv_row_max);
+    all_in &= ((best_mv->col + ss_mv[i + 2].col) > x->mv_col_min);
+    all_in &= ((best_mv->col + ss_mv[i + 3].col) < x->mv_col_max);
 
     // If all the pixels are within the bounds we don't check whether the
     // search point is valid in this loop,  otherwise we check each point
@@ -1664,15 +1666,15 @@
         unsigned char const *block_offset[4];
 
         for (t = 0; t < 4; t++)
-          block_offset[t] = ss[i + t].offset + best_address;
+          block_offset[t] = ss_os[i + t] + best_address;
 
         fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
                        sad_array);
 
         for (t = 0; t < 4; t++, i++) {
           if (sad_array[t] < bestsad) {
-            const MV this_mv = {best_mv->row + ss[i].mv.row,
-                                best_mv->col + ss[i].mv.col};
+            const MV this_mv = {best_mv->row + ss_mv[i].row,
+                                best_mv->col + ss_mv[i].col};
             sad_array[t] += mvsad_err_cost(x, &this_mv, &fcenter_mv,
                                            sad_per_bit);
             if (sad_array[t] < bestsad) {
@@ -1685,11 +1687,11 @@
     } else {
       for (j = 0; j < cfg->searches_per_step; j++) {
         // Trap illegal vectors
-        const MV this_mv = {best_mv->row + ss[i].mv.row,
-                            best_mv->col + ss[i].mv.col};
+        const MV this_mv = {best_mv->row + ss_mv[i].row,
+                            best_mv->col + ss_mv[i].col};
 
         if (is_mv_in(x, &this_mv)) {
-          const uint8_t *const check_here = ss[i].offset + best_address;
+          const uint8_t *const check_here = ss_os[i] + best_address;
           unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
                                              in_what_stride);
 
@@ -1705,25 +1707,25 @@
       }
     }
     if (best_site != last_site) {
-      best_mv->row += ss[best_site].mv.row;
-      best_mv->col += ss[best_site].mv.col;
-      best_address += ss[best_site].offset;
+      best_mv->row += ss_mv[best_site].row;
+      best_mv->col += ss_mv[best_site].col;
+      best_address += ss_os[best_site];
       last_site = best_site;
 #if defined(NEW_DIAMOND_SEARCH)
       while (1) {
-        const MV this_mv = {best_mv->row + ss[best_site].mv.row,
-                            best_mv->col + ss[best_site].mv.col};
+        const MV this_mv = {best_mv->row + ss_mv[best_site].row,
+                            best_mv->col + ss_mv[best_site].col};
         if (is_mv_in(x, &this_mv)) {
-          const uint8_t *const check_here = ss[best_site].offset + best_address;
+          const uint8_t *const check_here = ss_os[best_site] + best_address;
           unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
                                              in_what_stride);
           if (thissad < bestsad) {
             thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
             if (thissad < bestsad) {
               bestsad = thissad;
-              best_mv->row += ss[best_site].mv.row;
-              best_mv->col += ss[best_site].mv.col;
-              best_address += ss[best_site].offset;
+              best_mv->row += ss_mv[best_site].row;
+              best_mv->col += ss_mv[best_site].col;
+              best_address += ss_os[best_site];
               continue;
             }
           }
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index 5efd543..e48259f 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -31,14 +31,10 @@
 // for Block_16x16
 #define BORDER_MV_PIXELS_B16 (16 + VP9_INTERP_EXTEND)
 
-// motion search site
-typedef struct search_site {
-  MV mv;
-  int offset;
-} search_site;
-
 typedef struct search_site_config {
-  search_site ss[8 * MAX_MVSEARCH_STEPS + 1];
+  // Motion search sites.
+  MV       ss_mv[8 * MAX_MVSEARCH_STEPS + 1];   // Motion vectors
+  intptr_t ss_os[8 * MAX_MVSEARCH_STEPS + 1];   // Offsets
   int ss_count;
   int searches_per_step;
 } search_site_config;
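
Note: replacing the old array-of-structs ss[] with the parallel arrays
ss_mv[]/ss_os[] is a structure-of-arrays change: the diamond-search inner loop
scans runs of offsets (or of MVs) by themselves, so each cache line now holds
twice as many useful entries, and widening the offset to intptr_t keeps the
pointer arithmetic in vp9_mcomp.c well-defined on 64-bit targets. Schematic
comparison (a condensed sketch; MV and MAX_MVSEARCH_STEPS as defined in this
codebase):

  typedef struct { int16_t row, col; } MV;
  #define MAX_MVSEARCH_STEPS 11

  struct search_site_aos {   /* before: mv and offset interleaved in memory */
    struct { MV mv; int offset; } ss[8 * MAX_MVSEARCH_STEPS + 1];
  };
  struct search_site_soa {   /* after: each array is contiguous */
    MV       ss_mv[8 * MAX_MVSEARCH_STEPS + 1];
    intptr_t ss_os[8 * MAX_MVSEARCH_STEPS + 1];
  };
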
diff --git a/vp9/encoder/vp9_noise_estimate.c b/vp9/encoder/vp9_noise_estimate.c
new file mode 100644
index 0000000..e0d7575
--- /dev/null
+++ b/vp9/encoder/vp9_noise_estimate.c
@@ -0,0 +1,226 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <math.h>
+
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_scale/yv12config.h"
+#include "vpx/vpx_integer.h"
+#include "vp9/common/vp9_reconinter.h"
+#include "vp9/encoder/vp9_context_tree.h"
+#include "vp9/encoder/vp9_noise_estimate.h"
+#include "vp9/encoder/vp9_encoder.h"
+
+void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne,
+                             int width,
+                             int height) {
+  ne->enabled = 0;
+  ne->level = kLow;
+  ne->value = 0;
+  ne->count = 0;
+  ne->thresh = 90;
+  ne->last_w = 0;
+  ne->last_h = 0;
+  if (width * height >= 1920 * 1080) {
+    ne->thresh = 200;
+  } else if (width * height >= 1280 * 720) {
+    ne->thresh = 130;
+  }
+}
+
+static int enable_noise_estimation(VP9_COMP *const cpi) {
+  // Enable noise estimation if denoising is on (and cyclic refresh, since
+  // the noise estimate currently uses a struct defined in cyclic refresh).
+#if CONFIG_VP9_TEMPORAL_DENOISING
+  if (cpi->oxcf.noise_sensitivity > 0 &&
+      cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+    return 1;
+#endif
+  // Otherwise, only allow the noise estimate under certain encoding modes:
+  // 1-pass CBR with speed >= 5, at the original (unresized) resolution,
+  // not in SVC or screen-content mode, and not at low resolutions.
+  if (cpi->oxcf.pass == 0 &&
+      cpi->oxcf.rc_mode == VPX_CBR &&
+      cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
+      cpi->oxcf.speed >= 5 &&
+      cpi->resize_state == ORIG &&
+      cpi->resize_pending == 0 &&
+      !cpi->use_svc &&
+      cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
+      cpi->common.width >= 640 &&
+      cpi->common.height >= 480)
+    return 1;
+  else
+    return 0;
+}
+
+static void copy_frame(YV12_BUFFER_CONFIG * const dest,
+                       const YV12_BUFFER_CONFIG * const src) {
+  int r;
+  const uint8_t *srcbuf = src->y_buffer;
+  uint8_t *destbuf = dest->y_buffer;
+
+  assert(dest->y_width == src->y_width);
+  assert(dest->y_height == src->y_height);
+
+  for (r = 0; r < dest->y_height; ++r) {
+    memcpy(destbuf, srcbuf, dest->y_width);
+    destbuf += dest->y_stride;
+    srcbuf += src->y_stride;
+  }
+}
+
+void vp9_update_noise_estimate(VP9_COMP *const cpi) {
+  const VP9_COMMON *const cm = &cpi->common;
+  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+  NOISE_ESTIMATE *const ne = &cpi->noise_estimate;
+  // Estimate the noise level once every frame_period frames.
+  int frame_period = 10;
+  int thresh_consec_zeromv = 8;
+  unsigned int thresh_sum_diff = 128;
+  unsigned int thresh_sum_spatial = (200 * 200) << 8;
+  unsigned int thresh_spatial_var = (32 * 32) << 8;
+  int num_frames_estimate = 20;
+  int min_blocks_estimate = cm->mi_rows * cm->mi_cols >> 7;
+  // Estimate is between current source and last source.
+  YV12_BUFFER_CONFIG *last_source = cpi->Last_Source;
+#if CONFIG_VP9_TEMPORAL_DENOISING
+  if (cpi->oxcf.noise_sensitivity > 0)
+    last_source = &cpi->denoiser.last_source;
+#endif
+  ne->enabled = enable_noise_estimation(cpi);
+  if (!ne->enabled ||
+      cm->current_video_frame % frame_period != 0 ||
+      last_source == NULL ||
+      ne->last_w != cm->width ||
+      ne->last_h != cm->height) {
+#if CONFIG_VP9_TEMPORAL_DENOISING
+    if (cpi->oxcf.noise_sensitivity > 0)
+      copy_frame(&cpi->denoiser.last_source, cpi->Source);
+#endif
+    if (last_source != NULL) {
+      ne->last_w = cm->width;
+      ne->last_h = cm->height;
+    }
+    return;
+  } else {
+    int num_samples = 0;
+    uint64_t avg_est = 0;
+    int bsize = BLOCK_16X16;
+    static const unsigned char const_source[16] = {
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+    // Loop over a sub-sample of the 16x16 blocks of the frame; for blocks
+    // that have been encoded with zero/small mv for at least
+    // thresh_consec_zeromv consecutive frames, compute the variance to
+    // update the estimate of noise in the source.
+    const uint8_t *src_y = cpi->Source->y_buffer;
+    const int src_ystride = cpi->Source->y_stride;
+    const uint8_t *last_src_y = last_source->y_buffer;
+    const int last_src_ystride = last_source->y_stride;
+    const uint8_t *src_u = cpi->Source->u_buffer;
+    const uint8_t *src_v = cpi->Source->v_buffer;
+    const int src_uvstride = cpi->Source->uv_stride;
+    const int y_width_shift = (4 << b_width_log2_lookup[bsize]) >> 1;
+    const int y_height_shift = (4 << b_height_log2_lookup[bsize]) >> 1;
+    const int uv_width_shift = y_width_shift >> 1;
+    const int uv_height_shift = y_height_shift >> 1;
+    int mi_row, mi_col;
+    for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
+      for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
+        // 16x16 blocks, 1/4 sample of frame.
+        if (mi_row % 4 == 0 && mi_col % 4 == 0) {
+          int bl_index = mi_row * cm->mi_cols + mi_col;
+          int bl_index1 = bl_index + 1;
+          int bl_index2 = bl_index + cm->mi_cols;
+          int bl_index3 = bl_index2 + 1;
+          // Only consider blocks that are likely steady background, i.e.,
+          // have been encoded as zero/low motion for the last
+          // thresh_consec_zeromv frames in a row. consec_zero_mv[] is
+          // defined for 8x8 blocks, so consider all 4 sub-blocks of the
+          // 16x16 block. Also avoid skin blocks.
+          const uint8_t ysource =
+            src_y[y_height_shift * src_ystride + y_width_shift];
+          const uint8_t usource =
+            src_u[uv_height_shift * src_uvstride + uv_width_shift];
+          const uint8_t vsource =
+            src_v[uv_height_shift * src_uvstride + uv_width_shift];
+          int is_skin = vp9_skin_pixel(ysource, usource, vsource);
+          if (cr->consec_zero_mv[bl_index] > thresh_consec_zeromv &&
+              cr->consec_zero_mv[bl_index1] > thresh_consec_zeromv &&
+              cr->consec_zero_mv[bl_index2] > thresh_consec_zeromv &&
+              cr->consec_zero_mv[bl_index3] > thresh_consec_zeromv &&
+              !is_skin) {
+            // Compute variance.
+            unsigned int sse;
+            unsigned int variance = cpi->fn_ptr[bsize].vf(src_y,
+                                                          src_ystride,
+                                                          last_src_y,
+                                                          last_src_ystride,
+                                                          &sse);
+            // Only consider this block as valid for noise measurement if the
+            // average term (sse - variance = N * avg^{2}, N = 16x16) of the
+            // temporal residual is small (to avoid effects from lighting
+            // changes).
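+            // (Here the vf() variance functions return sse = sum(d^2) with
+            // variance = sse - sum(d)^2 / N, so sse - variance =
+            // sum(d)^2 / N = N * avg^2, N being the pixel count of the
+            // block.)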
+            if ((sse - variance) < thresh_sum_diff) {
+              unsigned int sse2;
+              const unsigned int spatial_variance =
+                  cpi->fn_ptr[bsize].vf(src_y, src_ystride, const_source,
+                                        0, &sse2);
+              // Avoid blocks with high brightness and high spatial variance.
+              if ((sse2 - spatial_variance) < thresh_sum_spatial &&
+                  spatial_variance < thresh_spatial_var) {
+                avg_est += variance / ((spatial_variance >> 9) + 1);
+                num_samples++;
+              }
+            }
+          }
+        }
+        src_y += 8;
+        last_src_y += 8;
+        src_u += 4;
+        src_v += 4;
+      }
+      src_y += (src_ystride << 3) - (cm->mi_cols << 3);
+      last_src_y += (last_src_ystride << 3) - (cm->mi_cols << 3);
+      src_u += (src_uvstride << 2) - (cm->mi_cols << 2);
+      src_v += (src_uvstride << 2) - (cm->mi_cols << 2);
+    }
+    ne->last_w = cm->width;
+    ne->last_h = cm->height;
+    // Update the noise estimate if we have at least a minimum number of
+    // block samples, and avg_est > 0 (avg_est == 0 can happen if the
+    // application inputs duplicate frames).
+    if (num_samples > min_blocks_estimate && avg_est > 0) {
+      // Normalize.
+      avg_est = avg_est / num_samples;
+      // Update noise estimate.
+      ne->value = (int)((3 * ne->value + avg_est) >> 2);
+      ne->count++;
+      if (ne->count == num_frames_estimate) {
+        // Reset counter and check noise level condition.
+        ne->count = 0;
+        if (ne->value > (ne->thresh << 1))
+          ne->level = kHigh;
+        else if (ne->value > ne->thresh)
+          ne->level = kMedium;
+        else
+          ne->level = kLow;
+      }
+    }
+  }
+#if CONFIG_VP9_TEMPORAL_DENOISING
+  if (cpi->oxcf.noise_sensitivity > 0) {
+    copy_frame(&cpi->denoiser.last_source, cpi->Source);
+    vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level);
+  }
+#endif
+}
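
Note: ne->value = (3 * ne->value + avg_est) >> 2 above is an exponential
moving average with weight 1/4 on the new sample, so the estimate tracks slow
changes in source noise while staying robust to single-frame outliers. A
condensed sketch of the smoothing plus the level decision (constants as set in
this file; NOISE_ESTIMATE as declared in the header below):

  static void noise_level_update_sketch(NOISE_ESTIMATE *ne, uint64_t avg_est) {
    ne->value = (int)((3 * ne->value + avg_est) >> 2);  /* EMA, alpha = 1/4 */
    if (++ne->count == 20) {                            /* num_frames_estimate */
      ne->count = 0;
      if (ne->value > (ne->thresh << 1))
        ne->level = kHigh;
      else if (ne->value > ne->thresh)
        ne->level = kMedium;
      else
        ne->level = kLow;
    }
  }
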
diff --git a/vp9/encoder/vp9_noise_estimate.h b/vp9/encoder/vp9_noise_estimate.h
new file mode 100644
index 0000000..22885b8
--- /dev/null
+++ b/vp9/encoder/vp9_noise_estimate.h
@@ -0,0 +1,54 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_ENCODER_NOISE_ESTIMATE_H_
+#define VP9_ENCODER_NOISE_ESTIMATE_H_
+
+#include "vp9/encoder/vp9_block.h"
+#include "vp9/encoder/vp9_skin_detection.h"
+#include "vpx_scale/yv12config.h"
+
+#if CONFIG_VP9_TEMPORAL_DENOISING
+#include "vp9/encoder/vp9_denoiser.h"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum noise_level {
+  kLow,
+  kMedium,
+  kHigh
+} NOISE_LEVEL;
+
+typedef struct noise_estimate {
+  int enabled;
+  NOISE_LEVEL level;
+  int value;
+  int thresh;
+  int count;
+  int last_w;
+  int last_h;
+} NOISE_ESTIMATE;
+
+struct VP9_COMP;
+
+void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne,
+                             int width,
+                             int height);
+
+void vp9_update_noise_estimate(struct VP9_COMP *const cpi);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP9_ENCODER_NOISE_ESTIMATE_H_
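
Note: the header above is the estimator's whole public surface. A sketch of
the call pattern as wired up by this change (cpi is the encoder instance; the
fields follow vp9_encoder.h as modified above):

  vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
  /* ...then once per encoded frame: */
  vp9_update_noise_estimate(cpi);
  if (cpi->noise_estimate.enabled && cpi->noise_estimate.level == kHigh) {
    /* consumers react, e.g. vp9_pickmode.c keeps the full intra cost
       penalty, and the temporal denoiser adjusts its filtering */
  }
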
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index fc4d9ae..c495c42 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -779,14 +779,20 @@
                                  struct buf_2d yv12_mb[][MAX_MB_PLANE],
                                  int *rate, int64_t *dist) {
   MACROBLOCKD *xd = &x->e_mbd;
-
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
   unsigned int var = var_y, sse = sse_y;
   // Skipping threshold for ac.
   unsigned int thresh_ac;
   // Skipping threshold for dc.
   unsigned int thresh_dc;
-  if (x->encode_breakout > 0) {
+  int motion_low = 1;
+  if (mbmi->mv[0].as_mv.row > 64 ||
+      mbmi->mv[0].as_mv.row < -64 ||
+      mbmi->mv[0].as_mv.col > 64 ||
+      mbmi->mv[0].as_mv.col < -64)
+    motion_low = 0;
+  if (x->encode_breakout > 0 && motion_low == 1) {
     // Set a maximum for threshold to avoid big PSNR loss in low bit rate
     // case. Use extreme low threshold for static frames to limit
     // skipping.
@@ -1068,6 +1074,18 @@
     {GOLDEN_FRAME, NEWMV}
 };
 
+static int set_intra_cost_penalty(const VP9_COMP *const cpi,
+                                  BLOCK_SIZE bsize) {
+  const VP9_COMMON *const cm = &cpi->common;
+  // Reduce the intra cost penalty for small blocks (<=16x16).
+  int reduction_fac =
+      (bsize <= BLOCK_16X16) ? ((bsize <= BLOCK_8X8) ? 4 : 2) : 0;
+  if (cpi->noise_estimate.enabled && cpi->noise_estimate.level == kHigh)
+    // Don't reduce intra cost penalty if estimated noise level is high.
+    reduction_fac = 0;
+  return vp9_get_intra_cost_penalty(
+      cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth) >> reduction_fac;
+}
+
 // TODO(jingning) placeholder for inter-frame non-RD mode decision.
 // this needs various further optimizations. to be continued..
 void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
@@ -1094,11 +1112,7 @@
   // var_y and sse_y are saved to be used in skipping checking
   unsigned int var_y = UINT_MAX;
   unsigned int sse_y = UINT_MAX;
-  // Reduce the intra cost penalty for small blocks (<=16x16).
-  const int reduction_fac = (bsize <= BLOCK_16X16) ?
-      ((bsize <= BLOCK_8X8) ? 4 : 2) : 0;
-  const int intra_cost_penalty = vp9_get_intra_cost_penalty(
-      cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth) >> reduction_fac;
+  const int intra_cost_penalty = set_intra_cost_penalty(cpi, bsize);
   const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv,
                                            intra_cost_penalty, 0);
   const int *const rd_threshes = cpi->rd.threshes[mbmi->segment_id][bsize];
@@ -1469,6 +1483,20 @@
     this_rdc.rate += ref_frame_cost[ref_frame];
     this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
 
+    // Bias against non-zero (above some threshold) motion for large blocks.
+    // This is a temporary fix to avoid the selection of large mv for big
+    // blocks.
+    if (cpi->oxcf.speed >= 5 &&
+        cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
+        (frame_mv[this_mode][ref_frame].as_mv.row > 64 ||
+        frame_mv[this_mode][ref_frame].as_mv.row < -64 ||
+        frame_mv[this_mode][ref_frame].as_mv.col > 64 ||
+        frame_mv[this_mode][ref_frame].as_mv.col < -64)) {
+      if (bsize == BLOCK_64X64)
+        this_rdc.rdcost = this_rdc.rdcost << 1;
+      else if (bsize >= BLOCK_32X32)
+        this_rdc.rdcost = 3 * this_rdc.rdcost >> 1;
+    }
+
     // Skipping checking: test to see if this block can be reconstructed by
     // prediction only.
     if (cpi->allow_encode_breakout) {
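
Note: the added bias scales the RD cost of large-motion candidates on big
blocks: x2 for 64x64, x1.5 for 32x32 and up, and no change for smaller blocks.
The 64 threshold is in eighth-pel units, i.e. 8 full pixels. An equivalent
fixed-point helper (a sketch of the scaling only):

  static int64_t bias_large_mv_rdcost(int64_t rdcost, BLOCK_SIZE bsize) {
    if (bsize == BLOCK_64X64)
      return rdcost << 1;          /* multiply by 2 */
    if (bsize >= BLOCK_32X32)
      return (3 * rdcost) >> 1;    /* multiply by 1.5 */
    return rdcost;                 /* below 32x32: no bias */
  }
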
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 1d13199..0377cb5 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -370,8 +370,9 @@
 int vp9_rc_drop_frame(VP9_COMP *cpi) {
   const VP9EncoderConfig *oxcf = &cpi->oxcf;
   RATE_CONTROL *const rc = &cpi->rc;
-
-  if (!oxcf->drop_frames_water_mark) {
+  if (!oxcf->drop_frames_water_mark ||
+      (is_one_pass_cbr_svc(cpi) &&
+       cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode)) {
     return 0;
   } else {
     if (rc->buffer_level < 0) {
@@ -1816,6 +1817,11 @@
   RATE_CONTROL *const rc = &cpi->rc;
   int target_rate = rc->base_frame_target;
 
+  if (cpi->common.frame_type == KEY_FRAME)
+    target_rate = vp9_rc_clamp_iframe_target_size(cpi, target_rate);
+  else
+    target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate);
+
   // Correction to rate target based on prior over or under shoot.
   if (cpi->oxcf.rc_mode == VPX_VBR || cpi->oxcf.rc_mode == VPX_CQ)
     vbr_rate_correction(cpi, &target_rate);
@@ -1830,6 +1836,9 @@
   RESIZE_ACTION resize_action = NO_RESIZE;
   int avg_qp_thr1 = 70;
   int avg_qp_thr2 = 50;
+  int min_width = 180;
+  int min_height = 180;
+  int down_size_on = 1;
   cpi->resize_scale_num = 1;
   cpi->resize_scale_den = 1;
   // Don't resize on key frame; reset the counters on key frame.
@@ -1838,6 +1847,21 @@
     cpi->resize_count = 0;
     return 0;
   }
+  // Check the current frame resolution to avoid generating frames smaller
+  // than the minimum resolution.
+  if (ONEHALFONLY_RESIZE) {
+    if ((cm->width >> 1) < min_width || (cm->height >> 1) < min_height)
+      down_size_on = 0;
+  } else {
+    if (cpi->resize_state == ORIG &&
+        (cm->width * 3 / 4 < min_width ||
+         cm->height * 3 / 4 < min_height))
+      return 0;
+    else if (cpi->resize_state == THREE_QUARTER &&
+             ((cpi->oxcf.width >> 1) < min_width ||
+              (cpi->oxcf.height >> 1) < min_height))
+      down_size_on = 0;
+  }
 
 #if CONFIG_VP9_TEMPORAL_DENOISING
   // If denoiser is on, apply a smaller qp threshold.
@@ -1849,7 +1873,7 @@
 
   // Resize based on average buffer underflow and QP over some window.
   // Ignore samples close to key frame, since QP is usually high after key.
-  if (cpi->rc.frames_since_key > 1 * cpi->framerate) {
+  if (cpi->rc.frames_since_key > 2 * cpi->framerate) {
     const int window = (int)(4 * cpi->framerate);
     cpi->resize_avg_qp += cm->base_qindex;
     if (cpi->rc.buffer_level < (int)(30 * rc->optimal_buffer_level / 100))
@@ -1864,7 +1888,7 @@
       // down state, i.e. 1/2 or 3/4 of original resolution.
       // Currently, use a flag to turn 3/4 resizing feature on/off.
       if (cpi->resize_buffer_underflow > (cpi->resize_count >> 2)) {
-        if (cpi->resize_state == THREE_QUARTER) {
+        if (cpi->resize_state == THREE_QUARTER && down_size_on) {
           resize_action = DOWN_ONEHALF;
           cpi->resize_state = ONE_HALF;
         } else if (cpi->resize_state == ORIG) {
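
Note: the min_width/min_height guard above keeps dynamic resizing from
producing frames below a 180x180 floor: the next candidate size (1/2 in
ONEHALFONLY mode, otherwise the 3/4 step and then the subsequent 1/2 step) is
checked before down_size_on permits another downscale. A condensed form of the
test (sketch; the helper name is illustrative):

  static int downscale_allowed(int w, int h, int num, int den) {
    const int kMinWidth = 180, kMinHeight = 180;  /* values from this patch */
    return (w * num / den) >= kMinWidth && (h * num / den) >= kMinHeight;
  }
  /* e.g. 3/4 step: downscale_allowed(w, h, 3, 4);
          1/2 step: downscale_allowed(w, h, 1, 2). */
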
diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c
index b085c7a..eda7743 100644
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -76,10 +76,12 @@
                       vp9_intra_mode_tree);
 
   vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
-  vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME],
-                  vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
-  vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME],
-                  fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
+  for (i = 0; i < INTRA_MODES; ++i) {
+    vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME][i],
+                    vp9_kf_uv_mode_prob[i], vp9_intra_mode_tree);
+    vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME][i],
+                    fc->uv_mode_prob[i], vp9_intra_mode_tree);
+  }
 
   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
     vp9_cost_tokens(cpi->switchable_interp_costs[i],
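
Note: widening the table to [FRAME_TYPES][INTRA_MODES][INTRA_MODES] conditions
the UV intra mode cost on the co-located Y mode, matching how the bitstream
codes UV modes from vp9_kf_uv_mode_prob[y_mode] / fc->uv_mode_prob[y_mode]
rather than a single fixed TM_PRED context. Lookup sketch:

  /* Cost of signalling uv_mode given the block's luma mode. */
  static int uv_mode_rate(const VP9_COMP *cpi, FRAME_TYPE frame_type,
                          PREDICTION_MODE y_mode, PREDICTION_MODE uv_mode) {
    return cpi->intra_uv_mode_cost[frame_type][y_mode][uv_mode];
  }
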
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 1944291..a400501 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -296,30 +296,11 @@
                                       const tran_low_t *dqcoeff,
                                       intptr_t block_size,
                                       int64_t *ssz) {
-  int i;
-  int32_t c, d;
-  int64_t error = 0, sqcoeff = 0;
-  int16_t diff;
-
-  const int32_t hi = 0x00007fff;
-  const int32_t lo = 0xffff8000;
-
-  for (i = 0; i < block_size; i++) {
-    c = coeff[i];
-    d = dqcoeff[i];
-
-    // Saturate to 16 bits
-    c = (c > hi) ? hi : ((c < lo) ? lo : c);
-    d = (d > hi) ? hi : ((d < lo) ? lo : d);
-
-    diff = d - c;
-    error +=  diff * diff;
-    sqcoeff += c * c;
-  }
-  assert(error >= 0 && sqcoeff >= 0);
-
-  *ssz = sqcoeff;
-  return error;
+  // Note that the C versions of these two functions (vp9_block_error and
+  // vp9_highbd_block_error_8bit) are the same, but the optimized assembly
+  // routines are not compatible in the non-high-bitdepth configuration, so
+  // they still cannot share the same name.
+  return vp9_block_error_c(coeff, dqcoeff, block_size, ssz);
 }
 
 static int64_t vp9_highbd_block_error_dispatch(const tran_low_t *coeff,
@@ -1218,7 +1199,8 @@
                           &this_distortion, &s, &this_sse, bsize, best_rd))
       continue;
     this_rate = this_rate_tokenonly +
-                cpi->intra_uv_mode_cost[cpi->common.frame_type][mode];
+        cpi->intra_uv_mode_cost[cpi->common.frame_type]
+                                [xd->mi[0]->mbmi.mode][mode];
     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
 
     if (this_rd < best_rd) {
@@ -1248,7 +1230,9 @@
   memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
   super_block_uvrd(cpi, x, rate_tokenonly, distortion,
                    skippable, &unused, bsize, INT64_MAX);
-  *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED];
+  *rate = *rate_tokenonly +
+      cpi->intra_uv_mode_cost[cm->frame_type]
+                              [x->e_mbd.mi[0]->mbmi.mode][DC_PRED];
   return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
 }
 
diff --git a/vp9/encoder/vp9_subexp.c b/vp9/encoder/vp9_subexp.c
index 799f179..7aa8fc3 100644
--- a/vp9/encoder/vp9_subexp.c
+++ b/vp9/encoder/vp9_subexp.c
@@ -16,7 +16,7 @@
 
 #define vp9_cost_upd256  ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd)))
 
-static const int update_bits[255] = {
+static const uint8_t update_bits[255] = {
    5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
    6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
    8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
@@ -46,7 +46,7 @@
 
 static int remap_prob(int v, int m) {
   int i;
-  static const int map_table[MAX_PROB - 1] = {
+  static const uint8_t map_table[MAX_PROB - 1] = {
     // generated by:
     //   map_table[j] = split_index(j, MAX_PROB - 1, MODULUS_PARAM);
      20,  21,  22,  23,  24,  25,   0,  26,  27,  28,  29,  30,  31,  32,  33,
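
Note: both tables only hold values that fit in a byte (small per-symbol bit
counts above, and map indexes below MAX_PROB = 255 here), so narrowing them
from int to uint8_t shrinks the read-only data to a quarter without changing
any result. Illustrative declarations (assuming a 4-byte int):

  static const int     wide[255];     /* 255 * sizeof(int)     = 1020 bytes */
  static const uint8_t narrow[255];   /* 255 * sizeof(uint8_t) =  255 bytes */
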
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index 8a6818c..1dfc45c 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -31,6 +31,7 @@
   svc->spatial_layer_id = 0;
   svc->temporal_layer_id = 0;
   svc->first_spatial_layer_to_encode = 0;
+  svc->rc_drop_superframe = 0;
 
   if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
     if (vpx_realloc_frame_buffer(&cpi->svc.empty_frame.img,
@@ -568,6 +569,9 @@
     }
   }
 
+  if (cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode)
+    cpi->svc.rc_drop_superframe = 0;
+
   lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id *
                                cpi->svc.number_temporal_layers +
                                cpi->svc.temporal_layer_id];
diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h
index 694b5ab..5dbf9b4 100644
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -56,6 +56,7 @@
 
   int spatial_layer_to_encode;
   int first_spatial_layer_to_encode;
+  int rc_drop_superframe;
 
   // Workaround for multiple frame contexts
   enum {
diff --git a/vp9/encoder/x86/vp9_highbd_error_avx.asm b/vp9/encoder/x86/vp9_highbd_error_avx.asm
new file mode 100644
index 0000000..e476323
--- /dev/null
+++ b/vp9/encoder/x86/vp9_highbd_error_avx.asm
@@ -0,0 +1,261 @@
+;
+;  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+%define private_prefix vp9
+
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION .text
+ALIGN 16
+
+;
+; int64_t vp9_highbd_block_error_8bit(int32_t *coeff, int32_t *dqcoeff,
+;                                     intptr_t block_size, int64_t *ssz)
+;
+
+INIT_XMM avx
+cglobal highbd_block_error_8bit, 4, 5, 8, uqc, dqc, size, ssz
+  vzeroupper
+
+  ; If only one iteration is required, then handle this as a special case.
+  ; It is the most frequent case, so we can have a significant gain here
+  ; by not setting up a loop and accumulators.
+  cmp    sizeq, 16
+  jne   .generic
+
+  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+  ;; Common case of size == 16
+  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+  ; Load input vectors
+  mova      xm0, [dqcq]
+  packssdw  xm0, [dqcq+16]
+  mova      xm2, [uqcq]
+  packssdw  xm2, [uqcq+16]
+
+  mova      xm1, [dqcq+32]
+  packssdw  xm1, [dqcq+48]
+  mova      xm3, [uqcq+32]
+  packssdw  xm3, [uqcq+48]
+
+  ; Compute the errors.
+  psubw     xm0, xm2
+  psubw     xm1, xm3
+
+  ; Individual errors are max 15bit+sign, so squares are 30bit, and
+  ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit).
+  pmaddwd   xm2, xm2
+  pmaddwd   xm3, xm3
+
+  pmaddwd   xm0, xm0
+  pmaddwd   xm1, xm1
+
+  ; Squares are always positive, so we can use unsigned arithmetic after
+  ; squaring. As mentioned earlier, 2 sums fit in 31 bits, so 4 sums will
+  ; fit in 32 bits.
+  paddd     xm2, xm3
+  paddd     xm0, xm1
+
+  ; Accumulate horizontally in 64 bits, there is no chance of overflow here
+  pxor      xm5, xm5
+
+  pblendw   xm3, xm5, xm2, 0x33 ; Zero extended  low of a pair of 32 bits
+  psrlq     xm2, 32             ; Zero extended high of a pair of 32 bits
+
+  pblendw   xm1, xm5, xm0, 0x33 ; Zero extended  low of a pair of 32 bits
+  psrlq     xm0, 32             ; Zero extended high of a pair of 32 bits
+
+  paddq     xm2, xm3
+  paddq     xm0, xm1
+
+  psrldq    xm3, xm2, 8
+  psrldq    xm1, xm0, 8
+
+  paddq     xm2, xm3
+  paddq     xm0, xm1
+
+  ; Store the return value
+%if ARCH_X86_64
+  movq      rax, xm0
+  movq   [sszq], xm2
+%else
+  movd      eax, xm0
+  pextrd    edx, xm0, 1
+  movq   [sszd], xm2
+%endif
+  RET
+
+  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+  ;; Generic case of size != 16, speculative low precision
+  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+  ALIGN 16
+.generic:
+  pxor      xm4, xm4                ; sse accumulator
+  pxor      xm5, xm5                ; overflow detection register for xm4
+  pxor      xm6, xm6                ; ssz accumulator
+  pxor      xm7, xm7                ; overflow detection register for xm6
+  lea      uqcq, [uqcq+sizeq*4]
+  lea      dqcq, [dqcq+sizeq*4]
+  neg     sizeq
+
+  ; Push the negative size as the high precision code might need it
+  push    sizeq
+
+.loop:
+  ; Load input vectors
+  mova      xm0, [dqcq+sizeq*4]
+  packssdw  xm0, [dqcq+sizeq*4+16]
+  mova      xm2, [uqcq+sizeq*4]
+  packssdw  xm2, [uqcq+sizeq*4+16]
+
+  mova      xm1, [dqcq+sizeq*4+32]
+  packssdw  xm1, [dqcq+sizeq*4+48]
+  mova      xm3, [uqcq+sizeq*4+32]
+  packssdw  xm3, [uqcq+sizeq*4+48]
+
+  add     sizeq, 16
+
+  ; Compute the squared errors.
+  ; Individual errors are max 15bit+sign, so squares are 30bit, and
+  ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit).
+  psubw     xm0, xm2
+  pmaddwd   xm2, xm2
+  pmaddwd   xm0, xm0
+
+  psubw     xm1, xm3
+  pmaddwd   xm3, xm3
+  pmaddwd   xm1, xm1
+
+  ; Squares are always positive, so we can use unsigned arithmetic after
+  ; squaring. As mentioned earlier, 2 sums fit in 31 bits, so 4 sums will
+  ; fit in 32 bits.
+  paddd     xm2, xm3
+  paddd     xm0, xm1
+
+  ; We accumulate using 32 bit arithmetic, but detect potential overflow
+  ; by checking if the MSB of the accumulators have ever been a set bit.
+  ; If yes, we redo the whole compute at the end on higher precision, but
+  ; this happens extremely rarely, so we still achieve a net gain.
+  paddd     xm4, xm0
+  paddd     xm6, xm2
+  por       xm5, xm4  ; OR in the accumulator for overflow detection
+  por       xm7, xm6  ; OR in the accumulator for overflow detection
+
+  jnz .loop
+
+  ; Add pairs horizontally (still only on 32 bits)
+  phaddd    xm4, xm4
+  por       xm5, xm4  ; OR in the accumulator for overflow detection
+  phaddd    xm6, xm6
+  por       xm7, xm6  ; OR in the accumulator for overflow detection
+
+  ; Check for the possibility of overflow by testing whether bit 31 (the MSB)
+  ; of each dword lane has ever been set. If it never was, there was no
+  ; overflow and the final sums fit in 32 bits. If overflow may have
+  ; happened, redo the whole computation at higher precision.
+  por       xm7, xm5
+  pmovmskb   r4, xm7
+  test       r4, 0x8888
+  jnz .highprec
+
+  phaddd    xm4, xm4
+  phaddd    xm6, xm6
+  pmovzxdq  xm4, xm4
+  pmovzxdq  xm6, xm6
+
+  ; Restore stack
+  pop     sizeq
+
+  ; Store the return value
+%if ARCH_X86_64
+  movq      rax, xm4
+  movq   [sszq], xm6
+%else
+  movd      eax, xm4
+  pextrd    edx, xm4, 1
+  movq   [sszd], xm6
+%endif
+  RET
+
+  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+  ;; Generic case of size != 16, high precision case
+  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+.highprec:
+  pxor      xm4, xm4                 ; sse accumulator
+  pxor      xm5, xm5                 ; dedicated zero register
+  pxor      xm6, xm6                 ; ssz accumulator
+  pop     sizeq
+
+.loophp:
+  mova      xm0, [dqcq+sizeq*4]
+  packssdw  xm0, [dqcq+sizeq*4+16]
+  mova      xm2, [uqcq+sizeq*4]
+  packssdw  xm2, [uqcq+sizeq*4+16]
+
+  mova      xm1, [dqcq+sizeq*4+32]
+  packssdw  xm1, [dqcq+sizeq*4+48]
+  mova      xm3, [uqcq+sizeq*4+32]
+  packssdw  xm3, [uqcq+sizeq*4+48]
+
+  add     sizeq, 16
+
+  ; individual errors are max. 15bit+sign, so squares are 30bit, and
+  ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit)
+
+  psubw     xm0, xm2
+  pmaddwd   xm2, xm2
+  pmaddwd   xm0, xm0
+
+  psubw     xm1, xm3
+  pmaddwd   xm3, xm3
+  pmaddwd   xm1, xm1
+
+  ; accumulate in 64bit
+  punpckldq xm7, xm0, xm5
+  punpckhdq xm0, xm5
+  paddq     xm4, xm7
+
+  punpckldq xm7, xm2, xm5
+  punpckhdq xm2, xm5
+  paddq     xm6, xm7
+
+  punpckldq xm7, xm1, xm5
+  punpckhdq xm1, xm5
+  paddq     xm4, xm7
+
+  punpckldq xm7, xm3, xm5
+  punpckhdq xm3, xm5
+  paddq     xm6, xm7
+
+  paddq     xm4, xm0
+  paddq     xm4, xm1
+  paddq     xm6, xm2
+  paddq     xm6, xm3
+
+  jnz .loophp
+
+  ; Accumulate horizontally
+  movhlps   xm5, xm4
+  movhlps   xm7, xm6
+  paddq     xm4, xm5
+  paddq     xm6, xm7
+
+  ; Store the return value
+%if ARCH_X86_64
+  movq      rax, xm4
+  movq   [sszq], xm6
+%else
+  movd      eax, xm4
+  pextrd    edx, xm4, 1
+  movq   [sszd], xm6
+%endif
+  RET
+
+END
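
Note: the .generic path accumulates in 32 bits and only falls back to the
64-bit .highprec loop when overflow might have occurred: the running OR in
xm5/xm7 remembers whether bit 31 of any dword lane was ever set, and the
pmovmskb / test 0x8888 pair inspects exactly those four per-lane sign bits. A
C analogue of the detection idea (a sketch, not the asm itself; assumes
<stdint.h> and that each addend is below 2^31, as the squared-error sums here
are):

  static uint64_t sum_sketch(const uint32_t *v, int n) {
    uint32_t acc = 0, seen = 0;
    int i;
    for (i = 0; i < n; ++i) {
      acc += v[i];        /* fast path: 32-bit adds, may silently wrap */
      seen |= acc;        /* records whether bit 31 was ever set */
    }
    if (!(seen & 0x80000000u))
      return acc;         /* bit 31 never set => no wrap was possible */
    {
      uint64_t wide = 0;  /* rare slow path: redo at higher precision */
      for (i = 0; i < n; ++i) wide += v[i];
      return wide;
    }
  }
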
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index e36a044..6ccba0f 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -45,7 +45,7 @@
   vpx_bit_depth_t             bit_depth;
   vp9e_tune_content           content;
   vpx_color_space_t           color_space;
-  int                         color_range;
+  vpx_color_range_t           color_range;
   int                         render_width;
   int                         render_height;
 };
@@ -327,7 +327,8 @@
     ERROR("Codec bit-depth 8 not supported in profile > 1");
   }
   RANGE_CHECK(extra_cfg, color_space, VPX_CS_UNKNOWN, VPX_CS_SRGB);
-  RANGE_CHECK(extra_cfg, color_range, 0, 2);
+  RANGE_CHECK(extra_cfg, color_range,
+              VPX_CR_STUDIO_RANGE, VPX_CR_FULL_RANGE);
   return VPX_CODEC_OK;
 }
 
@@ -1266,30 +1267,6 @@
   }
 }
 
-static vpx_codec_err_t ctrl_update_entropy(vpx_codec_alg_priv_t *ctx,
-                                           va_list args) {
-  const int update = va_arg(args, int);
-
-  vp9_update_entropy(ctx->cpi, update);
-  return VPX_CODEC_OK;
-}
-
-static vpx_codec_err_t ctrl_update_reference(vpx_codec_alg_priv_t *ctx,
-                                             va_list args) {
-  const int ref_frame_flags = va_arg(args, int);
-
-  vp9_update_reference(ctx->cpi, ref_frame_flags);
-  return VPX_CODEC_OK;
-}
-
-static vpx_codec_err_t ctrl_use_reference(vpx_codec_alg_priv_t *ctx,
-                                          va_list args) {
-  const int reference_flag = va_arg(args, int);
-
-  vp9_use_as_reference(ctx->cpi, reference_flag);
-  return VPX_CODEC_OK;
-}
-
 static vpx_codec_err_t ctrl_set_roi_map(vpx_codec_alg_priv_t *ctx,
                                         va_list args) {
   (void)ctx;
@@ -1482,9 +1459,6 @@
 
 static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
   {VP8_COPY_REFERENCE,                ctrl_copy_reference},
-  {VP8E_UPD_ENTROPY,                  ctrl_update_entropy},
-  {VP8E_UPD_REFERENCE,                ctrl_update_reference},
-  {VP8E_USE_REFERENCE,                ctrl_use_reference},
 
   // Setters
   {VP8_SET_REFERENCE,                 ctrl_set_reference},
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index c6b1ba9..be5d160 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -24,62 +24,13 @@
 #include "vp9/common/vp9_alloccommon.h"
 #include "vp9/common/vp9_frame_buffers.h"
 
-#include "vp9/decoder/vp9_decoder.h"
 #include "vp9/decoder/vp9_decodeframe.h"
 
+#include "vp9/vp9_dx_iface.h"
 #include "vp9/vp9_iface_common.h"
 
 #define VP9_CAP_POSTPROC (CONFIG_VP9_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0)
 
-typedef vpx_codec_stream_info_t vp9_stream_info_t;
-
-// This limit is due to framebuffer numbers.
-// TODO(hkuang): Remove this limit after implementing ondemand framebuffers.
-#define FRAME_CACHE_SIZE 6   // Cache maximum 6 decoded frames.
-
-typedef struct cache_frame {
-  int fb_idx;
-  vpx_image_t img;
-} cache_frame;
-
-struct vpx_codec_alg_priv {
-  vpx_codec_priv_t        base;
-  vpx_codec_dec_cfg_t     cfg;
-  vp9_stream_info_t       si;
-  int                     postproc_cfg_set;
-  vp8_postproc_cfg_t      postproc_cfg;
-  vpx_decrypt_cb          decrypt_cb;
-  void                    *decrypt_state;
-  vpx_image_t             img;
-  int                     img_avail;
-  int                     flushed;
-  int                     invert_tile_order;
-  int                     last_show_frame;  // Index of last output frame.
-  int                     byte_alignment;
-  int                     skip_loop_filter;
-
-  // Frame parallel related.
-  int                     frame_parallel_decode;  // frame-based threading.
-  VPxWorker               *frame_workers;
-  int                     num_frame_workers;
-  int                     next_submit_worker_id;
-  int                     last_submit_worker_id;
-  int                     next_output_worker_id;
-  int                     available_threads;
-  cache_frame             frame_cache[FRAME_CACHE_SIZE];
-  int                     frame_cache_write;
-  int                     frame_cache_read;
-  int                     num_cache_frames;
-  int                     need_resync;      // wait for key/intra-only frame
-  // BufferPool that holds all reference frames. Shared by all the FrameWorkers.
-  BufferPool              *buffer_pool;
-
-  // External frame buffer info to save for VP9 common.
-  void *ext_priv;  // Private data associated with the external frame buffers.
-  vpx_get_frame_buffer_cb_fn_t get_ext_fb_cb;
-  vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb;
-};
-
 static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx,
                                     vpx_codec_priv_enc_mr_cfg_t *data) {
   // This function only allocates space for the vpx_codec_alg_priv_t
diff --git a/vp9/vp9_dx_iface.h b/vp9/vp9_dx_iface.h
new file mode 100644
index 0000000..e0e948e
--- /dev/null
+++ b/vp9/vp9_dx_iface.h
@@ -0,0 +1,65 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_VP9_DX_IFACE_H_
+#define VP9_VP9_DX_IFACE_H_
+
+#include "vp9/decoder/vp9_decoder.h"
+
+typedef vpx_codec_stream_info_t vp9_stream_info_t;
+
+// This limit is due to framebuffer numbers.
+// TODO(hkuang): Remove this limit after implementing ondemand framebuffers.
+#define FRAME_CACHE_SIZE 6   // Cache maximum 6 decoded frames.
+
+typedef struct cache_frame {
+  int fb_idx;
+  vpx_image_t img;
+} cache_frame;
+
+struct vpx_codec_alg_priv {
+  vpx_codec_priv_t        base;
+  vpx_codec_dec_cfg_t     cfg;
+  vp9_stream_info_t       si;
+  int                     postproc_cfg_set;
+  vp8_postproc_cfg_t      postproc_cfg;
+  vpx_decrypt_cb          decrypt_cb;
+  void                    *decrypt_state;
+  vpx_image_t             img;
+  int                     img_avail;
+  int                     flushed;
+  int                     invert_tile_order;
+  int                     last_show_frame;  // Index of last output frame.
+  int                     byte_alignment;
+  int                     skip_loop_filter;
+
+  // Frame parallel related.
+  int                     frame_parallel_decode;  // frame-based threading.
+  VPxWorker               *frame_workers;
+  int                     num_frame_workers;
+  int                     next_submit_worker_id;
+  int                     last_submit_worker_id;
+  int                     next_output_worker_id;
+  int                     available_threads;
+  cache_frame             frame_cache[FRAME_CACHE_SIZE];
+  int                     frame_cache_write;
+  int                     frame_cache_read;
+  int                     num_cache_frames;
+  int                     need_resync;      // wait for key/intra-only frame
+  // BufferPool that holds all reference frames. Shared by all the FrameWorkers.
+  BufferPool              *buffer_pool;
+
+  // External frame buffer info to save for VP9 common.
+  void *ext_priv;  // Private data associated with the external frame buffers.
+  vpx_get_frame_buffer_cb_fn_t get_ext_fb_cb;
+  vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb;
+};
+
+#endif  // VP9_VP9_DX_IFACE_H_
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index a2cbacf..3f3bdef 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -82,6 +82,8 @@
 VP9_CX_SRCS-yes += encoder/vp9_aq_complexity.h
 VP9_CX_SRCS-yes += encoder/vp9_skin_detection.c
 VP9_CX_SRCS-yes += encoder/vp9_skin_detection.h
+VP9_CX_SRCS-yes += encoder/vp9_noise_estimate.c
+VP9_CX_SRCS-yes += encoder/vp9_noise_estimate.h
 ifeq ($(CONFIG_VP9_POSTPROC),yes)
 VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.h
 VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.c
@@ -102,6 +104,7 @@
 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm
 ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_error_sse2.asm
+VP9_CX_SRCS-$(HAVE_AVX) += encoder/x86/vp9_highbd_error_avx.asm
 else
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
 endif
diff --git a/vp9/vp9dx.mk b/vp9/vp9dx.mk
index 0e9cf16..4c6fd00 100644
--- a/vp9/vp9dx.mk
+++ b/vp9/vp9dx.mk
@@ -16,6 +16,7 @@
 VP9_DX_SRCS_REMOVE-no  += $(VP9_COMMON_SRCS_REMOVE-no)
 
 VP9_DX_SRCS-yes += vp9_dx_iface.c
+VP9_DX_SRCS-yes += vp9_dx_iface.h
 
 VP9_DX_SRCS-yes += decoder/vp9_decodemv.c
 VP9_DX_SRCS-yes += decoder/vp9_decodeframe.c
diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c
index ff60083..ff7c10a 100644
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c
@@ -485,6 +485,7 @@
     enc_cfg->rc_buf_initial_sz = 500;
     enc_cfg->rc_buf_optimal_sz = 600;
     enc_cfg->rc_buf_sz = 1000;
+    enc_cfg->rc_dropframe_thresh = 0;
   }
 
   if (enc_cfg->g_error_resilient == 0 && si->use_multiple_frame_contexts == 0)
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index 287ecfe..bd99c6d 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -141,29 +141,11 @@
  * \sa #vpx_codec_control
  */
 enum vp8e_enc_control_id {
-  /*!\brief Codec control function to set mode of entropy update in encoder.
-   *
-   * Supported in codecs: VP8, VP9
-   */
-  VP8E_UPD_ENTROPY           = 5,
-
-  /*!\brief Codec control function to set reference update mode in encoder.
-   *
-   * Supported in codecs: VP8, VP9
-   */
-  VP8E_UPD_REFERENCE,
-
-  /*!\brief Codec control function to set which reference frame encoder can use.
-   *
-   * Supported in codecs: VP8, VP9
-   */
-  VP8E_USE_REFERENCE,
-
   /*!\brief Codec control function to pass an ROI map to encoder.
    *
    * Supported in codecs: VP8, VP9
    */
-  VP8E_SET_ROI_MAP,
+  VP8E_SET_ROI_MAP           = 8,
 
   /*!\brief Codec control function to pass an Active map to encoder.
    *
@@ -721,17 +703,6 @@
  *
  */
 
-
-/* These controls have been deprecated in favor of the flags parameter to
- * vpx_codec_encode(). See the definition of VP8_EFLAG_* above.
- */
-VPX_CTRL_USE_TYPE_DEPRECATED(VP8E_UPD_ENTROPY,            int)
-#define VPX_CTRL_VP8E_UPD_ENTROPY
-VPX_CTRL_USE_TYPE_DEPRECATED(VP8E_UPD_REFERENCE,          int)
-#define VPX_CTRL_VP8E_UPD_REFERENCE
-VPX_CTRL_USE_TYPE_DEPRECATED(VP8E_USE_REFERENCE,          int)
-#define VPX_CTRL_VP8E_USE_REFERENCE
-
 VPX_CTRL_USE_TYPE(VP8E_SET_FRAME_FLAGS,        int)
 #define VPX_CTRL_VP8E_SET_FRAME_FLAGS
 VPX_CTRL_USE_TYPE(VP8E_SET_TEMPORAL_LAYER_ID,  int)
diff --git a/vpx/vpx_image.h b/vpx/vpx_image.h
index 51100599..e9e952c 100644
--- a/vpx/vpx_image.h
+++ b/vpx/vpx_image.h
@@ -78,11 +78,17 @@
     VPX_CS_SRGB       = 7   /**< sRGB */
   } vpx_color_space_t; /**< alias for enum vpx_color_space */
 
+  /*!\brief List of supported color ranges */
+  typedef enum vpx_color_range {
+    VPX_CR_STUDIO_RANGE = 0,    /**< Y [16..235], UV [16..240] */
+    VPX_CR_FULL_RANGE   = 1     /**< YUV/RGB [0..255] */
+  } vpx_color_range_t; /**< alias for enum vpx_color_range */
+
   /**\brief Image Descriptor */
   typedef struct vpx_image {
     vpx_img_fmt_t fmt; /**< Image Format */
     vpx_color_space_t cs; /**< Color Space */
-    int range; /**< Limited (0) vs. Full-range (1) sample data */
+    vpx_color_range_t range; /**< Color Range */
 
     /* Image storage dimensions */
     unsigned int  w;           /**< Stored image width */
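
Note: with the field typed as vpx_color_range_t instead of a bare int, range
handling is checked at the API boundary (see the RANGE_CHECK against
VPX_CR_STUDIO_RANGE..VPX_CR_FULL_RANGE in vp9_cx_iface.c above). Caller-side
usage (sketch):

  vpx_image_t img;
  vpx_img_alloc(&img, VPX_IMG_FMT_I420, 1280, 720, 16);
  img.range = VPX_CR_FULL_RANGE;   /* previously: img.range = 1; */
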
diff --git a/vpx_dsp/intrapred.c b/vpx_dsp/intrapred.c
index a5e3d30..a9669e5 100644
--- a/vpx_dsp/intrapred.c
+++ b/vpx_dsp/intrapred.c
@@ -44,6 +44,21 @@
       dst[r * stride + c] = dst[(r + 1) * stride + c - 2];
 }
 
+static INLINE void d207e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+                                   const uint8_t *above, const uint8_t *left) {
+  int r, c;
+  (void) above;
+
+  for (r = 0; r < bs; ++r) {
+    for (c = 0; c < bs; ++c) {
+      dst[c] = c & 1 ? AVG3(left[(c >> 1) + r], left[(c >> 1) + r + 1],
+                            left[(c >> 1) + r + 2])
+          : AVG2(left[(c >> 1) + r], left[(c >> 1) + r + 1]);
+    }
+    dst += stride;
+  }
+}
+
 static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
                                  const uint8_t *above, const uint8_t *left) {
   int r, c;
@@ -61,6 +76,20 @@
   }
 }
 
+static INLINE void d63e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+                                  const uint8_t *above, const uint8_t *left) {
+  int r, c;
+  (void) left;
+  for (r = 0; r < bs; ++r) {
+    for (c = 0; c < bs; ++c) {
+      dst[c] = r & 1 ? AVG3(above[(r >> 1) + c], above[(r >> 1) + c + 1],
+                            above[(r >> 1) + c + 2])
+          : AVG2(above[(r >> 1) + c], above[(r >> 1) + c + 1]);
+    }
+    dst += stride;
+  }
+}
+
 static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
                                  const uint8_t *above, const uint8_t *left) {
   const uint8_t above_right = above[bs - 1];
@@ -80,6 +109,19 @@
   }
 }
 
+static INLINE void d45e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+                                  const uint8_t *above, const uint8_t *left) {
+  int r, c;
+  (void) left;
+  for (r = 0; r < bs; ++r) {
+    for (c = 0; c < bs; ++c) {
+      dst[c] = AVG3(above[r + c], above[r + c + 1],
+                    above[r + c + 1 + (r + c + 2 < bs * 2)]);
+    }
+    dst += stride;
+  }
+}
+
 static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
                                   const uint8_t *above, const uint8_t *left) {
   int r, c;
@@ -319,7 +361,7 @@
               DST(3, 3) = AVG3(E, F, G);  // differs from vp8
 }
 
-void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
+void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
                               const uint8_t *above, const uint8_t *left) {
   const int A = above[0];
   const int B = above[1];
@@ -486,6 +528,23 @@
   }
 }
 
+static INLINE void highbd_d207e_predictor(uint16_t *dst, ptrdiff_t stride,
+                                          int bs, const uint16_t *above,
+                                          const uint16_t *left, int bd) {
+  int r, c;
+  (void) above;
+  (void) bd;
+
+  for (r = 0; r < bs; ++r) {
+    for (c = 0; c < bs; ++c) {
+      dst[c] = c & 1 ? AVG3(left[(c >> 1) + r], left[(c >> 1) + r + 1],
+                            left[(c >> 1) + r + 2])
+          : AVG2(left[(c >> 1) + r], left[(c >> 1) + r + 1]);
+    }
+    dst += stride;
+  }
+}
+
 static INLINE void highbd_d63_predictor(uint16_t *dst, ptrdiff_t stride,
                                         int bs, const uint16_t *above,
                                         const uint16_t *left, int bd) {
@@ -502,6 +561,8 @@
   }
 }
 
+#define highbd_d63e_predictor highbd_d63_predictor
+
 static INLINE void highbd_d45_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
                                         const uint16_t *above,
                                         const uint16_t *left, int bd) {
@@ -527,7 +588,7 @@
   for (r = 0; r < bs; ++r) {
     for (c = 0; c < bs; ++c) {
       dst[c] = AVG3(above[r + c], above[r + c + 1],
-                    above[r + c + 1 + (r + c + 2 < 8)]);
+                    above[r + c + 1 + (r + c + 2 < bs * 2)]);
     }
     dst += stride;
   }
@@ -771,6 +832,11 @@
 intra_pred_no_4x4(d207)
 intra_pred_no_4x4(d63)
 intra_pred_no_4x4(d45)
+#if CONFIG_MISC_FIXES
+intra_pred_allsizes(d207e)
+intra_pred_allsizes(d63e)
+intra_pred_no_4x4(d45e)
+#endif
 intra_pred_no_4x4(d117)
 intra_pred_no_4x4(d135)
 intra_pred_no_4x4(d153)
@@ -781,7 +847,4 @@
 intra_pred_allsizes(dc_left)
 intra_pred_allsizes(dc_top)
 intra_pred_allsizes(dc)
-#if CONFIG_VP9_HIGHBITDEPTH && CONFIG_MISC_FIXES
-intra_pred_highbd_sized(d45e, 4)
-#endif
 #undef intra_pred_allsizes
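
Note: the highbd_d45e change above fixes a hard-coded bound: the last usable
above-pixel for a bs-sized block is above[2 * bs - 1], so the third AVG3 tap
must clamp at (r + c + 2 < bs * 2) rather than at 8. The old predicate was
only correct for bs == 4, which was the sole size instantiated before (see the
removed intra_pred_highbd_sized(d45e, 4)); the new predicate generalizes to
all sizes, and the non-highbd d45e_predictor added earlier in this file uses
the same form. In effect:

  /* Sketch: index of the third AVG3 tap for row r, column c of a bs-sized
     block; the predicate clamps it to the last valid sample, 2 * bs - 1. */
  static int d45e_third_tap(int r, int c, int bs) {
    return r + c + 1 + (r + c + 2 < bs * 2);
  }
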
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk
index 31d8c75..9620eaa 100644
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -248,7 +248,8 @@
 endif
 ifeq ($(ARCH_X86_64),yes)
 ifeq ($(CONFIG_USE_X86INC),yes)
-DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3_x86_64.asm
+DSP_SRCS-$(HAVE_SSSE3)  += x86/quantize_ssse3_x86_64.asm
+DSP_SRCS-$(HAVE_AVX)    += x86/quantize_avx_x86_64.asm
 endif
 endif
 endif  # CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 6fb680c..7402d38 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -57,6 +57,9 @@
 add_proto qw/void vpx_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d207_predictor_4x4/, "$ssse3_x86inc";
 
+add_proto qw/void vpx_d207e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d207e_predictor_4x4/;
+
 add_proto qw/void vpx_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d45_predictor_4x4 neon/, "$ssse3_x86inc";
 
@@ -69,6 +72,9 @@
 add_proto qw/void vpx_d63e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d63e_predictor_4x4/;
 
+add_proto qw/void vpx_d63f_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d63f_predictor_4x4/;
+
 add_proto qw/void vpx_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_h_predictor_4x4 neon dspr2 msa/, "$ssse3_x86inc";
 
@@ -108,12 +114,21 @@
 add_proto qw/void vpx_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d207_predictor_8x8/, "$ssse3_x86inc";
 
+add_proto qw/void vpx_d207e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d207e_predictor_8x8/;
+
 add_proto qw/void vpx_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d45_predictor_8x8 neon/, "$ssse3_x86inc";
 
+add_proto qw/void vpx_d45e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d45e_predictor_8x8/;
+
 add_proto qw/void vpx_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d63_predictor_8x8/, "$ssse3_x86inc";
 
+add_proto qw/void vpx_d63e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d63e_predictor_8x8/;
+
 add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_h_predictor_8x8 neon dspr2 msa/, "$ssse3_x86inc";
 
@@ -147,12 +162,21 @@
 add_proto qw/void vpx_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d207_predictor_16x16/, "$ssse3_x86inc";
 
+add_proto qw/void vpx_d207e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d207e_predictor_16x16/;
+
 add_proto qw/void vpx_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d45_predictor_16x16 neon/, "$ssse3_x86inc";
 
+add_proto qw/void vpx_d45e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d45e_predictor_16x16/;
+
 add_proto qw/void vpx_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d63_predictor_16x16/, "$ssse3_x86inc";
 
+add_proto qw/void vpx_d63e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d63e_predictor_16x16/;
+
 add_proto qw/void vpx_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_h_predictor_16x16 neon dspr2 msa/, "$ssse3_x86inc";
 
@@ -186,12 +210,21 @@
 add_proto qw/void vpx_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d207_predictor_32x32/, "$ssse3_x86inc";
 
+add_proto qw/void vpx_d207e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d207e_predictor_32x32/;
+
 add_proto qw/void vpx_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d45_predictor_32x32/, "$ssse3_x86inc";
 
+add_proto qw/void vpx_d45e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d45e_predictor_32x32/;
+
 add_proto qw/void vpx_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d63_predictor_32x32/, "$ssse3_x86inc";
 
+add_proto qw/void vpx_d63e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d63e_predictor_32x32/;
+
 add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_h_predictor_32x32 neon msa/, "$ssse3_x86inc";
 
@@ -227,6 +260,9 @@
   add_proto qw/void vpx_highbd_d207_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d207_predictor_4x4/;
 
+  add_proto qw/void vpx_highbd_d207e_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  specialize qw/vpx_highbd_d207e_predictor_4x4/;
+
   add_proto qw/void vpx_highbd_d45_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d45_predictor_4x4/;
 
@@ -236,6 +272,9 @@
   add_proto qw/void vpx_highbd_d63_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d63_predictor_4x4/;
 
+  add_proto qw/void vpx_highbd_d63e_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  specialize qw/vpx_highbd_d63e_predictor_4x4/;
+
   add_proto qw/void vpx_highbd_h_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_h_predictor_4x4/;
 
@@ -269,12 +308,21 @@
   add_proto qw/void vpx_highbd_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d207_predictor_8x8/;
 
+  add_proto qw/void vpx_highbd_d207e_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  specialize qw/vpx_highbd_d207e_predictor_8x8/;
+
   add_proto qw/void vpx_highbd_d45_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d45_predictor_8x8/;
 
+  add_proto qw/void vpx_highbd_d45e_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  specialize qw/vpx_highbd_d45e_predictor_8x8/;
+
   add_proto qw/void vpx_highbd_d63_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d63_predictor_8x8/;
 
+  add_proto qw/void vpx_highbd_d63e_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  specialize qw/vpx_highbd_d63e_predictor_8x8/;
+
   add_proto qw/void vpx_highbd_h_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_h_predictor_8x8/;
 
@@ -308,12 +356,21 @@
   add_proto qw/void vpx_highbd_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d207_predictor_16x16/;
 
+  add_proto qw/void vpx_highbd_d207e_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  specialize qw/vpx_highbd_d207e_predictor_16x16/;
+
   add_proto qw/void vpx_highbd_d45_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d45_predictor_16x16/;
 
+  add_proto qw/void vpx_highbd_d45e_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  specialize qw/vpx_highbd_d45e_predictor_16x16/;
+
   add_proto qw/void vpx_highbd_d63_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d63_predictor_16x16/;
 
+  add_proto qw/void vpx_highbd_d63e_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  specialize qw/vpx_highbd_d63e_predictor_16x16/;
+
   add_proto qw/void vpx_highbd_h_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_h_predictor_16x16/;
 
@@ -347,12 +404,21 @@
   add_proto qw/void vpx_highbd_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d207_predictor_32x32/;
 
+  add_proto qw/void vpx_highbd_d207e_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  specialize qw/vpx_highbd_d207e_predictor_32x32/;
+
   add_proto qw/void vpx_highbd_d45_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d45_predictor_32x32/;
 
+  add_proto qw/void vpx_highbd_d45e_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  specialize qw/vpx_highbd_d45e_predictor_32x32/;
+
   add_proto qw/void vpx_highbd_d63_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d63_predictor_32x32/;
 
+  add_proto qw/void vpx_highbd_d63e_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  specialize qw/vpx_highbd_d63e_predictor_32x32/;
+
   add_proto qw/void vpx_highbd_h_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_h_predictor_32x32/;
 
@@ -847,25 +913,19 @@
 # Quantization
 #
 if ((vpx_config("CONFIG_VP9_ENCODER") eq "yes") || (vpx_config("CONFIG_VP10_ENCODER") eq "yes")) {
-if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-  specialize qw/vpx_quantize_b sse2/, "$ssse3_x86_64_x86inc";
+  specialize qw/vpx_quantize_b sse2/, "$ssse3_x86_64_x86inc", "$avx_x86_64_x86inc";
 
   add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-  specialize qw/vpx_quantize_b_32x32/, "$ssse3_x86_64_x86inc";
+  specialize qw/vpx_quantize_b_32x32/, "$ssse3_x86_64_x86inc", "$avx_x86_64_x86inc";
 
-  add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-  specialize qw/vpx_highbd_quantize_b sse2/;
+  if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+    add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+    specialize qw/vpx_highbd_quantize_b sse2/;
 
-  add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-  specialize qw/vpx_highbd_quantize_b_32x32 sse2/;
-} else {
-  add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-  specialize qw/vpx_quantize_b sse2/, "$ssse3_x86_64_x86inc";
-
-  add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-  specialize qw/vpx_quantize_b_32x32/, "$ssse3_x86_64_x86inc";
-}  # CONFIG_VP9_HIGHBITDEPTH
+    add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+    specialize qw/vpx_highbd_quantize_b_32x32 sse2/;
+  }  # CONFIG_VP9_HIGHBITDEPTH
 }  # CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
 
 if (vpx_config("CONFIG_ENCODERS") eq "yes") {
diff --git a/vpx_dsp/x86/quantize_avx_x86_64.asm b/vpx_dsp/x86/quantize_avx_x86_64.asm
new file mode 100644
index 0000000..01c4129
--- /dev/null
+++ b/vpx_dsp/x86/quantize_avx_x86_64.asm
@@ -0,0 +1,544 @@
+;
+;  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION .text
+
+%macro QUANTIZE_FN 2
+cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
+                                shift, qcoeff, dqcoeff, dequant, \
+                                eob, scan, iscan
+
+  vzeroupper
+
+  ; If we can skip this block, then just zero the output
+  cmp                         skipmp, 0
+  jne .blank
+
+%ifnidn %1, b_32x32
+
+  ; Special case for ncoeff == 16: it is common, and handling it
+  ; separately avoids the loop setup entirely.
+  cmp                       ncoeffmp, 16
+  jne .generic
+
+  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+  ;; Special case of ncoeff == 16
+  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+.single:
+
+  movifnidn                   coeffq, coeffmp
+  movifnidn                    zbinq, zbinmp
+  mova                            m0, [zbinq]              ; m0 = zbin
+
+  ; Get DC and first 15 AC coeffs - in this special case, that is all.
+%if CONFIG_VP9_HIGHBITDEPTH
+  ; coeff is stored as 32bit numbers but we process them as 16bit numbers
+  mova                            m9, [coeffq]
+  packssdw                        m9, [coeffq+16]          ; m9 = c[i]
+  mova                           m10, [coeffq+32]
+  packssdw                       m10, [coeffq+48]          ; m10 = c[i]
+%else
+  mova                            m9, [coeffq]             ; m9 = c[i]
+  mova                           m10, [coeffq+16]          ; m10 = c[i]
+%endif
+
+  mov                             r0, eobmp                ; Output pointer
+  mov                             r1, qcoeffmp             ; Output pointer
+  mov                             r2, dqcoeffmp            ; Output pointer
+
+  pxor                            m5, m5                   ; m5 = dedicated zero
+
+  pcmpeqw                         m4, m4                   ; All word lanes -1
+  paddw                           m0, m4                   ; m0 = zbin - 1
+
+  pabsw                           m6, m9                   ; m6 = abs(m9)
+  pabsw                          m11, m10                  ; m11 = abs(m10)
+  pcmpgtw                         m7, m6, m0               ; m7 = c[i] >= zbin
+  punpckhqdq                      m0, m0
+  pcmpgtw                        m12, m11, m0              ; m12 = c[i] >= zbin
+
+  ; Check if all coeffs are less than zbin. If yes, we just write zeros
+  ; to the outputs and we are done.
+  por                            m14, m7, m12
+  ptest                          m14, m14
+  jnz .single_nonzero
+
+%if CONFIG_VP9_HIGHBITDEPTH
+  mova                       [r1   ], ymm5
+  mova                       [r1+32], ymm5
+  mova                       [r2   ], ymm5
+  mova                       [r2+32], ymm5
+%else
+  mova                          [r1], ymm5
+  mova                          [r2], ymm5
+%endif
+  mov                           [r0], word 0
+
+  vzeroupper
+  RET
+
+.single_nonzero:
+
+  ; Actual quantization of size 16 block - setup pointers, rounders, etc.
+  movifnidn                       r4, roundmp
+  movifnidn                       r5, quantmp
+  mov                             r3, dequantmp
+  mov                             r6, shiftmp
+  mova                            m1, [r4]              ; m1 = round
+  mova                            m2, [r5]              ; m2 = quant
+  mova                            m3, [r3]              ; m3 = dequant
+  mova                            m4, [r6]              ; m4 = shift
+
+  mov                             r3, iscanmp
+
+  DEFINE_ARGS eob, qcoeff, dqcoeff, iscan
+
+  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+  paddsw                          m6, m1                   ; m6 += round
+  punpckhqdq                      m1, m1
+  paddsw                         m11, m1                   ; m11 += round
+  pmulhw                          m8, m6, m2               ; m8 = m6*q>>16
+  punpckhqdq                      m2, m2
+  pmulhw                         m13, m11, m2              ; m13 = m11*q>>16
+  paddw                           m8, m6                   ; m8 += m6
+  paddw                          m13, m11                  ; m13 += m11
+  pmulhw                          m8, m4                   ; m8 = m8*qsh>>16
+  punpckhqdq                      m4, m4
+  pmulhw                         m13, m4                   ; m13 = m13*qsh>>16
+  psignw                          m8, m9                   ; m8 = reinsert sign
+  psignw                         m13, m10                  ; m13 = reinsert sign
+  pand                            m8, m7
+  pand                           m13, m12
+
+%if CONFIG_VP9_HIGHBITDEPTH
+  ; Store 16bit numbers as 32bit numbers in array pointed to by qcoeff
+  pcmpgtw                         m6, m5, m8
+  punpckhwd                       m6, m8, m6
+  pmovsxwd                       m11, m8
+  mova                  [qcoeffq   ], m11
+  mova                  [qcoeffq+16], m6
+  pcmpgtw                         m6, m5, m13
+  punpckhwd                       m6, m13, m6
+  pmovsxwd                       m11, m13
+  mova                  [qcoeffq+32], m11
+  mova                  [qcoeffq+48], m6
+%else
+  mova                  [qcoeffq   ], m8
+  mova                  [qcoeffq+16], m13
+%endif
+
+  pmullw                          m8, m3                   ; dqc[i] = qc[i] * q
+  punpckhqdq                      m3, m3
+  pmullw                         m13, m3                   ; dqc[i] = qc[i] * q
+
+%if CONFIG_VP9_HIGHBITDEPTH
+  ; Store 16bit numbers as 32bit numbers in array pointed to by dqcoeff
+  pcmpgtw                         m6, m5, m8
+  punpckhwd                       m6, m8, m6
+  pmovsxwd                       m11, m8
+  mova                 [dqcoeffq   ], m11
+  mova                 [dqcoeffq+16], m6
+  pcmpgtw                         m6, m5, m13
+  punpckhwd                       m6, m13, m6
+  pmovsxwd                       m11, m13
+  mova                 [dqcoeffq+32], m11
+  mova                 [dqcoeffq+48], m6
+%else
+  mova                 [dqcoeffq   ], m8
+  mova                 [dqcoeffq+16], m13
+%endif
+
+  mova                            m6, [iscanq]            ; m6 = scan[i]
+  mova                           m11, [iscanq+16]         ; m11 = scan[i]
+
+  pcmpeqw                         m8,  m8,  m5            ; m8 = c[i] == 0
+  pcmpeqw                        m13, m13,  m5            ; m13 = c[i] == 0
+  psubw                           m6,  m6,  m7            ; m6 = scan[i] + 1
+  psubw                          m11, m11, m12            ; m11 = scan[i] + 1
+  pandn                           m8,  m8,  m6            ; m8 = max(eob)
+  pandn                          m13, m13, m11            ; m13 = max(eob)
+  pmaxsw                          m8,  m8, m13
+
+  ; Horizontally accumulate/max eobs and write into [eob] memory pointer
+  pshufd                          m7, m8, 0xe
+  pmaxsw                          m8, m7
+  pshuflw                         m7, m8, 0xe
+  pmaxsw                          m8, m7
+  pshuflw                         m7, m8, 0x1
+  pmaxsw                          m8, m7
+  movq                           rax, m8
+  mov                         [eobq], ax
+
+  vzeroupper
+  RET
+
+  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+  ;; Generic case of ncoeff != 16
+  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+.generic:
+
+%endif ; %ifnidn %1, b_32x32
+
+DEFINE_ARGS coeff, ncoeff, skip, zbin, round, quant, shift, \
+            qcoeff, dqcoeff, dequant, eob, scan, iscan
+
+  ; Actual quantization loop - setup pointers, rounders, etc.
+  movifnidn                   coeffq, coeffmp
+  movifnidn                  ncoeffq, ncoeffmp
+  mov                             r2, dequantmp
+  movifnidn                    zbinq, zbinmp
+  movifnidn                   roundq, roundmp
+  movifnidn                   quantq, quantmp
+  mova                            m0, [zbinq]              ; m0 = zbin
+  mova                            m1, [roundq]             ; m1 = round
+  mova                            m2, [quantq]             ; m2 = quant
+  mova                            m3, [r2]                 ; m3 = dequant
+  pcmpeqw                         m4, m4                   ; All lanes -1
+%ifidn %1, b_32x32
+  psubw                           m0, m4
+  psubw                           m1, m4
+  psrlw                           m0, 1                    ; m0 = (m0 + 1) / 2
+  psrlw                           m1, 1                    ; m1 = (m1 + 1) / 2
+%endif
+  paddw                           m0, m4                   ; m0 = m0 - 1
+
+  mov                             r2, shiftmp
+  mov                             r3, qcoeffmp
+  mova                            m4, [r2]                 ; m4 = shift
+  mov                             r4, dqcoeffmp
+  mov                             r5, iscanmp
+%ifidn %1, b_32x32
+  psllw                           m4, 1
+%endif
+  pxor                            m5, m5                   ; m5 = dedicated zero
+
+  DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob
+
+%if CONFIG_VP9_HIGHBITDEPTH
+  lea                         coeffq, [  coeffq+ncoeffq*4]
+  lea                        qcoeffq, [ qcoeffq+ncoeffq*4]
+  lea                       dqcoeffq, [dqcoeffq+ncoeffq*4]
+%else
+  lea                         coeffq, [  coeffq+ncoeffq*2]
+  lea                        qcoeffq, [ qcoeffq+ncoeffq*2]
+  lea                       dqcoeffq, [dqcoeffq+ncoeffq*2]
+%endif
+  lea                         iscanq, [  iscanq+ncoeffq*2]
+  neg                        ncoeffq
+
+  ; get DC and first 15 AC coeffs
+%if CONFIG_VP9_HIGHBITDEPTH
+  ; coeff is stored as 32bit numbers but we process them as 16bit numbers
+  mova                            m9, [coeffq+ncoeffq*4+ 0]
+  packssdw                        m9, [coeffq+ncoeffq*4+16]
+  mova                           m10, [coeffq+ncoeffq*4+32]
+  packssdw                       m10, [coeffq+ncoeffq*4+48]
+%else
+  mova                            m9, [coeffq+ncoeffq*2+ 0] ; m9 = c[i]
+  mova                           m10, [coeffq+ncoeffq*2+16] ; m10 = c[i]
+%endif
+
+  pabsw                           m6, m9                   ; m6 = abs(m9)
+  pabsw                          m11, m10                  ; m11 = abs(m10)
+  pcmpgtw                         m7, m6, m0               ; m7 = c[i] >= zbin
+  punpckhqdq                      m0, m0
+  pcmpgtw                        m12, m11, m0              ; m12 = c[i] >= zbin
+
+  ; Check if all coeffs are less than zbin. If yes, skip forward quickly.
+  por                            m14, m7, m12
+  ptest                          m14, m14
+  jnz .first_nonzero
+
+%if CONFIG_VP9_HIGHBITDEPTH
+  mova        [qcoeffq+ncoeffq*4   ], ymm5
+  mova        [qcoeffq+ncoeffq*4+32], ymm5
+  mova       [dqcoeffq+ncoeffq*4   ], ymm5
+  mova       [dqcoeffq+ncoeffq*4+32], ymm5
+%else
+  mova           [qcoeffq+ncoeffq*2], ymm5
+  mova          [dqcoeffq+ncoeffq*2], ymm5
+%endif
+
+  add                        ncoeffq, mmsize
+
+  punpckhqdq                      m1, m1
+  punpckhqdq                      m2, m2
+  punpckhqdq                      m3, m3
+  punpckhqdq                      m4, m4
+  pxor                            m8, m8
+
+  jmp .ac_only_loop
+
+.first_nonzero:
+
+  paddsw                          m6, m1                   ; m6 += round
+  punpckhqdq                      m1, m1
+  paddsw                         m11, m1                   ; m11 += round
+  pmulhw                          m8, m6, m2               ; m8 = m6*q>>16
+  punpckhqdq                      m2, m2
+  pmulhw                         m13, m11, m2              ; m13 = m11*q>>16
+  paddw                           m8, m6                   ; m8 += m6
+  paddw                          m13, m11                  ; m13 += m11
+  pmulhw                          m8, m4                   ; m8 = m8*qsh>>16
+  punpckhqdq                      m4, m4
+  pmulhw                         m13, m4                   ; m13 = m13*qsh>>16
+  psignw                          m8, m9                   ; m8 = reinsert sign
+  psignw                         m13, m10                  ; m13 = reinsert sign
+  pand                            m8, m7
+  pand                           m13, m12
+
+%if CONFIG_VP9_HIGHBITDEPTH
+  ; store 16bit numbers as 32bit numbers in array pointed to by qcoeff
+  pcmpgtw                         m6, m5, m8
+  punpckhwd                       m6, m8, m6
+  pmovsxwd                       m11, m8
+  mova        [qcoeffq+ncoeffq*4+ 0], m11
+  mova        [qcoeffq+ncoeffq*4+16], m6
+  pcmpgtw                         m6, m5, m13
+  punpckhwd                       m6, m13, m6
+  pmovsxwd                       m11, m13
+  mova        [qcoeffq+ncoeffq*4+32], m11
+  mova        [qcoeffq+ncoeffq*4+48], m6
+%else
+  mova        [qcoeffq+ncoeffq*2+ 0], m8
+  mova        [qcoeffq+ncoeffq*2+16], m13
+%endif
+
+%ifidn %1, b_32x32
+  pabsw                           m8, m8
+  pabsw                          m13, m13
+%endif
+  pmullw                          m8, m3                   ; dqc[i] = qc[i] * q
+  punpckhqdq                      m3, m3
+  pmullw                         m13, m3                   ; dqc[i] = qc[i] * q
+%ifidn %1, b_32x32
+  psrlw                           m8, 1
+  psrlw                          m13, 1
+  psignw                          m8, m9
+  psignw                         m13, m10
+%endif
+
+%if CONFIG_VP9_HIGHBITDEPTH
+  ; store 16bit numbers as 32bit numbers in array pointed to by dqcoeff
+  pcmpgtw                         m6, m5, m8
+  punpckhwd                       m6, m8, m6
+  pmovsxwd                       m11, m8
+  mova       [dqcoeffq+ncoeffq*4+ 0], m11
+  mova       [dqcoeffq+ncoeffq*4+16], m6
+  pcmpgtw                         m6, m5, m13
+  punpckhwd                       m6, m13, m6
+  pmovsxwd                       m11, m13
+  mova       [dqcoeffq+ncoeffq*4+32], m11
+  mova       [dqcoeffq+ncoeffq*4+48], m6
+%else
+  mova       [dqcoeffq+ncoeffq*2+ 0], m8
+  mova       [dqcoeffq+ncoeffq*2+16], m13
+%endif
+
+  pcmpeqw                         m8, m5                    ; m8 = c[i] == 0
+  pcmpeqw                        m13, m5                    ; m13 = c[i] == 0
+  mova                            m6, [iscanq+ncoeffq*2]    ; m6 = scan[i]
+  mova                           m11, [iscanq+ncoeffq*2+16] ; m11 = scan[i]
+  psubw                           m6, m7                    ; m6 = scan[i] + 1
+  psubw                          m11, m12                   ; m11 = scan[i] + 1
+  pandn                           m8, m6                    ; m8 = max(eob)
+  pandn                          m13, m11                   ; m13 = max(eob)
+  pmaxsw                          m8, m13
+  add                        ncoeffq, mmsize
+
+.ac_only_loop:
+
+%if CONFIG_VP9_HIGHBITDEPTH
+  ; pack coeff from 32bit to 16bit array
+  mova                            m9, [coeffq+ncoeffq*4+ 0]
+  packssdw                        m9, [coeffq+ncoeffq*4+16]
+  mova                           m10, [coeffq+ncoeffq*4+32]
+  packssdw                       m10, [coeffq+ncoeffq*4+48]
+%else
+  mova                            m9, [coeffq+ncoeffq*2+ 0] ; m9 = c[i]
+  mova                           m10, [coeffq+ncoeffq*2+16] ; m10 = c[i]
+%endif
+
+  pabsw                           m6, m9                   ; m6 = abs(m9)
+  pabsw                          m11, m10                  ; m11 = abs(m10)
+  pcmpgtw                         m7, m6, m0               ; m7 = c[i] >= zbin
+  pcmpgtw                        m12, m11, m0              ; m12 = c[i] >= zbin
+
+  ; Check if all coeffs are less than zbin. If yes, skip this iteration
+  ; and just write the zeros that the result would be.
+  por                            m14, m7, m12
+  ptest                          m14, m14
+  jnz .rest_nonzero
+
+%if CONFIG_VP9_HIGHBITDEPTH
+  mova        [qcoeffq+ncoeffq*4+ 0], ymm5
+  mova        [qcoeffq+ncoeffq*4+32], ymm5
+  mova       [dqcoeffq+ncoeffq*4+ 0], ymm5
+  mova       [dqcoeffq+ncoeffq*4+32], ymm5
+%else
+  mova        [qcoeffq+ncoeffq*2+ 0], ymm5
+  mova       [dqcoeffq+ncoeffq*2+ 0], ymm5
+%endif
+  add                        ncoeffq, mmsize
+  jnz .ac_only_loop
+
+  ; Horizontally accumulate/max eobs and write into [eob] memory pointer
+  mov                             r2, eobmp
+  pshufd                          m7, m8, 0xe
+  pmaxsw                          m8, m7
+  pshuflw                         m7, m8, 0xe
+  pmaxsw                          m8, m7
+  pshuflw                         m7, m8, 0x1
+  pmaxsw                          m8, m7
+  movq                           rax, m8
+  mov                           [r2], ax
+  vzeroupper
+  RET
+
+.rest_nonzero:
+  paddsw                          m6, m1                   ; m6 += round
+  paddsw                         m11, m1                   ; m11 += round
+  pmulhw                         m14, m6, m2               ; m14 = m6*q>>16
+  pmulhw                         m13, m11, m2              ; m13 = m11*q>>16
+  paddw                          m14, m6                   ; m14 += m6
+  paddw                          m13, m11                  ; m13 += m11
+  pmulhw                         m14, m4                   ; m14 = m14*qsh>>16
+  pmulhw                         m13, m4                   ; m13 = m13*qsh>>16
+  psignw                         m14, m9                   ; m14 = reinsert sign
+  psignw                         m13, m10                  ; m13 = reinsert sign
+  pand                           m14, m7
+  pand                           m13, m12
+
+%if CONFIG_VP9_HIGHBITDEPTH
+  ; store 16bit numbers as 32bit numbers in array pointed to by qcoeff
+  pcmpgtw                         m6, m5, m14
+  punpckhwd                       m6, m14, m6
+  pmovsxwd                       m11, m14
+  mova        [qcoeffq+ncoeffq*4+ 0], m11
+  mova        [qcoeffq+ncoeffq*4+16], m6
+  pcmpgtw                         m6, m5, m13
+  punpckhwd                       m6, m13, m6
+  pmovsxwd                       m11, m13
+  mova        [qcoeffq+ncoeffq*4+32], m11
+  mova        [qcoeffq+ncoeffq*4+48], m6
+%else
+  mova        [qcoeffq+ncoeffq*2+ 0], m14
+  mova        [qcoeffq+ncoeffq*2+16], m13
+%endif
+
+%ifidn %1, b_32x32
+  pabsw                          m14, m14
+  pabsw                          m13, m13
+%endif
+  pmullw                         m14, m3                   ; dqc[i] = qc[i] * q
+  pmullw                         m13, m3                   ; dqc[i] = qc[i] * q
+%ifidn %1, b_32x32
+  psrlw                          m14, 1
+  psrlw                          m13, 1
+  psignw                         m14, m9
+  psignw                         m13, m10
+%endif
+
+%if CONFIG_VP9_HIGHBITDEPTH
+  ; store 16bit numbers as 32bit numbers in array pointed to by dqcoeff
+  pcmpgtw                         m6, m5, m14
+  punpckhwd                       m6, m14, m6
+  pmovsxwd                       m11, m14
+  mova       [dqcoeffq+ncoeffq*4+ 0], m11
+  mova       [dqcoeffq+ncoeffq*4+16], m6
+  pcmpgtw                         m6, m5, m13
+  punpckhwd                       m6, m13, m6
+  pmovsxwd                       m11, m13
+  mova       [dqcoeffq+ncoeffq*4+32], m11
+  mova       [dqcoeffq+ncoeffq*4+48], m6
+%else
+  mova       [dqcoeffq+ncoeffq*2+ 0], m14
+  mova       [dqcoeffq+ncoeffq*2+16], m13
+%endif
+
+  pcmpeqw                        m14, m5                    ; m14 = c[i] == 0
+  pcmpeqw                        m13, m5                    ; m13 = c[i] == 0
+  mova                            m6, [iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
+  mova                           m11, [iscanq+ncoeffq*2+16] ; m11 = scan[i]
+  psubw                           m6, m7                    ; m6 = scan[i] + 1
+  psubw                          m11, m12                   ; m11 = scan[i] + 1
+  pandn                          m14, m6                    ; m14 = max(eob)
+  pandn                          m13, m11                   ; m13 = max(eob)
+  pmaxsw                          m8, m14
+  pmaxsw                          m8, m13
+  add                        ncoeffq, mmsize
+  jnz .ac_only_loop
+
+  ; Horizontally accumulate/max eobs and write into [eob] memory pointer
+  mov                             r2, eobmp
+  pshufd                          m7, m8, 0xe
+  pmaxsw                          m8, m7
+  pshuflw                         m7, m8, 0xe
+  pmaxsw                          m8, m7
+  pshuflw                         m7, m8, 0x1
+  pmaxsw                          m8, m7
+  movq                           rax, m8
+  mov                           [r2], ax
+  vzeroupper
+  RET
+
+  ; Skip-block, i.e. just write all zeroes
+.blank:
+
+DEFINE_ARGS coeff, ncoeff, skip, zbin, round, quant, shift, \
+            qcoeff, dqcoeff, dequant, eob, scan, iscan
+
+  mov                             r0, dqcoeffmp
+  movifnidn                  ncoeffq, ncoeffmp
+  mov                             r2, qcoeffmp
+  mov                             r3, eobmp
+
+DEFINE_ARGS dqcoeff, ncoeff, qcoeff, eob
+
+%if CONFIG_VP9_HIGHBITDEPTH
+  lea                       dqcoeffq, [dqcoeffq+ncoeffq*4]
+  lea                        qcoeffq, [ qcoeffq+ncoeffq*4]
+%else
+  lea                       dqcoeffq, [dqcoeffq+ncoeffq*2]
+  lea                        qcoeffq, [ qcoeffq+ncoeffq*2]
+%endif
+
+  neg                        ncoeffq
+  pxor                            m7, m7
+
+.blank_loop:
+%if CONFIG_VP9_HIGHBITDEPTH
+  mova       [dqcoeffq+ncoeffq*4+ 0], ymm7
+  mova       [dqcoeffq+ncoeffq*4+32], ymm7
+  mova        [qcoeffq+ncoeffq*4+ 0], ymm7
+  mova        [qcoeffq+ncoeffq*4+32], ymm7
+%else
+  mova       [dqcoeffq+ncoeffq*2+ 0], ymm7
+  mova        [qcoeffq+ncoeffq*2+ 0], ymm7
+%endif
+  add                        ncoeffq, mmsize
+  jl .blank_loop
+
+  mov                         [eobq], word 0
+
+  vzeroupper
+  RET
+%endmacro
+
+INIT_XMM avx
+QUANTIZE_FN b, 7
+QUANTIZE_FN b_32x32, 7
+
+END
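
Relative to the plain b path, the %ifidn %1, b_32x32 blocks above only adjust constants, not the data path: zbin and round are halved with round-to-nearest, the quantizer shift gains one bit, and the dequantized value is halved in sign-magnitude form. In the same scalar sketch style (again a hypothetical name, not the library entry point):

#include <stdint.h>

static int clamp16(int v) {
  return v < -32768 ? -32768 : (v > 32767 ? 32767 : v);
}

void quantize_b_32x32_sketch(const int16_t *coeff, int n_coeffs,
                             const int16_t *zbin, const int16_t *round,
                             const int16_t *quant, const int16_t *quant_shift,
                             int16_t *qcoeff, int16_t *dqcoeff,
                             const int16_t *dequant, const int16_t *iscan,
                             uint16_t *eob) {
  int i, last = -1;
  for (i = 0; i < n_coeffs; i++) {
    const int ac = (i != 0);
    const int c = coeff[i];
    const int abs_c = c < 0 ? -c : c;
    int abs_q;
    qcoeff[i] = 0;
    dqcoeff[i] = 0;
    if (abs_c >= ((zbin[ac] + 1) >> 1)) {          /* halved dead zone */
      int tmp = clamp16(abs_c + ((round[ac] + 1) >> 1));
      /* >> 15 instead of >> 16: one extra bit of quantizer precision,
       * realized in the asm by doubling the shift row with psllw. */
      abs_q = ((((tmp * quant[ac]) >> 16) + tmp) * quant_shift[ac]) >> 15;
      qcoeff[i] = (int16_t)(c < 0 ? -abs_q : abs_q);
      /* halve the dequantized value on the magnitude, then re-sign:
       * the pabsw + pmullw + psrlw + psignw sequence in the asm. */
      dqcoeff[i] = (int16_t)((abs_q * dequant[ac]) >> 1);
      if (c < 0) dqcoeff[i] = (int16_t)-dqcoeff[i];
      if (abs_q != 0 && iscan[i] > last) last = iscan[i];
    }
  }
  *eob = (uint16_t)(last + 1);
}

Doing the final halving on the absolute value and then reapplying the sign makes the right shift round toward zero, so negative coefficients match an integer division by two.
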
diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c
index 7739218..e1ee46e 100644
--- a/vpx_scale/generic/yv12config.c
+++ b/vpx_scale/generic/yv12config.c
@@ -203,6 +203,15 @@
         return -1;
 
       ybf->buffer_alloc = (uint8_t *)yv12_align_addr(fb->data, 32);
+
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+      // This memset is needed to avoid a use-of-uninitialized-value
+      // warning in the MSan test. It causes a performance penalty, so
+      // only do it when building for MSan.
+      memset(ybf->buffer_alloc, 0, (size_t)frame_size);
+#endif
+#endif
     } else if (frame_size > (size_t)ybf->buffer_alloc_sz) {
       // Allocation to hold larger frame, or first allocation.
       vpx_free(ybf->buffer_alloc);
diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h
index 3a04452..37b255d 100644
--- a/vpx_scale/yv12config.h
+++ b/vpx_scale/yv12config.h
@@ -56,7 +56,7 @@
   int subsampling_y;
   unsigned int bit_depth;
   vpx_color_space_t color_space;
-  int color_range;
+  vpx_color_range_t color_range;
   int render_width;
   int render_height;
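
The color_range field above was previously carried as a bare int; the new declaration uses the enum from vpx/vpx_image.h, which reads roughly:

/* From vpx/vpx_image.h (paraphrased). */
typedef enum vpx_color_range {
  VPX_CR_STUDIO_RANGE = 0, /* Y [16..235], UV [16..240] */
  VPX_CR_FULL_RANGE = 1    /* YUV/RGB [0..255] */
} vpx_color_range_t;

Typing the field as vpx_color_range_t documents the legal values at the point of use and lets the compiler flag accidental assignments of unrelated integers.
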
 
diff --git a/vpxenc.c b/vpxenc.c
index cb78226..fd2acf6 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -382,7 +382,7 @@
 static const arg_def_t tile_rows = ARG_DEF(
     NULL, "tile-rows", 1, "Number of tile rows to use, log2");
 static const arg_def_t lossless = ARG_DEF(
-    NULL, "lossless", 1, "Lossless mode");
+    NULL, "lossless", 1, "Lossless mode (0: false (default), 1: true)");
 static const arg_def_t frame_parallel_decoding = ARG_DEF(
     NULL, "frame-parallel", 1, "Enable frame parallel decodability features");
 static const arg_def_t aq_mode = ARG_DEF(