Move shared SAD code to vpx_dsp

Create a new component, vpx_dsp, for code that can be shared
between codecs. Move the SAD code into the component.

This reduces the size of vpxenc and vpxdec by 36k on x86_64 builds.

Change-Id: I73f837ddaecac6b350bf757af0cfe19c4ab9327a
diff --git a/test/sad_test.cc b/test/sad_test.cc
index 65e9561..6c28edb 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -14,14 +14,25 @@
 #include <stdio.h>
 
 #include "./vpx_config.h"
-#if CONFIG_VP8_ENCODER
-#include "./vp8_rtcd.h"
-#endif
-#if CONFIG_VP9_ENCODER
-#include "./vp9_rtcd.h"
-#endif
+#include "./vpx_dsp_rtcd.h"
 #include "vpx_mem/vpx_mem.h"
 
+/* Needed for ROUND_POWER_OF_TWO and CONVERT_TO* macros, both of which should be
+ * moved to a more generic location. Alternatively the *avg functions could be
+ * restricted to VP9 builds, but it would be better to avoid that sort of
+ * specificity.
+ * TODO(johannkoenig): move these macros to a common location.
+ */
+#if CONFIG_VP9_HIGHBITDEPTH
+#include "vp9/common/vp9_common.h"
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#ifndef ROUND_POWER_OF_TWO
+#define ROUND_POWER_OF_TWO(value, n) \
+     (((value) + (1 << ((n) - 1))) >> (n))
+#endif  // ROUND_POWER_OF_TWO
+
+
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
@@ -30,27 +41,18 @@
 #include "vpx/vpx_codec.h"
 
 
-#if CONFIG_VP8_ENCODER
-typedef unsigned int (*SadMxNFunc)(const unsigned char *source_ptr,
-                                   int source_stride,
-                                   const unsigned char *reference_ptr,
-                                   int reference_stride,
-                                   unsigned int max_sad);
+typedef unsigned int (*SadMxNFunc)(const uint8_t *src_ptr,
+                                   int src_stride,
+                                   const uint8_t *ref_ptr,
+                                   int ref_stride);
 typedef std::tr1::tuple<int, int, SadMxNFunc, int> SadMxNParam;
-#endif
-#if CONFIG_VP9_ENCODER
-typedef unsigned int (*SadMxNVp9Func)(const unsigned char *source_ptr,
-                                      int source_stride,
-                                      const unsigned char *reference_ptr,
-                                      int reference_stride);
-typedef std::tr1::tuple<int, int, SadMxNVp9Func, int> SadMxNVp9Param;
-typedef uint32_t (*SadMxNAvgVp9Func)(const uint8_t *source_ptr,
-                                     int source_stride,
-                                     const uint8_t *reference_ptr,
-                                     int reference_stride,
-                                     const uint8_t *second_pred);
-typedef std::tr1::tuple<int, int, SadMxNAvgVp9Func, int> SadMxNAvgVp9Param;
-#endif
+
+typedef uint32_t (*SadMxNAvgFunc)(const uint8_t *src_ptr,
+                                  int src_stride,
+                                  const uint8_t *ref_ptr,
+                                  int ref_stride,
+                                  const uint8_t *second_pred);
+typedef std::tr1::tuple<int, int, SadMxNAvgFunc, int> SadMxNAvgParam;
 
 typedef void (*SadMxNx4Func)(const uint8_t *src_ptr,
                              int src_stride,
@@ -68,7 +70,6 @@
       width_(width), height_(height), bd_(bit_depth) {}
 
   static void SetUpTestCase() {
-#if CONFIG_VP9_HIGHBITDEPTH
     source_data8_ = reinterpret_cast<uint8_t*>(
         vpx_memalign(kDataAlignment, kDataBlockSize));
     reference_data8_ = reinterpret_cast<uint8_t*>(
@@ -81,18 +82,9 @@
         vpx_memalign(kDataAlignment, kDataBufferSize*sizeof(uint16_t)));
     second_pred16_ = reinterpret_cast<uint16_t*>(
         vpx_memalign(kDataAlignment, 64*64*sizeof(uint16_t)));
-#else
-    source_data_ = reinterpret_cast<uint8_t*>(
-        vpx_memalign(kDataAlignment, kDataBlockSize));
-    reference_data_ = reinterpret_cast<uint8_t*>(
-        vpx_memalign(kDataAlignment, kDataBufferSize));
-    second_pred_ = reinterpret_cast<uint8_t*>(
-        vpx_memalign(kDataAlignment, 64*64));
-#endif
   }
 
   static void TearDownTestCase() {
-#if CONFIG_VP9_HIGHBITDEPTH
     vpx_free(source_data8_);
     source_data8_ = NULL;
     vpx_free(reference_data8_);
@@ -105,14 +97,6 @@
     reference_data16_ = NULL;
     vpx_free(second_pred16_);
     second_pred16_ = NULL;
-#else
-    vpx_free(source_data_);
-    source_data_ = NULL;
-    vpx_free(reference_data_);
-    reference_data_ = NULL;
-    vpx_free(second_pred_);
-    second_pred_ = NULL;
-#endif
   }
 
   virtual void TearDown() {
@@ -126,23 +110,21 @@
   static const int kDataBufferSize = 4 * kDataBlockSize;
 
   virtual void SetUp() {
-#if CONFIG_VP9_HIGHBITDEPTH
     if (bd_ == -1) {
       use_high_bit_depth_ = false;
       bit_depth_ = VPX_BITS_8;
       source_data_ = source_data8_;
       reference_data_ = reference_data8_;
       second_pred_ = second_pred8_;
+#if CONFIG_VP9_HIGHBITDEPTH
     } else {
       use_high_bit_depth_ = true;
       bit_depth_ = static_cast<vpx_bit_depth_t>(bd_);
       source_data_ = CONVERT_TO_BYTEPTR(source_data16_);
       reference_data_ = CONVERT_TO_BYTEPTR(reference_data16_);
       second_pred_ = CONVERT_TO_BYTEPTR(second_pred16_);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
     }
-#else
-    bit_depth_ = VPX_BITS_8;
-#endif
     mask_ = (1 << bit_depth_) - 1;
     source_stride_ = (width_ + 31) & ~31;
     reference_stride_ = width_ * 2;
@@ -151,51 +133,35 @@
 
   virtual uint8_t *GetReference(int block_idx) {
 #if CONFIG_VP9_HIGHBITDEPTH
-    if (!use_high_bit_depth_) {
-      return reference_data_ + block_idx * kDataBlockSize;
-    } else {
+    if (use_high_bit_depth_)
       return CONVERT_TO_BYTEPTR(CONVERT_TO_SHORTPTR(reference_data_) +
                                 block_idx * kDataBlockSize);
-    }
-#else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
     return reference_data_ + block_idx * kDataBlockSize;
-#endif
   }
 
   // Sum of Absolute Differences. Given two blocks, calculate the absolute
   // difference between two pixels in the same relative location; accumulate.
-  unsigned int ReferenceSAD(unsigned int max_sad, int block_idx) {
+  unsigned int ReferenceSAD(int block_idx) {
     unsigned int sad = 0;
-#if CONFIG_VP9_HIGHBITDEPTH
       const uint8_t *const reference8 = GetReference(block_idx);
       const uint8_t *const source8 = source_data_;
+#if CONFIG_VP9_HIGHBITDEPTH
       const uint16_t *const reference16 =
           CONVERT_TO_SHORTPTR(GetReference(block_idx));
       const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
-#else
-    const uint8_t *const reference = GetReference(block_idx);
-    const uint8_t *const source = source_data_;
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH
     for (int h = 0; h < height_; ++h) {
       for (int w = 0; w < width_; ++w) {
-#if CONFIG_VP9_HIGHBITDEPTH
         if (!use_high_bit_depth_) {
-          sad +=
-              abs(source8[h * source_stride_ + w] -
-                  reference8[h * reference_stride_ + w]);
+          sad += abs(source8[h * source_stride_ + w] -
+                     reference8[h * reference_stride_ + w]);
+#if CONFIG_VP9_HIGHBITDEPTH
         } else {
-          sad +=
-              abs(source16[h * source_stride_ + w] -
-                  reference16[h * reference_stride_ + w]);
+          sad += abs(source16[h * source_stride_ + w] -
+                     reference16[h * reference_stride_ + w]);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
         }
-#else
-        sad +=
-            abs(source[h * source_stride_ + w] -
-                reference[h * reference_stride_ + w]);
-#endif
-      }
-      if (sad > max_sad) {
-        break;
       }
     }
     return sad;
@@ -204,85 +170,69 @@
   // Sum of Absolute Differences Average. Given two blocks, and a prediction
   // calculate the absolute difference between one pixel and average of the
   // corresponding and predicted pixels; accumulate.
-  unsigned int ReferenceSADavg(unsigned int max_sad, int block_idx) {
+  unsigned int ReferenceSADavg(int block_idx) {
     unsigned int sad = 0;
+    const uint8_t *const reference8 = GetReference(block_idx);
+    const uint8_t *const source8 = source_data_;
+    const uint8_t *const second_pred8 = second_pred_;
 #if CONFIG_VP9_HIGHBITDEPTH
-      const uint8_t *const reference8 = GetReference(block_idx);
-      const uint8_t *const source8 = source_data_;
-      const uint8_t *const second_pred8 = second_pred_;
-      const uint16_t *const reference16 =
-          CONVERT_TO_SHORTPTR(GetReference(block_idx));
-      const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
-      const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_);
-#else
-    const uint8_t *const reference = GetReference(block_idx);
-    const uint8_t *const source = source_data_;
-    const uint8_t *const second_pred = second_pred_;
-#endif
+    const uint16_t *const reference16 =
+        CONVERT_TO_SHORTPTR(GetReference(block_idx));
+    const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
+    const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
     for (int h = 0; h < height_; ++h) {
       for (int w = 0; w < width_; ++w) {
-#if CONFIG_VP9_HIGHBITDEPTH
         if (!use_high_bit_depth_) {
           const int tmp = second_pred8[h * width_ + w] +
               reference8[h * reference_stride_ + w];
           const uint8_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1);
           sad += abs(source8[h * source_stride_ + w] - comp_pred);
+#if CONFIG_VP9_HIGHBITDEPTH
         } else {
           const int tmp = second_pred16[h * width_ + w] +
               reference16[h * reference_stride_ + w];
           const uint16_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1);
           sad += abs(source16[h * source_stride_ + w] - comp_pred);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
         }
-#else
-        const int tmp = second_pred[h * width_ + w] +
-            reference[h * reference_stride_ + w];
-        const uint8_t comp_pred = (tmp + 1) >> 1;
-        sad += abs(source[h * source_stride_ + w] - comp_pred);
-#endif
-      }
-      if (sad > max_sad) {
-        break;
       }
     }
     return sad;
   }
 
   void FillConstant(uint8_t *data, int stride, uint16_t fill_constant) {
-#if CONFIG_VP9_HIGHBITDEPTH
     uint8_t *data8 = data;
+#if CONFIG_VP9_HIGHBITDEPTH
     uint16_t *data16 = CONVERT_TO_SHORTPTR(data);
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH
     for (int h = 0; h < height_; ++h) {
       for (int w = 0; w < width_; ++w) {
-#if CONFIG_VP9_HIGHBITDEPTH
         if (!use_high_bit_depth_) {
           data8[h * stride + w] = static_cast<uint8_t>(fill_constant);
+#if CONFIG_VP9_HIGHBITDEPTH
         } else {
           data16[h * stride + w] = fill_constant;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
         }
-#else
-        data[h * stride + w] = static_cast<uint8_t>(fill_constant);
-#endif
       }
     }
   }
 
   void FillRandom(uint8_t *data, int stride) {
-#if CONFIG_VP9_HIGHBITDEPTH
     uint8_t *data8 = data;
+#if CONFIG_VP9_HIGHBITDEPTH
     uint16_t *data16 = CONVERT_TO_SHORTPTR(data);
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH
     for (int h = 0; h < height_; ++h) {
       for (int w = 0; w < width_; ++w) {
-#if CONFIG_VP9_HIGHBITDEPTH
         if (!use_high_bit_depth_) {
           data8[h * stride + w] = rnd_.Rand8();
+#if CONFIG_VP9_HIGHBITDEPTH
         } else {
           data16[h * stride + w] = rnd_.Rand16() & mask_;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
         }
-#else
-        data[h * stride + w] = rnd_.Rand8();
-#endif
       }
     }
   }
@@ -293,7 +243,6 @@
   static uint8_t *reference_data_;
   static uint8_t *second_pred_;
   int source_stride_;
-#if CONFIG_VP9_HIGHBITDEPTH
   bool use_high_bit_depth_;
   static uint8_t *source_data8_;
   static uint8_t *reference_data8_;
@@ -301,7 +250,6 @@
   static uint16_t *source_data16_;
   static uint16_t *reference_data16_;
   static uint16_t *second_pred16_;
-#endif
   int reference_stride_;
 
   ACMRandom rnd_;
@@ -315,11 +263,11 @@
 
  protected:
   void SADs(unsigned int *results) {
-    const uint8_t *refs[] = {GetReference(0), GetReference(1),
-                             GetReference(2), GetReference(3)};
+    const uint8_t *references[] = {GetReference(0), GetReference(1),
+                                   GetReference(2), GetReference(3)};
 
     ASM_REGISTER_STATE_CHECK(GET_PARAM(2)(source_data_, source_stride_,
-                                          refs, reference_stride_,
+                                          references, reference_stride_,
                                           results));
   }
 
@@ -328,14 +276,13 @@
 
     SADs(exp_sad);
     for (int block = 0; block < 4; ++block) {
-      reference_sad = ReferenceSAD(UINT_MAX, block);
+      reference_sad = ReferenceSAD(block);
 
       EXPECT_EQ(reference_sad, exp_sad[block]) << "block " << block;
     }
   }
 };
 
-#if CONFIG_VP8_ENCODER
 class SADTest
     : public SADTestBase,
       public ::testing::WithParamInterface<SadMxNParam> {
@@ -343,38 +290,6 @@
   SADTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
 
  protected:
-  unsigned int SAD(unsigned int max_sad, int block_idx) {
-    unsigned int ret;
-    const uint8_t *const reference = GetReference(block_idx);
-
-    ASM_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
-                                                reference, reference_stride_,
-                                                max_sad));
-    return ret;
-  }
-
-  void CheckSAD(unsigned int max_sad) {
-    const unsigned int reference_sad = ReferenceSAD(max_sad, 0);
-    const unsigned int exp_sad = SAD(max_sad, 0);
-
-    if (reference_sad <= max_sad) {
-      ASSERT_EQ(exp_sad, reference_sad);
-    } else {
-      // Alternative implementations are not required to check max_sad
-      ASSERT_GE(exp_sad, reference_sad);
-    }
-  }
-};
-#endif  // CONFIG_VP8_ENCODER
-
-#if CONFIG_VP9_ENCODER
-class SADVP9Test
-    : public SADTestBase,
-      public ::testing::WithParamInterface<SadMxNVp9Param> {
- public:
-  SADVP9Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
-
- protected:
   unsigned int SAD(int block_idx) {
     unsigned int ret;
     const uint8_t *const reference = GetReference(block_idx);
@@ -385,18 +300,18 @@
   }
 
   void CheckSAD() {
-    const unsigned int reference_sad = ReferenceSAD(UINT_MAX, 0);
+    const unsigned int reference_sad = ReferenceSAD(0);
     const unsigned int exp_sad = SAD(0);
 
     ASSERT_EQ(reference_sad, exp_sad);
   }
 };
 
-class SADavgVP9Test
+class SADavgTest
     : public SADTestBase,
-      public ::testing::WithParamInterface<SadMxNAvgVp9Param> {
+      public ::testing::WithParamInterface<SadMxNAvgParam> {
  public:
-  SADavgVP9Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
+  SADavgTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
 
  protected:
   unsigned int SAD_avg(int block_idx) {
@@ -410,45 +325,41 @@
   }
 
   void CheckSAD() {
-    const unsigned int reference_sad = ReferenceSADavg(UINT_MAX, 0);
+    const unsigned int reference_sad = ReferenceSADavg(0);
     const unsigned int exp_sad = SAD_avg(0);
 
     ASSERT_EQ(reference_sad, exp_sad);
   }
 };
-#endif  // CONFIG_VP9_ENCODER
 
 uint8_t *SADTestBase::source_data_ = NULL;
 uint8_t *SADTestBase::reference_data_ = NULL;
 uint8_t *SADTestBase::second_pred_ = NULL;
-#if CONFIG_VP9_ENCODER && CONFIG_VP9_HIGHBITDEPTH
 uint8_t *SADTestBase::source_data8_ = NULL;
 uint8_t *SADTestBase::reference_data8_ = NULL;
 uint8_t *SADTestBase::second_pred8_ = NULL;
 uint16_t *SADTestBase::source_data16_ = NULL;
 uint16_t *SADTestBase::reference_data16_ = NULL;
 uint16_t *SADTestBase::second_pred16_ = NULL;
-#endif
 
-#if CONFIG_VP8_ENCODER
 TEST_P(SADTest, MaxRef) {
   FillConstant(source_data_, source_stride_, 0);
   FillConstant(reference_data_, reference_stride_, mask_);
-  CheckSAD(UINT_MAX);
+  CheckSAD();
 }
 
 TEST_P(SADTest, MaxSrc) {
   FillConstant(source_data_, source_stride_, mask_);
   FillConstant(reference_data_, reference_stride_, 0);
-  CheckSAD(UINT_MAX);
+  CheckSAD();
 }
 
 TEST_P(SADTest, ShortRef) {
-  int tmp_stride = reference_stride_;
+  const int tmp_stride = reference_stride_;
   reference_stride_ >>= 1;
   FillRandom(source_data_, source_stride_);
   FillRandom(reference_data_, reference_stride_);
-  CheckSAD(UINT_MAX);
+  CheckSAD();
   reference_stride_ = tmp_stride;
 }
 
@@ -459,7 +370,7 @@
   reference_stride_ -= 1;
   FillRandom(source_data_, source_stride_);
   FillRandom(reference_data_, reference_stride_);
-  CheckSAD(UINT_MAX);
+  CheckSAD();
   reference_stride_ = tmp_stride;
 }
 
@@ -468,75 +379,24 @@
   source_stride_ >>= 1;
   FillRandom(source_data_, source_stride_);
   FillRandom(reference_data_, reference_stride_);
-  CheckSAD(UINT_MAX);
-  source_stride_ = tmp_stride;
-}
-
-TEST_P(SADTest, MaxSAD) {
-  // Verify that, when max_sad is set, the implementation does not return a
-  // value lower than the reference.
-  FillConstant(source_data_, source_stride_, mask_);
-  FillConstant(reference_data_, reference_stride_, 0);
-  CheckSAD(128);
-}
-#endif  // CONFIG_VP8_ENCODER
-
-#if CONFIG_VP9_ENCODER
-TEST_P(SADVP9Test, MaxRef) {
-  FillConstant(source_data_, source_stride_, 0);
-  FillConstant(reference_data_, reference_stride_, mask_);
-  CheckSAD();
-}
-
-TEST_P(SADVP9Test, MaxSrc) {
-  FillConstant(source_data_, source_stride_, mask_);
-  FillConstant(reference_data_, reference_stride_, 0);
-  CheckSAD();
-}
-
-TEST_P(SADVP9Test, ShortRef) {
-  const int tmp_stride = reference_stride_;
-  reference_stride_ >>= 1;
-  FillRandom(source_data_, source_stride_);
-  FillRandom(reference_data_, reference_stride_);
-  CheckSAD();
-  reference_stride_ = tmp_stride;
-}
-
-TEST_P(SADVP9Test, UnalignedRef) {
-  // The reference frame, but not the source frame, may be unaligned for
-  // certain types of searches.
-  const int tmp_stride = reference_stride_;
-  reference_stride_ -= 1;
-  FillRandom(source_data_, source_stride_);
-  FillRandom(reference_data_, reference_stride_);
-  CheckSAD();
-  reference_stride_ = tmp_stride;
-}
-
-TEST_P(SADVP9Test, ShortSrc) {
-  const int tmp_stride = source_stride_;
-  source_stride_ >>= 1;
-  FillRandom(source_data_, source_stride_);
-  FillRandom(reference_data_, reference_stride_);
   CheckSAD();
   source_stride_ = tmp_stride;
 }
 
-TEST_P(SADavgVP9Test, MaxRef) {
+TEST_P(SADavgTest, MaxRef) {
   FillConstant(source_data_, source_stride_, 0);
   FillConstant(reference_data_, reference_stride_, mask_);
   FillConstant(second_pred_, width_, 0);
   CheckSAD();
 }
-TEST_P(SADavgVP9Test, MaxSrc) {
+TEST_P(SADavgTest, MaxSrc) {
   FillConstant(source_data_, source_stride_, mask_);
   FillConstant(reference_data_, reference_stride_, 0);
   FillConstant(second_pred_, width_, 0);
   CheckSAD();
 }
 
-TEST_P(SADavgVP9Test, ShortRef) {
+TEST_P(SADavgTest, ShortRef) {
   const int tmp_stride = reference_stride_;
   reference_stride_ >>= 1;
   FillRandom(source_data_, source_stride_);
@@ -546,7 +406,7 @@
   reference_stride_ = tmp_stride;
 }
 
-TEST_P(SADavgVP9Test, UnalignedRef) {
+TEST_P(SADavgTest, UnalignedRef) {
   // The reference frame, but not the source frame, may be unaligned for
   // certain types of searches.
   const int tmp_stride = reference_stride_;
@@ -558,7 +418,7 @@
   reference_stride_ = tmp_stride;
 }
 
-TEST_P(SADavgVP9Test, ShortSrc) {
+TEST_P(SADavgTest, ShortSrc) {
   const int tmp_stride = source_stride_;
   source_stride_ >>= 1;
   FillRandom(source_data_, source_stride_);
@@ -567,7 +427,6 @@
   CheckSAD();
   source_stride_ = tmp_stride;
 }
-#endif  // CONFIG_VP9_ENCODER
 
 TEST_P(SADx4Test, MaxRef) {
   FillConstant(source_data_, source_stride_, 0);
@@ -641,617 +500,633 @@
 
 //------------------------------------------------------------------------------
 // C functions
-#if CONFIG_VP8_ENCODER
-const SadMxNFunc sad_16x16_c = vp8_sad16x16_c;
-const SadMxNFunc sad_8x16_c = vp8_sad8x16_c;
-const SadMxNFunc sad_16x8_c = vp8_sad16x8_c;
-const SadMxNFunc sad_8x8_c = vp8_sad8x8_c;
-const SadMxNFunc sad_4x4_c = vp8_sad4x4_c;
+const SadMxNFunc sad64x64_c = vpx_sad64x64_c;
+const SadMxNFunc sad64x32_c = vpx_sad64x32_c;
+const SadMxNFunc sad32x64_c = vpx_sad32x64_c;
+const SadMxNFunc sad32x32_c = vpx_sad32x32_c;
+const SadMxNFunc sad32x16_c = vpx_sad32x16_c;
+const SadMxNFunc sad16x32_c = vpx_sad16x32_c;
+const SadMxNFunc sad16x16_c = vpx_sad16x16_c;
+const SadMxNFunc sad16x8_c = vpx_sad16x8_c;
+const SadMxNFunc sad8x16_c = vpx_sad8x16_c;
+const SadMxNFunc sad8x8_c = vpx_sad8x8_c;
+const SadMxNFunc sad8x4_c = vpx_sad8x4_c;
+const SadMxNFunc sad4x8_c = vpx_sad4x8_c;
+const SadMxNFunc sad4x4_c = vpx_sad4x4_c;
+#if CONFIG_VP9_HIGHBITDEPTH
+const SadMxNFunc highbd_sad64x64_c = vpx_highbd_sad64x64_c;
+const SadMxNFunc highbd_sad64x32_c = vpx_highbd_sad64x32_c;
+const SadMxNFunc highbd_sad32x64_c = vpx_highbd_sad32x64_c;
+const SadMxNFunc highbd_sad32x32_c = vpx_highbd_sad32x32_c;
+const SadMxNFunc highbd_sad32x16_c = vpx_highbd_sad32x16_c;
+const SadMxNFunc highbd_sad16x32_c = vpx_highbd_sad16x32_c;
+const SadMxNFunc highbd_sad16x16_c = vpx_highbd_sad16x16_c;
+const SadMxNFunc highbd_sad16x8_c = vpx_highbd_sad16x8_c;
+const SadMxNFunc highbd_sad8x16_c = vpx_highbd_sad8x16_c;
+const SadMxNFunc highbd_sad8x8_c = vpx_highbd_sad8x8_c;
+const SadMxNFunc highbd_sad8x4_c = vpx_highbd_sad8x4_c;
+const SadMxNFunc highbd_sad4x8_c = vpx_highbd_sad4x8_c;
+const SadMxNFunc highbd_sad4x4_c = vpx_highbd_sad4x4_c;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 const SadMxNParam c_tests[] = {
-  make_tuple(16, 16, sad_16x16_c, -1),
-  make_tuple(8, 16, sad_8x16_c, -1),
-  make_tuple(16, 8, sad_16x8_c, -1),
-  make_tuple(8, 8, sad_8x8_c, -1),
-  make_tuple(4, 4, sad_4x4_c, -1),
+  make_tuple(64, 64, sad64x64_c, -1),
+  make_tuple(64, 32, sad64x32_c, -1),
+  make_tuple(32, 64, sad32x64_c, -1),
+  make_tuple(32, 32, sad32x32_c, -1),
+  make_tuple(32, 16, sad32x16_c, -1),
+  make_tuple(16, 32, sad16x32_c, -1),
+  make_tuple(16, 16, sad16x16_c, -1),
+  make_tuple(16, 8, sad16x8_c, -1),
+  make_tuple(8, 16, sad8x16_c, -1),
+  make_tuple(8, 8, sad8x8_c, -1),
+  make_tuple(8, 4, sad8x4_c, -1),
+  make_tuple(4, 8, sad4x8_c, -1),
+  make_tuple(4, 4, sad4x4_c, -1),
+#if CONFIG_VP9_HIGHBITDEPTH
+  make_tuple(64, 64, highbd_sad64x64_c, 8),
+  make_tuple(64, 32, highbd_sad64x32_c, 8),
+  make_tuple(32, 64, highbd_sad32x64_c, 8),
+  make_tuple(32, 32, highbd_sad32x32_c, 8),
+  make_tuple(32, 16, highbd_sad32x16_c, 8),
+  make_tuple(16, 32, highbd_sad16x32_c, 8),
+  make_tuple(16, 16, highbd_sad16x16_c, 8),
+  make_tuple(16, 8, highbd_sad16x8_c, 8),
+  make_tuple(8, 16, highbd_sad8x16_c, 8),
+  make_tuple(8, 8, highbd_sad8x8_c, 8),
+  make_tuple(8, 4, highbd_sad8x4_c, 8),
+  make_tuple(4, 8, highbd_sad4x8_c, 8),
+  make_tuple(4, 4, highbd_sad4x4_c, 8),
+  make_tuple(64, 64, highbd_sad64x64_c, 10),
+  make_tuple(64, 32, highbd_sad64x32_c, 10),
+  make_tuple(32, 64, highbd_sad32x64_c, 10),
+  make_tuple(32, 32, highbd_sad32x32_c, 10),
+  make_tuple(32, 16, highbd_sad32x16_c, 10),
+  make_tuple(16, 32, highbd_sad16x32_c, 10),
+  make_tuple(16, 16, highbd_sad16x16_c, 10),
+  make_tuple(16, 8, highbd_sad16x8_c, 10),
+  make_tuple(8, 16, highbd_sad8x16_c, 10),
+  make_tuple(8, 8, highbd_sad8x8_c, 10),
+  make_tuple(8, 4, highbd_sad8x4_c, 10),
+  make_tuple(4, 8, highbd_sad4x8_c, 10),
+  make_tuple(4, 4, highbd_sad4x4_c, 10),
+  make_tuple(64, 64, highbd_sad64x64_c, 12),
+  make_tuple(64, 32, highbd_sad64x32_c, 12),
+  make_tuple(32, 64, highbd_sad32x64_c, 12),
+  make_tuple(32, 32, highbd_sad32x32_c, 12),
+  make_tuple(32, 16, highbd_sad32x16_c, 12),
+  make_tuple(16, 32, highbd_sad16x32_c, 12),
+  make_tuple(16, 16, highbd_sad16x16_c, 12),
+  make_tuple(16, 8, highbd_sad16x8_c, 12),
+  make_tuple(8, 16, highbd_sad8x16_c, 12),
+  make_tuple(8, 8, highbd_sad8x8_c, 12),
+  make_tuple(8, 4, highbd_sad8x4_c, 12),
+  make_tuple(4, 8, highbd_sad4x8_c, 12),
+  make_tuple(4, 4, highbd_sad4x4_c, 12),
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 };
 INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::ValuesIn(c_tests));
-#endif  // CONFIG_VP8_ENCODER
 
-#if CONFIG_VP9_ENCODER
-const SadMxNVp9Func sad_64x64_c_vp9 = vp9_sad64x64_c;
-const SadMxNVp9Func sad_32x32_c_vp9 = vp9_sad32x32_c;
-const SadMxNVp9Func sad_16x16_c_vp9 = vp9_sad16x16_c;
-const SadMxNVp9Func sad_8x16_c_vp9 = vp9_sad8x16_c;
-const SadMxNVp9Func sad_16x8_c_vp9 = vp9_sad16x8_c;
-const SadMxNVp9Func sad_8x8_c_vp9 = vp9_sad8x8_c;
-const SadMxNVp9Func sad_8x4_c_vp9 = vp9_sad8x4_c;
-const SadMxNVp9Func sad_4x8_c_vp9 = vp9_sad4x8_c;
-const SadMxNVp9Func sad_4x4_c_vp9 = vp9_sad4x4_c;
-const SadMxNVp9Param c_vp9_tests[] = {
-  make_tuple(64, 64, sad_64x64_c_vp9, -1),
-  make_tuple(32, 32, sad_32x32_c_vp9, -1),
-  make_tuple(16, 16, sad_16x16_c_vp9, -1),
-  make_tuple(8, 16, sad_8x16_c_vp9, -1),
-  make_tuple(16, 8, sad_16x8_c_vp9, -1),
-  make_tuple(8, 8, sad_8x8_c_vp9, -1),
-  make_tuple(8, 4, sad_8x4_c_vp9, -1),
-  make_tuple(4, 8, sad_4x8_c_vp9, -1),
-  make_tuple(4, 4, sad_4x4_c_vp9, -1),
-};
-INSTANTIATE_TEST_CASE_P(C, SADVP9Test, ::testing::ValuesIn(c_vp9_tests));
-
-const SadMxNx4Func sad_64x64x4d_c = vp9_sad64x64x4d_c;
-const SadMxNx4Func sad_64x32x4d_c = vp9_sad64x32x4d_c;
-const SadMxNx4Func sad_32x64x4d_c = vp9_sad32x64x4d_c;
-const SadMxNx4Func sad_32x32x4d_c = vp9_sad32x32x4d_c;
-const SadMxNx4Func sad_32x16x4d_c = vp9_sad32x16x4d_c;
-const SadMxNx4Func sad_16x32x4d_c = vp9_sad16x32x4d_c;
-const SadMxNx4Func sad_16x16x4d_c = vp9_sad16x16x4d_c;
-const SadMxNx4Func sad_16x8x4d_c = vp9_sad16x8x4d_c;
-const SadMxNx4Func sad_8x16x4d_c = vp9_sad8x16x4d_c;
-const SadMxNx4Func sad_8x8x4d_c = vp9_sad8x8x4d_c;
-const SadMxNx4Func sad_8x4x4d_c = vp9_sad8x4x4d_c;
-const SadMxNx4Func sad_4x8x4d_c = vp9_sad4x8x4d_c;
-const SadMxNx4Func sad_4x4x4d_c = vp9_sad4x4x4d_c;
-INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::Values(
-                        make_tuple(64, 64, sad_64x64x4d_c, -1),
-                        make_tuple(64, 32, sad_64x32x4d_c, -1),
-                        make_tuple(32, 64, sad_32x64x4d_c, -1),
-                        make_tuple(32, 32, sad_32x32x4d_c, -1),
-                        make_tuple(32, 16, sad_32x16x4d_c, -1),
-                        make_tuple(16, 32, sad_16x32x4d_c, -1),
-                        make_tuple(16, 16, sad_16x16x4d_c, -1),
-                        make_tuple(16, 8, sad_16x8x4d_c, -1),
-                        make_tuple(8, 16, sad_8x16x4d_c, -1),
-                        make_tuple(8, 8, sad_8x8x4d_c, -1),
-                        make_tuple(8, 4, sad_8x4x4d_c, -1),
-                        make_tuple(4, 8, sad_4x8x4d_c, -1),
-                        make_tuple(4, 4, sad_4x4x4d_c, -1)));
-
+const SadMxNAvgFunc sad64x64_avg_c = vpx_sad64x64_avg_c;
+const SadMxNAvgFunc sad64x32_avg_c = vpx_sad64x32_avg_c;
+const SadMxNAvgFunc sad32x64_avg_c = vpx_sad32x64_avg_c;
+const SadMxNAvgFunc sad32x32_avg_c = vpx_sad32x32_avg_c;
+const SadMxNAvgFunc sad32x16_avg_c = vpx_sad32x16_avg_c;
+const SadMxNAvgFunc sad16x32_avg_c = vpx_sad16x32_avg_c;
+const SadMxNAvgFunc sad16x16_avg_c = vpx_sad16x16_avg_c;
+const SadMxNAvgFunc sad16x8_avg_c = vpx_sad16x8_avg_c;
+const SadMxNAvgFunc sad8x16_avg_c = vpx_sad8x16_avg_c;
+const SadMxNAvgFunc sad8x8_avg_c = vpx_sad8x8_avg_c;
+const SadMxNAvgFunc sad8x4_avg_c = vpx_sad8x4_avg_c;
+const SadMxNAvgFunc sad4x8_avg_c = vpx_sad4x8_avg_c;
+const SadMxNAvgFunc sad4x4_avg_c = vpx_sad4x4_avg_c;
 #if CONFIG_VP9_HIGHBITDEPTH
-const SadMxNVp9Func highbd_sad_64x64_c_vp9 = vp9_highbd_sad64x64_c;
-const SadMxNVp9Func highbd_sad_32x32_c_vp9 = vp9_highbd_sad32x32_c;
-const SadMxNVp9Func highbd_sad_16x16_c_vp9 = vp9_highbd_sad16x16_c;
-const SadMxNVp9Func highbd_sad_8x16_c_vp9 = vp9_highbd_sad8x16_c;
-const SadMxNVp9Func highbd_sad_16x8_c_vp9 = vp9_highbd_sad16x8_c;
-const SadMxNVp9Func highbd_sad_8x8_c_vp9 = vp9_highbd_sad8x8_c;
-const SadMxNVp9Func highbd_sad_8x4_c_vp9 = vp9_highbd_sad8x4_c;
-const SadMxNVp9Func highbd_sad_4x8_c_vp9 = vp9_highbd_sad4x8_c;
-const SadMxNVp9Func highbd_sad_4x4_c_vp9 = vp9_highbd_sad4x4_c;
-const SadMxNVp9Param c_vp9_highbd_8_tests[] = {
-  make_tuple(64, 64, highbd_sad_64x64_c_vp9, 8),
-  make_tuple(32, 32, highbd_sad_32x32_c_vp9, 8),
-  make_tuple(16, 16, highbd_sad_16x16_c_vp9, 8),
-  make_tuple(8, 16, highbd_sad_8x16_c_vp9, 8),
-  make_tuple(16, 8, highbd_sad_16x8_c_vp9, 8),
-  make_tuple(8, 8, highbd_sad_8x8_c_vp9, 8),
-  make_tuple(8, 4, highbd_sad_8x4_c_vp9, 8),
-  make_tuple(4, 8, highbd_sad_4x8_c_vp9, 8),
-  make_tuple(4, 4, highbd_sad_4x4_c_vp9, 8),
-};
-INSTANTIATE_TEST_CASE_P(C_8, SADVP9Test,
-                        ::testing::ValuesIn(c_vp9_highbd_8_tests));
-
-const SadMxNVp9Param c_vp9_highbd_10_tests[] = {
-  make_tuple(64, 64, highbd_sad_64x64_c_vp9, 10),
-  make_tuple(32, 32, highbd_sad_32x32_c_vp9, 10),
-  make_tuple(16, 16, highbd_sad_16x16_c_vp9, 10),
-  make_tuple(8, 16, highbd_sad_8x16_c_vp9, 10),
-  make_tuple(16, 8, highbd_sad_16x8_c_vp9, 10),
-  make_tuple(8, 8, highbd_sad_8x8_c_vp9, 10),
-  make_tuple(8, 4, highbd_sad_8x4_c_vp9, 10),
-  make_tuple(4, 8, highbd_sad_4x8_c_vp9, 10),
-  make_tuple(4, 4, highbd_sad_4x4_c_vp9, 10),
-};
-INSTANTIATE_TEST_CASE_P(C_10, SADVP9Test,
-                        ::testing::ValuesIn(c_vp9_highbd_10_tests));
-
-const SadMxNVp9Param c_vp9_highbd_12_tests[] = {
-  make_tuple(64, 64, highbd_sad_64x64_c_vp9, 12),
-  make_tuple(32, 32, highbd_sad_32x32_c_vp9, 12),
-  make_tuple(16, 16, highbd_sad_16x16_c_vp9, 12),
-  make_tuple(8, 16, highbd_sad_8x16_c_vp9, 12),
-  make_tuple(16, 8, highbd_sad_16x8_c_vp9, 12),
-  make_tuple(8, 8, highbd_sad_8x8_c_vp9, 12),
-  make_tuple(8, 4, highbd_sad_8x4_c_vp9, 12),
-  make_tuple(4, 8, highbd_sad_4x8_c_vp9, 12),
-  make_tuple(4, 4, highbd_sad_4x4_c_vp9, 12),
-};
-INSTANTIATE_TEST_CASE_P(C_12, SADVP9Test,
-                        ::testing::ValuesIn(c_vp9_highbd_12_tests));
-
-const SadMxNAvgVp9Func highbd_sad8x4_avg_c_vp9 = vp9_highbd_sad8x4_avg_c;
-const SadMxNAvgVp9Func highbd_sad8x8_avg_c_vp9 = vp9_highbd_sad8x8_avg_c;
-const SadMxNAvgVp9Func highbd_sad8x16_avg_c_vp9 = vp9_highbd_sad8x16_avg_c;
-const SadMxNAvgVp9Func highbd_sad16x8_avg_c_vp9 = vp9_highbd_sad16x8_avg_c;
-const SadMxNAvgVp9Func highbd_sad16x16_avg_c_vp9 = vp9_highbd_sad16x16_avg_c;
-const SadMxNAvgVp9Func highbd_sad16x32_avg_c_vp9 = vp9_highbd_sad16x32_avg_c;
-const SadMxNAvgVp9Func highbd_sad32x16_avg_c_vp9 = vp9_highbd_sad32x16_avg_c;
-const SadMxNAvgVp9Func highbd_sad32x32_avg_c_vp9 = vp9_highbd_sad32x32_avg_c;
-const SadMxNAvgVp9Func highbd_sad32x64_avg_c_vp9 = vp9_highbd_sad32x64_avg_c;
-const SadMxNAvgVp9Func highbd_sad64x32_avg_c_vp9 = vp9_highbd_sad64x32_avg_c;
-const SadMxNAvgVp9Func highbd_sad64x64_avg_c_vp9 = vp9_highbd_sad64x64_avg_c;
-SadMxNAvgVp9Param avg_c_vp9_highbd_8_tests[] = {
-  make_tuple(8, 4, highbd_sad8x4_avg_c_vp9, 8),
-  make_tuple(8, 8, highbd_sad8x8_avg_c_vp9, 8),
-  make_tuple(8, 16, highbd_sad8x16_avg_c_vp9, 8),
-  make_tuple(16, 8, highbd_sad16x8_avg_c_vp9, 8),
-  make_tuple(16, 16, highbd_sad16x16_avg_c_vp9, 8),
-  make_tuple(16, 32, highbd_sad16x32_avg_c_vp9, 8),
-  make_tuple(32, 16, highbd_sad32x16_avg_c_vp9, 8),
-  make_tuple(32, 32, highbd_sad32x32_avg_c_vp9, 8),
-  make_tuple(32, 64, highbd_sad32x64_avg_c_vp9, 8),
-  make_tuple(64, 32, highbd_sad64x32_avg_c_vp9, 8),
-  make_tuple(64, 64, highbd_sad64x64_avg_c_vp9, 8)};
-INSTANTIATE_TEST_CASE_P(C_8, SADavgVP9Test,
-                        ::testing::ValuesIn(avg_c_vp9_highbd_8_tests));
-
-SadMxNAvgVp9Param avg_c_vp9_highbd_10_tests[] = {
-  make_tuple(8, 4, highbd_sad8x4_avg_c_vp9, 10),
-  make_tuple(8, 8, highbd_sad8x8_avg_c_vp9, 10),
-  make_tuple(8, 16, highbd_sad8x16_avg_c_vp9, 10),
-  make_tuple(16, 8, highbd_sad16x8_avg_c_vp9, 10),
-  make_tuple(16, 16, highbd_sad16x16_avg_c_vp9, 10),
-  make_tuple(16, 32, highbd_sad16x32_avg_c_vp9, 10),
-  make_tuple(32, 16, highbd_sad32x16_avg_c_vp9, 10),
-  make_tuple(32, 32, highbd_sad32x32_avg_c_vp9, 10),
-  make_tuple(32, 64, highbd_sad32x64_avg_c_vp9, 10),
-  make_tuple(64, 32, highbd_sad64x32_avg_c_vp9, 10),
-  make_tuple(64, 64, highbd_sad64x64_avg_c_vp9, 10)};
-INSTANTIATE_TEST_CASE_P(C_10, SADavgVP9Test,
-                        ::testing::ValuesIn(avg_c_vp9_highbd_10_tests));
-
-SadMxNAvgVp9Param avg_c_vp9_highbd_12_tests[] = {
-  make_tuple(8, 4, highbd_sad8x4_avg_c_vp9, 12),
-  make_tuple(8, 8, highbd_sad8x8_avg_c_vp9, 12),
-  make_tuple(8, 16, highbd_sad8x16_avg_c_vp9, 12),
-  make_tuple(16, 8, highbd_sad16x8_avg_c_vp9, 12),
-  make_tuple(16, 16, highbd_sad16x16_avg_c_vp9, 12),
-  make_tuple(16, 32, highbd_sad16x32_avg_c_vp9, 12),
-  make_tuple(32, 16, highbd_sad32x16_avg_c_vp9, 12),
-  make_tuple(32, 32, highbd_sad32x32_avg_c_vp9, 12),
-  make_tuple(32, 64, highbd_sad32x64_avg_c_vp9, 12),
-  make_tuple(64, 32, highbd_sad64x32_avg_c_vp9, 12),
-  make_tuple(64, 64, highbd_sad64x64_avg_c_vp9, 12)};
-INSTANTIATE_TEST_CASE_P(C_12, SADavgVP9Test,
-                        ::testing::ValuesIn(avg_c_vp9_highbd_12_tests));
-
-const SadMxNx4Func highbd_sad_64x64x4d_c = vp9_highbd_sad64x64x4d_c;
-const SadMxNx4Func highbd_sad_64x32x4d_c = vp9_highbd_sad64x32x4d_c;
-const SadMxNx4Func highbd_sad_32x64x4d_c = vp9_highbd_sad32x64x4d_c;
-const SadMxNx4Func highbd_sad_32x32x4d_c = vp9_highbd_sad32x32x4d_c;
-const SadMxNx4Func highbd_sad_32x16x4d_c = vp9_highbd_sad32x16x4d_c;
-const SadMxNx4Func highbd_sad_16x32x4d_c = vp9_highbd_sad16x32x4d_c;
-const SadMxNx4Func highbd_sad_16x16x4d_c = vp9_highbd_sad16x16x4d_c;
-const SadMxNx4Func highbd_sad_16x8x4d_c  = vp9_highbd_sad16x8x4d_c;
-const SadMxNx4Func highbd_sad_8x16x4d_c  = vp9_highbd_sad8x16x4d_c;
-const SadMxNx4Func highbd_sad_8x8x4d_c   = vp9_highbd_sad8x8x4d_c;
-const SadMxNx4Func highbd_sad_8x4x4d_c   = vp9_highbd_sad8x4x4d_c;
-const SadMxNx4Func highbd_sad_4x8x4d_c   = vp9_highbd_sad4x8x4d_c;
-const SadMxNx4Func highbd_sad_4x4x4d_c   = vp9_highbd_sad4x4x4d_c;
-INSTANTIATE_TEST_CASE_P(C_8, SADx4Test, ::testing::Values(
-                        make_tuple(64, 64, highbd_sad_64x64x4d_c, 8),
-                        make_tuple(64, 32, highbd_sad_64x32x4d_c, 8),
-                        make_tuple(32, 64, highbd_sad_32x64x4d_c, 8),
-                        make_tuple(32, 32, highbd_sad_32x32x4d_c, 8),
-                        make_tuple(32, 16, highbd_sad_32x16x4d_c, 8),
-                        make_tuple(16, 32, highbd_sad_16x32x4d_c, 8),
-                        make_tuple(16, 16, highbd_sad_16x16x4d_c, 8),
-                        make_tuple(16, 8,  highbd_sad_16x8x4d_c,  8),
-                        make_tuple(8,  16, highbd_sad_8x16x4d_c,  8),
-                        make_tuple(8,  8,  highbd_sad_8x8x4d_c,   8),
-                        make_tuple(8,  4,  highbd_sad_8x4x4d_c,   8),
-                        make_tuple(4,  8,  highbd_sad_4x8x4d_c,   8),
-                        make_tuple(4,  4,  highbd_sad_4x4x4d_c,   8)));
-
-INSTANTIATE_TEST_CASE_P(C_10, SADx4Test, ::testing::Values(
-                        make_tuple(64, 64, highbd_sad_64x64x4d_c, 10),
-                        make_tuple(64, 32, highbd_sad_64x32x4d_c, 10),
-                        make_tuple(32, 64, highbd_sad_32x64x4d_c, 10),
-                        make_tuple(32, 32, highbd_sad_32x32x4d_c, 10),
-                        make_tuple(32, 16, highbd_sad_32x16x4d_c, 10),
-                        make_tuple(16, 32, highbd_sad_16x32x4d_c, 10),
-                        make_tuple(16, 16, highbd_sad_16x16x4d_c, 10),
-                        make_tuple(16, 8,  highbd_sad_16x8x4d_c,  10),
-                        make_tuple(8,  16, highbd_sad_8x16x4d_c,  10),
-                        make_tuple(8,  8,  highbd_sad_8x8x4d_c,   10),
-                        make_tuple(8,  4,  highbd_sad_8x4x4d_c,   10),
-                        make_tuple(4,  8,  highbd_sad_4x8x4d_c,   10),
-                        make_tuple(4,  4,  highbd_sad_4x4x4d_c,   10)));
-
-INSTANTIATE_TEST_CASE_P(C_12, SADx4Test, ::testing::Values(
-                        make_tuple(64, 64, highbd_sad_64x64x4d_c, 12),
-                        make_tuple(64, 32, highbd_sad_64x32x4d_c, 12),
-                        make_tuple(32, 64, highbd_sad_32x64x4d_c, 12),
-                        make_tuple(32, 32, highbd_sad_32x32x4d_c, 12),
-                        make_tuple(32, 16, highbd_sad_32x16x4d_c, 12),
-                        make_tuple(16, 32, highbd_sad_16x32x4d_c, 12),
-                        make_tuple(16, 16, highbd_sad_16x16x4d_c, 12),
-                        make_tuple(16, 8,  highbd_sad_16x8x4d_c,  12),
-                        make_tuple(8,  16, highbd_sad_8x16x4d_c,  12),
-                        make_tuple(8,  8,  highbd_sad_8x8x4d_c,   12),
-                        make_tuple(8,  4,  highbd_sad_8x4x4d_c,   12),
-                        make_tuple(4,  8,  highbd_sad_4x8x4d_c,   12),
-                        make_tuple(4,  4,  highbd_sad_4x4x4d_c,   12)));
+const SadMxNAvgFunc highbd_sad64x64_avg_c = vpx_highbd_sad64x64_avg_c;
+const SadMxNAvgFunc highbd_sad64x32_avg_c = vpx_highbd_sad64x32_avg_c;
+const SadMxNAvgFunc highbd_sad32x64_avg_c = vpx_highbd_sad32x64_avg_c;
+const SadMxNAvgFunc highbd_sad32x32_avg_c = vpx_highbd_sad32x32_avg_c;
+const SadMxNAvgFunc highbd_sad32x16_avg_c = vpx_highbd_sad32x16_avg_c;
+const SadMxNAvgFunc highbd_sad16x32_avg_c = vpx_highbd_sad16x32_avg_c;
+const SadMxNAvgFunc highbd_sad16x16_avg_c = vpx_highbd_sad16x16_avg_c;
+const SadMxNAvgFunc highbd_sad16x8_avg_c = vpx_highbd_sad16x8_avg_c;
+const SadMxNAvgFunc highbd_sad8x16_avg_c = vpx_highbd_sad8x16_avg_c;
+const SadMxNAvgFunc highbd_sad8x8_avg_c = vpx_highbd_sad8x8_avg_c;
+const SadMxNAvgFunc highbd_sad8x4_avg_c = vpx_highbd_sad8x4_avg_c;
+const SadMxNAvgFunc highbd_sad4x8_avg_c = vpx_highbd_sad4x8_avg_c;
+const SadMxNAvgFunc highbd_sad4x4_avg_c = vpx_highbd_sad4x4_avg_c;
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-#endif  // CONFIG_VP9_ENCODER
+const SadMxNAvgParam avg_c_tests[] = {
+  make_tuple(64, 64, sad64x64_avg_c, -1),
+  make_tuple(64, 32, sad64x32_avg_c, -1),
+  make_tuple(32, 64, sad32x64_avg_c, -1),
+  make_tuple(32, 32, sad32x32_avg_c, -1),
+  make_tuple(32, 16, sad32x16_avg_c, -1),
+  make_tuple(16, 32, sad16x32_avg_c, -1),
+  make_tuple(16, 16, sad16x16_avg_c, -1),
+  make_tuple(16, 8, sad16x8_avg_c, -1),
+  make_tuple(8, 16, sad8x16_avg_c, -1),
+  make_tuple(8, 8, sad8x8_avg_c, -1),
+  make_tuple(8, 4, sad8x4_avg_c, -1),
+  make_tuple(4, 8, sad4x8_avg_c, -1),
+  make_tuple(4, 4, sad4x4_avg_c, -1),
+#if CONFIG_VP9_HIGHBITDEPTH
+  make_tuple(64, 64, highbd_sad64x64_avg_c, 8),
+  make_tuple(64, 32, highbd_sad64x32_avg_c, 8),
+  make_tuple(32, 64, highbd_sad32x64_avg_c, 8),
+  make_tuple(32, 32, highbd_sad32x32_avg_c, 8),
+  make_tuple(32, 16, highbd_sad32x16_avg_c, 8),
+  make_tuple(16, 32, highbd_sad16x32_avg_c, 8),
+  make_tuple(16, 16, highbd_sad16x16_avg_c, 8),
+  make_tuple(16, 8, highbd_sad16x8_avg_c, 8),
+  make_tuple(8, 16, highbd_sad8x16_avg_c, 8),
+  make_tuple(8, 8, highbd_sad8x8_avg_c, 8),
+  make_tuple(8, 4, highbd_sad8x4_avg_c, 8),
+  make_tuple(4, 8, highbd_sad4x8_avg_c, 8),
+  make_tuple(4, 4, highbd_sad4x4_avg_c, 8),
+  make_tuple(64, 64, highbd_sad64x64_avg_c, 10),
+  make_tuple(64, 32, highbd_sad64x32_avg_c, 10),
+  make_tuple(32, 64, highbd_sad32x64_avg_c, 10),
+  make_tuple(32, 32, highbd_sad32x32_avg_c, 10),
+  make_tuple(32, 16, highbd_sad32x16_avg_c, 10),
+  make_tuple(16, 32, highbd_sad16x32_avg_c, 10),
+  make_tuple(16, 16, highbd_sad16x16_avg_c, 10),
+  make_tuple(16, 8, highbd_sad16x8_avg_c, 10),
+  make_tuple(8, 16, highbd_sad8x16_avg_c, 10),
+  make_tuple(8, 8, highbd_sad8x8_avg_c, 10),
+  make_tuple(8, 4, highbd_sad8x4_avg_c, 10),
+  make_tuple(4, 8, highbd_sad4x8_avg_c, 10),
+  make_tuple(4, 4, highbd_sad4x4_avg_c, 10),
+  make_tuple(64, 64, highbd_sad64x64_avg_c, 12),
+  make_tuple(64, 32, highbd_sad64x32_avg_c, 12),
+  make_tuple(32, 64, highbd_sad32x64_avg_c, 12),
+  make_tuple(32, 32, highbd_sad32x32_avg_c, 12),
+  make_tuple(32, 16, highbd_sad32x16_avg_c, 12),
+  make_tuple(16, 32, highbd_sad16x32_avg_c, 12),
+  make_tuple(16, 16, highbd_sad16x16_avg_c, 12),
+  make_tuple(16, 8, highbd_sad16x8_avg_c, 12),
+  make_tuple(8, 16, highbd_sad8x16_avg_c, 12),
+  make_tuple(8, 8, highbd_sad8x8_avg_c, 12),
+  make_tuple(8, 4, highbd_sad8x4_avg_c, 12),
+  make_tuple(4, 8, highbd_sad4x8_avg_c, 12),
+  make_tuple(4, 4, highbd_sad4x4_avg_c, 12),
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_CASE_P(C, SADavgTest, ::testing::ValuesIn(avg_c_tests));
+
+const SadMxNx4Func sad64x64x4d_c = vpx_sad64x64x4d_c;
+const SadMxNx4Func sad64x32x4d_c = vpx_sad64x32x4d_c;
+const SadMxNx4Func sad32x64x4d_c = vpx_sad32x64x4d_c;
+const SadMxNx4Func sad32x32x4d_c = vpx_sad32x32x4d_c;
+const SadMxNx4Func sad32x16x4d_c = vpx_sad32x16x4d_c;
+const SadMxNx4Func sad16x32x4d_c = vpx_sad16x32x4d_c;
+const SadMxNx4Func sad16x16x4d_c = vpx_sad16x16x4d_c;
+const SadMxNx4Func sad16x8x4d_c = vpx_sad16x8x4d_c;
+const SadMxNx4Func sad8x16x4d_c = vpx_sad8x16x4d_c;
+const SadMxNx4Func sad8x8x4d_c = vpx_sad8x8x4d_c;
+const SadMxNx4Func sad8x4x4d_c = vpx_sad8x4x4d_c;
+const SadMxNx4Func sad4x8x4d_c = vpx_sad4x8x4d_c;
+const SadMxNx4Func sad4x4x4d_c = vpx_sad4x4x4d_c;
+#if CONFIG_VP9_HIGHBITDEPTH
+const SadMxNx4Func highbd_sad64x64x4d_c = vpx_highbd_sad64x64x4d_c;
+const SadMxNx4Func highbd_sad64x32x4d_c = vpx_highbd_sad64x32x4d_c;
+const SadMxNx4Func highbd_sad32x64x4d_c = vpx_highbd_sad32x64x4d_c;
+const SadMxNx4Func highbd_sad32x32x4d_c = vpx_highbd_sad32x32x4d_c;
+const SadMxNx4Func highbd_sad32x16x4d_c = vpx_highbd_sad32x16x4d_c;
+const SadMxNx4Func highbd_sad16x32x4d_c = vpx_highbd_sad16x32x4d_c;
+const SadMxNx4Func highbd_sad16x16x4d_c = vpx_highbd_sad16x16x4d_c;
+const SadMxNx4Func highbd_sad16x8x4d_c = vpx_highbd_sad16x8x4d_c;
+const SadMxNx4Func highbd_sad8x16x4d_c = vpx_highbd_sad8x16x4d_c;
+const SadMxNx4Func highbd_sad8x8x4d_c = vpx_highbd_sad8x8x4d_c;
+const SadMxNx4Func highbd_sad8x4x4d_c = vpx_highbd_sad8x4x4d_c;
+const SadMxNx4Func highbd_sad4x8x4d_c = vpx_highbd_sad4x8x4d_c;
+const SadMxNx4Func highbd_sad4x4x4d_c = vpx_highbd_sad4x4x4d_c;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+const SadMxNx4Param x4d_c_tests[] = {
+  make_tuple(64, 64, sad64x64x4d_c, -1),
+  make_tuple(64, 32, sad64x32x4d_c, -1),
+  make_tuple(32, 64, sad32x64x4d_c, -1),
+  make_tuple(32, 32, sad32x32x4d_c, -1),
+  make_tuple(32, 16, sad32x16x4d_c, -1),
+  make_tuple(16, 32, sad16x32x4d_c, -1),
+  make_tuple(16, 16, sad16x16x4d_c, -1),
+  make_tuple(16, 8, sad16x8x4d_c, -1),
+  make_tuple(8, 16, sad8x16x4d_c, -1),
+  make_tuple(8, 8, sad8x8x4d_c, -1),
+  make_tuple(8, 4, sad8x4x4d_c, -1),
+  make_tuple(4, 8, sad4x8x4d_c, -1),
+  make_tuple(4, 4, sad4x4x4d_c, -1),
+#if CONFIG_VP9_HIGHBITDEPTH
+  make_tuple(64, 64, highbd_sad64x64x4d_c, 8),
+  make_tuple(64, 32, highbd_sad64x32x4d_c, 8),
+  make_tuple(32, 64, highbd_sad32x64x4d_c, 8),
+  make_tuple(32, 32, highbd_sad32x32x4d_c, 8),
+  make_tuple(32, 16, highbd_sad32x16x4d_c, 8),
+  make_tuple(16, 32, highbd_sad16x32x4d_c, 8),
+  make_tuple(16, 16, highbd_sad16x16x4d_c, 8),
+  make_tuple(16, 8, highbd_sad16x8x4d_c, 8),
+  make_tuple(8, 16, highbd_sad8x16x4d_c, 8),
+  make_tuple(8, 8, highbd_sad8x8x4d_c, 8),
+  make_tuple(8, 4, highbd_sad8x4x4d_c, 8),
+  make_tuple(4, 8, highbd_sad4x8x4d_c, 8),
+  make_tuple(4, 4, highbd_sad4x4x4d_c, 8),
+  make_tuple(64, 64, highbd_sad64x64x4d_c, 10),
+  make_tuple(64, 32, highbd_sad64x32x4d_c, 10),
+  make_tuple(32, 64, highbd_sad32x64x4d_c, 10),
+  make_tuple(32, 32, highbd_sad32x32x4d_c, 10),
+  make_tuple(32, 16, highbd_sad32x16x4d_c, 10),
+  make_tuple(16, 32, highbd_sad16x32x4d_c, 10),
+  make_tuple(16, 16, highbd_sad16x16x4d_c, 10),
+  make_tuple(16, 8, highbd_sad16x8x4d_c, 10),
+  make_tuple(8, 16, highbd_sad8x16x4d_c, 10),
+  make_tuple(8, 8, highbd_sad8x8x4d_c, 10),
+  make_tuple(8, 4, highbd_sad8x4x4d_c, 10),
+  make_tuple(4, 8, highbd_sad4x8x4d_c, 10),
+  make_tuple(4, 4, highbd_sad4x4x4d_c, 10),
+  make_tuple(64, 64, highbd_sad64x64x4d_c, 12),
+  make_tuple(64, 32, highbd_sad64x32x4d_c, 12),
+  make_tuple(32, 64, highbd_sad32x64x4d_c, 12),
+  make_tuple(32, 32, highbd_sad32x32x4d_c, 12),
+  make_tuple(32, 16, highbd_sad32x16x4d_c, 12),
+  make_tuple(16, 32, highbd_sad16x32x4d_c, 12),
+  make_tuple(16, 16, highbd_sad16x16x4d_c, 12),
+  make_tuple(16, 8, highbd_sad16x8x4d_c, 12),
+  make_tuple(8, 16, highbd_sad8x16x4d_c, 12),
+  make_tuple(8, 8, highbd_sad8x8x4d_c, 12),
+  make_tuple(8, 4, highbd_sad8x4x4d_c, 12),
+  make_tuple(4, 8, highbd_sad4x8x4d_c, 12),
+  make_tuple(4, 4, highbd_sad4x4x4d_c, 12),
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::ValuesIn(x4d_c_tests));
 
 //------------------------------------------------------------------------------
 // ARM functions
 #if HAVE_MEDIA
-#if CONFIG_VP8_ENCODER
-const SadMxNFunc sad_16x16_armv6 = vp8_sad16x16_armv6;
-INSTANTIATE_TEST_CASE_P(MEDIA, SADTest, ::testing::Values(
-                        make_tuple(16, 16, sad_16x16_armv6, -1)));
-#endif  // CONFIG_VP8_ENCODER
+const SadMxNFunc sad16x16_media = vpx_sad16x16_media;
+const SadMxNParam media_tests[] = {
+  make_tuple(16, 16, sad16x16_media, -1),
+};
+INSTANTIATE_TEST_CASE_P(MEDIA, SADTest, ::testing::ValuesIn(media_tests));
 #endif  // HAVE_MEDIA
 
 #if HAVE_NEON
-#if CONFIG_VP8_ENCODER
-const SadMxNFunc sad_16x16_neon = vp8_sad16x16_neon;
-const SadMxNFunc sad_8x16_neon = vp8_sad8x16_neon;
-const SadMxNFunc sad_16x8_neon = vp8_sad16x8_neon;
-const SadMxNFunc sad_8x8_neon = vp8_sad8x8_neon;
-const SadMxNFunc sad_4x4_neon = vp8_sad4x4_neon;
-INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::Values(
-                        make_tuple(16, 16, sad_16x16_neon, -1),
-                        make_tuple(8, 16, sad_8x16_neon, -1),
-                        make_tuple(16, 8, sad_16x8_neon, -1),
-                        make_tuple(8, 8, sad_8x8_neon, -1),
-                        make_tuple(4, 4, sad_4x4_neon, -1)));
-#endif  // CONFIG_VP8_ENCODER
-#if CONFIG_VP9_ENCODER
-const SadMxNVp9Func sad_64x64_neon_vp9 = vp9_sad64x64_neon;
-const SadMxNVp9Func sad_32x32_neon_vp9 = vp9_sad32x32_neon;
-const SadMxNVp9Func sad_16x16_neon_vp9 = vp9_sad16x16_neon;
-const SadMxNVp9Func sad_8x8_neon_vp9 = vp9_sad8x8_neon;
-const SadMxNVp9Param neon_vp9_tests[] = {
-  make_tuple(64, 64, sad_64x64_neon_vp9, -1),
-  make_tuple(32, 32, sad_32x32_neon_vp9, -1),
-  make_tuple(16, 16, sad_16x16_neon_vp9, -1),
-  make_tuple(8, 8, sad_8x8_neon_vp9, -1),
+const SadMxNFunc sad64x64_neon = vpx_sad64x64_neon;
+const SadMxNFunc sad32x32_neon = vpx_sad32x32_neon;
+const SadMxNFunc sad16x16_neon = vpx_sad16x16_neon;
+const SadMxNFunc sad16x8_neon = vpx_sad16x8_neon;
+const SadMxNFunc sad8x16_neon = vpx_sad8x16_neon;
+const SadMxNFunc sad8x8_neon = vpx_sad8x8_neon;
+const SadMxNFunc sad4x4_neon = vpx_sad4x4_neon;
+
+const SadMxNParam neon_tests[] = {
+  make_tuple(64, 64, sad64x64_neon, -1),
+  make_tuple(32, 32, sad32x32_neon, -1),
+  make_tuple(16, 16, sad16x16_neon, -1),
+  make_tuple(16, 8, sad16x8_neon, -1),
+  make_tuple(8, 16, sad8x16_neon, -1),
+  make_tuple(8, 8, sad8x8_neon, -1),
+  make_tuple(4, 4, sad4x4_neon, -1),
 };
-INSTANTIATE_TEST_CASE_P(NEON, SADVP9Test, ::testing::ValuesIn(neon_vp9_tests));
-#endif  // CONFIG_VP9_ENCODER
+INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::ValuesIn(neon_tests));
+
+const SadMxNx4Func sad64x64x4d_neon = vpx_sad64x64x4d_neon;
+const SadMxNx4Func sad32x32x4d_neon = vpx_sad32x32x4d_neon;
+const SadMxNx4Func sad16x16x4d_neon = vpx_sad16x16x4d_neon;
+const SadMxNx4Param x4d_neon_tests[] = {
+  make_tuple(64, 64, sad64x64x4d_neon, -1),
+  make_tuple(32, 32, sad32x32x4d_neon, -1),
+  make_tuple(16, 16, sad16x16x4d_neon, -1),
+};
+INSTANTIATE_TEST_CASE_P(NEON, SADx4Test, ::testing::ValuesIn(x4d_neon_tests));
 #endif  // HAVE_NEON
 
 //------------------------------------------------------------------------------
 // x86 functions
 #if HAVE_MMX
-#if CONFIG_VP8_ENCODER
-const SadMxNFunc sad_16x16_mmx = vp8_sad16x16_mmx;
-const SadMxNFunc sad_8x16_mmx = vp8_sad8x16_mmx;
-const SadMxNFunc sad_16x8_mmx = vp8_sad16x8_mmx;
-const SadMxNFunc sad_8x8_mmx = vp8_sad8x8_mmx;
-const SadMxNFunc sad_4x4_mmx = vp8_sad4x4_mmx;
+const SadMxNFunc sad16x16_mmx = vpx_sad16x16_mmx;
+const SadMxNFunc sad16x8_mmx = vpx_sad16x8_mmx;
+const SadMxNFunc sad8x16_mmx = vpx_sad8x16_mmx;
+const SadMxNFunc sad8x8_mmx = vpx_sad8x8_mmx;
+const SadMxNFunc sad4x4_mmx = vpx_sad4x4_mmx;
 const SadMxNParam mmx_tests[] = {
-  make_tuple(16, 16, sad_16x16_mmx, -1),
-  make_tuple(8, 16, sad_8x16_mmx, -1),
-  make_tuple(16, 8, sad_16x8_mmx, -1),
-  make_tuple(8, 8, sad_8x8_mmx, -1),
-  make_tuple(4, 4, sad_4x4_mmx, -1),
+  make_tuple(16, 16, sad16x16_mmx, -1),
+  make_tuple(16, 8, sad16x8_mmx, -1),
+  make_tuple(8, 16, sad8x16_mmx, -1),
+  make_tuple(8, 8, sad8x8_mmx, -1),
+  make_tuple(4, 4, sad4x4_mmx, -1),
 };
 INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests));
-#endif  // CONFIG_VP8_ENCODER
-
 #endif  // HAVE_MMX
 
 #if HAVE_SSE
-#if CONFIG_VP9_ENCODER
 #if CONFIG_USE_X86INC
-const SadMxNVp9Func sad_4x4_sse_vp9 = vp9_sad4x4_sse;
-const SadMxNVp9Func sad_4x8_sse_vp9 = vp9_sad4x8_sse;
-INSTANTIATE_TEST_CASE_P(SSE, SADVP9Test, ::testing::Values(
-                        make_tuple(4, 4, sad_4x4_sse_vp9, -1),
-                        make_tuple(4, 8, sad_4x8_sse_vp9, -1)));
+const SadMxNFunc sad4x8_sse = vpx_sad4x8_sse;
+const SadMxNFunc sad4x4_sse = vpx_sad4x4_sse;
+const SadMxNParam sse_tests[] = {
+  make_tuple(4, 8, sad4x8_sse, -1),
+  make_tuple(4, 4, sad4x4_sse, -1),
+};
+INSTANTIATE_TEST_CASE_P(SSE, SADTest, ::testing::ValuesIn(sse_tests));
 
-const SadMxNx4Func sad_4x8x4d_sse = vp9_sad4x8x4d_sse;
-const SadMxNx4Func sad_4x4x4d_sse = vp9_sad4x4x4d_sse;
-INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::Values(
-                        make_tuple(4, 8, sad_4x8x4d_sse, -1),
-                        make_tuple(4, 4, sad_4x4x4d_sse, -1)));
+const SadMxNAvgFunc sad4x8_avg_sse = vpx_sad4x8_avg_sse;
+const SadMxNAvgFunc sad4x4_avg_sse = vpx_sad4x4_avg_sse;
+const SadMxNAvgParam avg_sse_tests[] = {
+  make_tuple(4, 8, sad4x8_avg_sse, -1),
+  make_tuple(4, 4, sad4x4_avg_sse, -1),
+};
+INSTANTIATE_TEST_CASE_P(SSE, SADavgTest, ::testing::ValuesIn(avg_sse_tests));
+
+const SadMxNx4Func sad4x8x4d_sse = vpx_sad4x8x4d_sse;
+const SadMxNx4Func sad4x4x4d_sse = vpx_sad4x4x4d_sse;
+const SadMxNx4Param x4d_sse_tests[] = {
+  make_tuple(4, 8, sad4x8x4d_sse, -1),
+  make_tuple(4, 4, sad4x4x4d_sse, -1),
+};
+INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::ValuesIn(x4d_sse_tests));
 #endif  // CONFIG_USE_X86INC
-#endif  // CONFIG_VP9_ENCODER
 #endif  // HAVE_SSE
 
 #if HAVE_SSE2
-#if CONFIG_VP8_ENCODER
-const SadMxNFunc sad_16x16_wmt = vp8_sad16x16_wmt;
-const SadMxNFunc sad_8x16_wmt = vp8_sad8x16_wmt;
-const SadMxNFunc sad_16x8_wmt = vp8_sad16x8_wmt;
-const SadMxNFunc sad_8x8_wmt = vp8_sad8x8_wmt;
-const SadMxNFunc sad_4x4_wmt = vp8_sad4x4_wmt;
+#if CONFIG_USE_X86INC
+const SadMxNFunc sad64x64_sse2 = vpx_sad64x64_sse2;
+const SadMxNFunc sad64x32_sse2 = vpx_sad64x32_sse2;
+const SadMxNFunc sad32x64_sse2 = vpx_sad32x64_sse2;
+const SadMxNFunc sad32x32_sse2 = vpx_sad32x32_sse2;
+const SadMxNFunc sad32x16_sse2 = vpx_sad32x16_sse2;
+const SadMxNFunc sad16x32_sse2 = vpx_sad16x32_sse2;
+const SadMxNFunc sad16x16_sse2 = vpx_sad16x16_sse2;
+const SadMxNFunc sad16x8_sse2 = vpx_sad16x8_sse2;
+const SadMxNFunc sad8x16_sse2 = vpx_sad8x16_sse2;
+const SadMxNFunc sad8x8_sse2 = vpx_sad8x8_sse2;
+const SadMxNFunc sad8x4_sse2 = vpx_sad8x4_sse2;
+#if CONFIG_VP9_HIGHBITDEPTH
+const SadMxNFunc highbd_sad64x64_sse2 = vpx_highbd_sad64x64_sse2;
+const SadMxNFunc highbd_sad64x32_sse2 = vpx_highbd_sad64x32_sse2;
+const SadMxNFunc highbd_sad32x64_sse2 = vpx_highbd_sad32x64_sse2;
+const SadMxNFunc highbd_sad32x32_sse2 = vpx_highbd_sad32x32_sse2;
+const SadMxNFunc highbd_sad32x16_sse2 = vpx_highbd_sad32x16_sse2;
+const SadMxNFunc highbd_sad16x32_sse2 = vpx_highbd_sad16x32_sse2;
+const SadMxNFunc highbd_sad16x16_sse2 = vpx_highbd_sad16x16_sse2;
+const SadMxNFunc highbd_sad16x8_sse2 = vpx_highbd_sad16x8_sse2;
+const SadMxNFunc highbd_sad8x16_sse2 = vpx_highbd_sad8x16_sse2;
+const SadMxNFunc highbd_sad8x8_sse2 = vpx_highbd_sad8x8_sse2;
+const SadMxNFunc highbd_sad8x4_sse2 = vpx_highbd_sad8x4_sse2;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 const SadMxNParam sse2_tests[] = {
-  make_tuple(16, 16, sad_16x16_wmt, -1),
-  make_tuple(8, 16, sad_8x16_wmt, -1),
-  make_tuple(16, 8, sad_16x8_wmt, -1),
-  make_tuple(8, 8, sad_8x8_wmt, -1),
-  make_tuple(4, 4, sad_4x4_wmt, -1),
+  make_tuple(64, 64, sad64x64_sse2, -1),
+  make_tuple(64, 32, sad64x32_sse2, -1),
+  make_tuple(32, 64, sad32x64_sse2, -1),
+  make_tuple(32, 32, sad32x32_sse2, -1),
+  make_tuple(32, 16, sad32x16_sse2, -1),
+  make_tuple(16, 32, sad16x32_sse2, -1),
+  make_tuple(16, 16, sad16x16_sse2, -1),
+  make_tuple(16, 8, sad16x8_sse2, -1),
+  make_tuple(8, 16, sad8x16_sse2, -1),
+  make_tuple(8, 8, sad8x8_sse2, -1),
+  make_tuple(8, 4, sad8x4_sse2, -1),
+#if CONFIG_VP9_HIGHBITDEPTH
+  make_tuple(64, 64, highbd_sad64x64_sse2, 8),
+  make_tuple(64, 32, highbd_sad64x32_sse2, 8),
+  make_tuple(32, 64, highbd_sad32x64_sse2, 8),
+  make_tuple(32, 32, highbd_sad32x32_sse2, 8),
+  make_tuple(32, 16, highbd_sad32x16_sse2, 8),
+  make_tuple(16, 32, highbd_sad16x32_sse2, 8),
+  make_tuple(16, 16, highbd_sad16x16_sse2, 8),
+  make_tuple(16, 8, highbd_sad16x8_sse2, 8),
+  make_tuple(8, 16, highbd_sad8x16_sse2, 8),
+  make_tuple(8, 8, highbd_sad8x8_sse2, 8),
+  make_tuple(8, 4, highbd_sad8x4_sse2, 8),
+  make_tuple(64, 64, highbd_sad64x64_sse2, 10),
+  make_tuple(64, 32, highbd_sad64x32_sse2, 10),
+  make_tuple(32, 64, highbd_sad32x64_sse2, 10),
+  make_tuple(32, 32, highbd_sad32x32_sse2, 10),
+  make_tuple(32, 16, highbd_sad32x16_sse2, 10),
+  make_tuple(16, 32, highbd_sad16x32_sse2, 10),
+  make_tuple(16, 16, highbd_sad16x16_sse2, 10),
+  make_tuple(16, 8, highbd_sad16x8_sse2, 10),
+  make_tuple(8, 16, highbd_sad8x16_sse2, 10),
+  make_tuple(8, 8, highbd_sad8x8_sse2, 10),
+  make_tuple(8, 4, highbd_sad8x4_sse2, 10),
+  make_tuple(64, 64, highbd_sad64x64_sse2, 12),
+  make_tuple(64, 32, highbd_sad64x32_sse2, 12),
+  make_tuple(32, 64, highbd_sad32x64_sse2, 12),
+  make_tuple(32, 32, highbd_sad32x32_sse2, 12),
+  make_tuple(32, 16, highbd_sad32x16_sse2, 12),
+  make_tuple(16, 32, highbd_sad16x32_sse2, 12),
+  make_tuple(16, 16, highbd_sad16x16_sse2, 12),
+  make_tuple(16, 8, highbd_sad16x8_sse2, 12),
+  make_tuple(8, 16, highbd_sad8x16_sse2, 12),
+  make_tuple(8, 8, highbd_sad8x8_sse2, 12),
+  make_tuple(8, 4, highbd_sad8x4_sse2, 12),
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 };
 INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));
-#endif  // CONFIG_VP8_ENCODER
 
-#if CONFIG_VP9_ENCODER
-#if CONFIG_USE_X86INC
-const SadMxNVp9Func sad_64x64_sse2_vp9 = vp9_sad64x64_sse2;
-const SadMxNVp9Func sad_64x32_sse2_vp9 = vp9_sad64x32_sse2;
-const SadMxNVp9Func sad_32x64_sse2_vp9 = vp9_sad32x64_sse2;
-const SadMxNVp9Func sad_32x32_sse2_vp9 = vp9_sad32x32_sse2;
-const SadMxNVp9Func sad_32x16_sse2_vp9 = vp9_sad32x16_sse2;
-const SadMxNVp9Func sad_16x32_sse2_vp9 = vp9_sad16x32_sse2;
-const SadMxNVp9Func sad_16x16_sse2_vp9 = vp9_sad16x16_sse2;
-const SadMxNVp9Func sad_16x8_sse2_vp9 = vp9_sad16x8_sse2;
-const SadMxNVp9Func sad_8x16_sse2_vp9 = vp9_sad8x16_sse2;
-const SadMxNVp9Func sad_8x8_sse2_vp9 = vp9_sad8x8_sse2;
-const SadMxNVp9Func sad_8x4_sse2_vp9 = vp9_sad8x4_sse2;
-
-const SadMxNx4Func sad_64x64x4d_sse2 = vp9_sad64x64x4d_sse2;
-const SadMxNx4Func sad_64x32x4d_sse2 = vp9_sad64x32x4d_sse2;
-const SadMxNx4Func sad_32x64x4d_sse2 = vp9_sad32x64x4d_sse2;
-const SadMxNx4Func sad_32x32x4d_sse2 = vp9_sad32x32x4d_sse2;
-const SadMxNx4Func sad_32x16x4d_sse2 = vp9_sad32x16x4d_sse2;
-const SadMxNx4Func sad_16x32x4d_sse2 = vp9_sad16x32x4d_sse2;
-const SadMxNx4Func sad_16x16x4d_sse2 = vp9_sad16x16x4d_sse2;
-const SadMxNx4Func sad_16x8x4d_sse2 = vp9_sad16x8x4d_sse2;
-const SadMxNx4Func sad_8x16x4d_sse2 = vp9_sad8x16x4d_sse2;
-const SadMxNx4Func sad_8x8x4d_sse2 = vp9_sad8x8x4d_sse2;
-const SadMxNx4Func sad_8x4x4d_sse2 = vp9_sad8x4x4d_sse2;
-
+const SadMxNAvgFunc sad64x64_avg_sse2 = vpx_sad64x64_avg_sse2;
+const SadMxNAvgFunc sad64x32_avg_sse2 = vpx_sad64x32_avg_sse2;
+const SadMxNAvgFunc sad32x64_avg_sse2 = vpx_sad32x64_avg_sse2;
+const SadMxNAvgFunc sad32x32_avg_sse2 = vpx_sad32x32_avg_sse2;
+const SadMxNAvgFunc sad32x16_avg_sse2 = vpx_sad32x16_avg_sse2;
+const SadMxNAvgFunc sad16x32_avg_sse2 = vpx_sad16x32_avg_sse2;
+const SadMxNAvgFunc sad16x16_avg_sse2 = vpx_sad16x16_avg_sse2;
+const SadMxNAvgFunc sad16x8_avg_sse2 = vpx_sad16x8_avg_sse2;
+const SadMxNAvgFunc sad8x16_avg_sse2 = vpx_sad8x16_avg_sse2;
+const SadMxNAvgFunc sad8x8_avg_sse2 = vpx_sad8x8_avg_sse2;
+const SadMxNAvgFunc sad8x4_avg_sse2 = vpx_sad8x4_avg_sse2;
 #if CONFIG_VP9_HIGHBITDEPTH
-const SadMxNVp9Func highbd_sad8x4_sse2_vp9 = vp9_highbd_sad8x4_sse2;
-const SadMxNVp9Func highbd_sad8x8_sse2_vp9 = vp9_highbd_sad8x8_sse2;
-const SadMxNVp9Func highbd_sad8x16_sse2_vp9 = vp9_highbd_sad8x16_sse2;
-const SadMxNVp9Func highbd_sad16x8_sse2_vp9 = vp9_highbd_sad16x8_sse2;
-const SadMxNVp9Func highbd_sad16x16_sse2_vp9 = vp9_highbd_sad16x16_sse2;
-const SadMxNVp9Func highbd_sad16x32_sse2_vp9 = vp9_highbd_sad16x32_sse2;
-const SadMxNVp9Func highbd_sad32x16_sse2_vp9 = vp9_highbd_sad32x16_sse2;
-const SadMxNVp9Func highbd_sad32x32_sse2_vp9 = vp9_highbd_sad32x32_sse2;
-const SadMxNVp9Func highbd_sad32x64_sse2_vp9 = vp9_highbd_sad32x64_sse2;
-const SadMxNVp9Func highbd_sad64x32_sse2_vp9 = vp9_highbd_sad64x32_sse2;
-const SadMxNVp9Func highbd_sad64x64_sse2_vp9 = vp9_highbd_sad64x64_sse2;
-
-INSTANTIATE_TEST_CASE_P(SSE2, SADVP9Test, ::testing::Values(
-                        make_tuple(64, 64, sad_64x64_sse2_vp9, -1),
-                        make_tuple(64, 32, sad_64x32_sse2_vp9, -1),
-                        make_tuple(32, 64, sad_32x64_sse2_vp9, -1),
-                        make_tuple(32, 32, sad_32x32_sse2_vp9, -1),
-                        make_tuple(32, 16, sad_32x16_sse2_vp9, -1),
-                        make_tuple(16, 32, sad_16x32_sse2_vp9, -1),
-                        make_tuple(16, 16, sad_16x16_sse2_vp9, -1),
-                        make_tuple(16, 8, sad_16x8_sse2_vp9, -1),
-                        make_tuple(8, 16, sad_8x16_sse2_vp9, -1),
-                        make_tuple(8, 8, sad_8x8_sse2_vp9, -1),
-                        make_tuple(8, 4, sad_8x4_sse2_vp9, -1),
-                        make_tuple(8, 4, highbd_sad8x4_sse2_vp9, 8),
-                        make_tuple(8, 8, highbd_sad8x8_sse2_vp9, 8),
-                        make_tuple(8, 16, highbd_sad8x16_sse2_vp9, 8),
-                        make_tuple(16, 8, highbd_sad16x8_sse2_vp9, 8),
-                        make_tuple(16, 16, highbd_sad16x16_sse2_vp9, 8),
-                        make_tuple(16, 32, highbd_sad16x32_sse2_vp9, 8),
-                        make_tuple(32, 16, highbd_sad32x16_sse2_vp9, 8),
-                        make_tuple(32, 32, highbd_sad32x32_sse2_vp9, 8),
-                        make_tuple(32, 64, highbd_sad32x64_sse2_vp9, 8),
-                        make_tuple(64, 32, highbd_sad64x32_sse2_vp9, 8),
-                        make_tuple(64, 64, highbd_sad64x64_sse2_vp9, 8),
-                        make_tuple(8, 4, highbd_sad8x4_sse2_vp9, 10),
-                        make_tuple(8, 8, highbd_sad8x8_sse2_vp9, 10),
-                        make_tuple(8, 16, highbd_sad8x16_sse2_vp9, 10),
-                        make_tuple(16, 8, highbd_sad16x8_sse2_vp9, 10),
-                        make_tuple(16, 16, highbd_sad16x16_sse2_vp9, 10),
-                        make_tuple(16, 32, highbd_sad16x32_sse2_vp9, 10),
-                        make_tuple(32, 16, highbd_sad32x16_sse2_vp9, 10),
-                        make_tuple(32, 32, highbd_sad32x32_sse2_vp9, 10),
-                        make_tuple(32, 64, highbd_sad32x64_sse2_vp9, 10),
-                        make_tuple(64, 32, highbd_sad64x32_sse2_vp9, 10),
-                        make_tuple(64, 64, highbd_sad64x64_sse2_vp9, 10),
-                        make_tuple(8, 4, highbd_sad8x4_sse2_vp9, 12),
-                        make_tuple(8, 8, highbd_sad8x8_sse2_vp9, 12),
-                        make_tuple(8, 16, highbd_sad8x16_sse2_vp9, 12),
-                        make_tuple(16, 8, highbd_sad16x8_sse2_vp9, 12),
-                        make_tuple(16, 16, highbd_sad16x16_sse2_vp9, 12),
-                        make_tuple(16, 32, highbd_sad16x32_sse2_vp9, 12),
-                        make_tuple(32, 16, highbd_sad32x16_sse2_vp9, 12),
-                        make_tuple(32, 32, highbd_sad32x32_sse2_vp9, 12),
-                        make_tuple(32, 64, highbd_sad32x64_sse2_vp9, 12),
-                        make_tuple(64, 32, highbd_sad64x32_sse2_vp9, 12),
-                        make_tuple(64, 64, highbd_sad64x64_sse2_vp9, 12)));
-
-const SadMxNAvgVp9Func highbd_sad8x4_avg_sse2_vp9 = vp9_highbd_sad8x4_avg_sse2;
-const SadMxNAvgVp9Func highbd_sad8x8_avg_sse2_vp9 = vp9_highbd_sad8x8_avg_sse2;
-const SadMxNAvgVp9Func highbd_sad8x16_avg_sse2_vp9 =
-  vp9_highbd_sad8x16_avg_sse2;
-const SadMxNAvgVp9Func highbd_sad16x8_avg_sse2_vp9 =
-  vp9_highbd_sad16x8_avg_sse2;
-const SadMxNAvgVp9Func highbd_sad16x16_avg_sse2_vp9 =
-  vp9_highbd_sad16x16_avg_sse2;
-const SadMxNAvgVp9Func highbd_sad16x32_avg_sse2_vp9 =
-  vp9_highbd_sad16x32_avg_sse2;
-const SadMxNAvgVp9Func highbd_sad32x16_avg_sse2_vp9 =
-  vp9_highbd_sad32x16_avg_sse2;
-const SadMxNAvgVp9Func highbd_sad32x32_avg_sse2_vp9 =
-  vp9_highbd_sad32x32_avg_sse2;
-const SadMxNAvgVp9Func highbd_sad32x64_avg_sse2_vp9 =
-  vp9_highbd_sad32x64_avg_sse2;
-const SadMxNAvgVp9Func highbd_sad64x32_avg_sse2_vp9 =
-  vp9_highbd_sad64x32_avg_sse2;
-const SadMxNAvgVp9Func highbd_sad64x64_avg_sse2_vp9 =
-  vp9_highbd_sad64x64_avg_sse2;
-
-INSTANTIATE_TEST_CASE_P(SSE2, SADavgVP9Test, ::testing::Values(
-                        make_tuple(8, 4, highbd_sad8x4_avg_sse2_vp9, 8),
-                        make_tuple(8, 8, highbd_sad8x8_avg_sse2_vp9, 8),
-                        make_tuple(8, 16, highbd_sad8x16_avg_sse2_vp9, 8),
-                        make_tuple(16, 8, highbd_sad16x8_avg_sse2_vp9, 8),
-                        make_tuple(16, 16, highbd_sad16x16_avg_sse2_vp9, 8),
-                        make_tuple(16, 32, highbd_sad16x32_avg_sse2_vp9, 8),
-                        make_tuple(32, 16, highbd_sad32x16_avg_sse2_vp9, 8),
-                        make_tuple(32, 32, highbd_sad32x32_avg_sse2_vp9, 8),
-                        make_tuple(32, 64, highbd_sad32x64_avg_sse2_vp9, 8),
-                        make_tuple(64, 32, highbd_sad64x32_avg_sse2_vp9, 8),
-                        make_tuple(64, 64, highbd_sad64x64_avg_sse2_vp9, 8),
-                        make_tuple(8, 4, highbd_sad8x4_avg_sse2_vp9, 10),
-                        make_tuple(8, 8, highbd_sad8x8_avg_sse2_vp9, 10),
-                        make_tuple(8, 16, highbd_sad8x16_avg_sse2_vp9, 10),
-                        make_tuple(16, 8, highbd_sad16x8_avg_sse2_vp9, 10),
-                        make_tuple(16, 16, highbd_sad16x16_avg_sse2_vp9, 10),
-                        make_tuple(16, 32, highbd_sad16x32_avg_sse2_vp9, 10),
-                        make_tuple(32, 16, highbd_sad32x16_avg_sse2_vp9, 10),
-                        make_tuple(32, 32, highbd_sad32x32_avg_sse2_vp9, 10),
-                        make_tuple(32, 64, highbd_sad32x64_avg_sse2_vp9, 10),
-                        make_tuple(64, 32, highbd_sad64x32_avg_sse2_vp9, 10),
-                        make_tuple(64, 64, highbd_sad64x64_avg_sse2_vp9, 10),
-                        make_tuple(8, 4, highbd_sad8x4_avg_sse2_vp9, 12),
-                        make_tuple(8, 8, highbd_sad8x8_avg_sse2_vp9, 12),
-                        make_tuple(8, 16, highbd_sad8x16_avg_sse2_vp9, 12),
-                        make_tuple(16, 8, highbd_sad16x8_avg_sse2_vp9, 12),
-                        make_tuple(16, 16, highbd_sad16x16_avg_sse2_vp9, 12),
-                        make_tuple(16, 32, highbd_sad16x32_avg_sse2_vp9, 12),
-                        make_tuple(32, 16, highbd_sad32x16_avg_sse2_vp9, 12),
-                        make_tuple(32, 32, highbd_sad32x32_avg_sse2_vp9, 12),
-                        make_tuple(32, 64, highbd_sad32x64_avg_sse2_vp9, 12),
-                        make_tuple(64, 32, highbd_sad64x32_avg_sse2_vp9, 12),
-                        make_tuple(64, 64, highbd_sad64x64_avg_sse2_vp9, 12)));
-
-const SadMxNx4Func highbd_sad_64x64x4d_sse2 = vp9_highbd_sad64x64x4d_sse2;
-const SadMxNx4Func highbd_sad_64x32x4d_sse2 = vp9_highbd_sad64x32x4d_sse2;
-const SadMxNx4Func highbd_sad_32x64x4d_sse2 = vp9_highbd_sad32x64x4d_sse2;
-const SadMxNx4Func highbd_sad_32x32x4d_sse2 = vp9_highbd_sad32x32x4d_sse2;
-const SadMxNx4Func highbd_sad_32x16x4d_sse2 = vp9_highbd_sad32x16x4d_sse2;
-const SadMxNx4Func highbd_sad_16x32x4d_sse2 = vp9_highbd_sad16x32x4d_sse2;
-const SadMxNx4Func highbd_sad_16x16x4d_sse2 = vp9_highbd_sad16x16x4d_sse2;
-const SadMxNx4Func highbd_sad_16x8x4d_sse2 = vp9_highbd_sad16x8x4d_sse2;
-const SadMxNx4Func highbd_sad_8x16x4d_sse2 = vp9_highbd_sad8x16x4d_sse2;
-const SadMxNx4Func highbd_sad_8x8x4d_sse2 = vp9_highbd_sad8x8x4d_sse2;
-const SadMxNx4Func highbd_sad_8x4x4d_sse2 = vp9_highbd_sad8x4x4d_sse2;
-const SadMxNx4Func highbd_sad_4x8x4d_sse2 = vp9_highbd_sad4x8x4d_sse2;
-const SadMxNx4Func highbd_sad_4x4x4d_sse2 = vp9_highbd_sad4x4x4d_sse2;
-
-INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values(
-                        make_tuple(64, 64, sad_64x64x4d_sse2, -1),
-                        make_tuple(64, 32, sad_64x32x4d_sse2, -1),
-                        make_tuple(32, 64, sad_32x64x4d_sse2, -1),
-                        make_tuple(32, 32, sad_32x32x4d_sse2, -1),
-                        make_tuple(32, 16, sad_32x16x4d_sse2, -1),
-                        make_tuple(16, 32, sad_16x32x4d_sse2, -1),
-                        make_tuple(16, 16, sad_16x16x4d_sse2, -1),
-                        make_tuple(16, 8, sad_16x8x4d_sse2,  -1),
-                        make_tuple(8, 16, sad_8x16x4d_sse2,  -1),
-                        make_tuple(8, 8, sad_8x8x4d_sse2,   -1),
-                        make_tuple(8, 4, sad_8x4x4d_sse2,   -1),
-                        make_tuple(64, 64, highbd_sad_64x64x4d_sse2, 8),
-                        make_tuple(64, 32, highbd_sad_64x32x4d_sse2, 8),
-                        make_tuple(32, 64, highbd_sad_32x64x4d_sse2, 8),
-                        make_tuple(32, 32, highbd_sad_32x32x4d_sse2, 8),
-                        make_tuple(32, 16, highbd_sad_32x16x4d_sse2, 8),
-                        make_tuple(16, 32, highbd_sad_16x32x4d_sse2, 8),
-                        make_tuple(16, 16, highbd_sad_16x16x4d_sse2, 8),
-                        make_tuple(16, 8, highbd_sad_16x8x4d_sse2,  8),
-                        make_tuple(8, 16, highbd_sad_8x16x4d_sse2,  8),
-                        make_tuple(8, 8, highbd_sad_8x8x4d_sse2,   8),
-                        make_tuple(8, 4, highbd_sad_8x4x4d_sse2,   8),
-                        make_tuple(4, 8, highbd_sad_4x8x4d_sse2,   8),
-                        make_tuple(4, 4, highbd_sad_4x4x4d_sse2,   8),
-                        make_tuple(64, 64, highbd_sad_64x64x4d_sse2, 10),
-                        make_tuple(64, 32, highbd_sad_64x32x4d_sse2, 10),
-                        make_tuple(32, 64, highbd_sad_32x64x4d_sse2, 10),
-                        make_tuple(32, 32, highbd_sad_32x32x4d_sse2, 10),
-                        make_tuple(32, 16, highbd_sad_32x16x4d_sse2, 10),
-                        make_tuple(16, 32, highbd_sad_16x32x4d_sse2, 10),
-                        make_tuple(16, 16, highbd_sad_16x16x4d_sse2, 10),
-                        make_tuple(16, 8, highbd_sad_16x8x4d_sse2,  10),
-                        make_tuple(8, 16, highbd_sad_8x16x4d_sse2,  10),
-                        make_tuple(8, 8, highbd_sad_8x8x4d_sse2,   10),
-                        make_tuple(8, 4, highbd_sad_8x4x4d_sse2,   10),
-                        make_tuple(4, 8, highbd_sad_4x8x4d_sse2,   10),
-                        make_tuple(4, 4, highbd_sad_4x4x4d_sse2,   10),
-                        make_tuple(64, 64, highbd_sad_64x64x4d_sse2, 12),
-                        make_tuple(64, 32, highbd_sad_64x32x4d_sse2, 12),
-                        make_tuple(32, 64, highbd_sad_32x64x4d_sse2, 12),
-                        make_tuple(32, 32, highbd_sad_32x32x4d_sse2, 12),
-                        make_tuple(32, 16, highbd_sad_32x16x4d_sse2, 12),
-                        make_tuple(16, 32, highbd_sad_16x32x4d_sse2, 12),
-                        make_tuple(16, 16, highbd_sad_16x16x4d_sse2, 12),
-                        make_tuple(16, 8, highbd_sad_16x8x4d_sse2,  12),
-                        make_tuple(8, 16, highbd_sad_8x16x4d_sse2,  12),
-                        make_tuple(8, 8, highbd_sad_8x8x4d_sse2,   12),
-                        make_tuple(8, 4, highbd_sad_8x4x4d_sse2,   12),
-                        make_tuple(4, 8, highbd_sad_4x8x4d_sse2,   12),
-                        make_tuple(4, 4, highbd_sad_4x4x4d_sse2,   12)));
-#else
-INSTANTIATE_TEST_CASE_P(SSE2, SADVP9Test, ::testing::Values(
-                        make_tuple(64, 64, sad_64x64_sse2_vp9, -1),
-                        make_tuple(64, 32, sad_64x32_sse2_vp9, -1),
-                        make_tuple(32, 64, sad_32x64_sse2_vp9, -1),
-                        make_tuple(32, 32, sad_32x32_sse2_vp9, -1),
-                        make_tuple(32, 16, sad_32x16_sse2_vp9, -1),
-                        make_tuple(16, 32, sad_16x32_sse2_vp9, -1),
-                        make_tuple(16, 16, sad_16x16_sse2_vp9, -1),
-                        make_tuple(16, 8, sad_16x8_sse2_vp9, -1),
-                        make_tuple(8, 16, sad_8x16_sse2_vp9, -1),
-                        make_tuple(8, 8, sad_8x8_sse2_vp9, -1),
-                        make_tuple(8, 4, sad_8x4_sse2_vp9, -1)));
-
-INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values(
-                        make_tuple(64, 64, sad_64x64x4d_sse2, -1),
-                        make_tuple(64, 32, sad_64x32x4d_sse2, -1),
-                        make_tuple(32, 64, sad_32x64x4d_sse2, -1),
-                        make_tuple(32, 32, sad_32x32x4d_sse2, -1),
-                        make_tuple(32, 16, sad_32x16x4d_sse2, -1),
-                        make_tuple(16, 32, sad_16x32x4d_sse2, -1),
-                        make_tuple(16, 16, sad_16x16x4d_sse2, -1),
-                        make_tuple(16, 8, sad_16x8x4d_sse2,  -1),
-                        make_tuple(8, 16, sad_8x16x4d_sse2,  -1),
-                        make_tuple(8, 8, sad_8x8x4d_sse2,   -1),
-                        make_tuple(8, 4, sad_8x4x4d_sse2,   -1)));
+const SadMxNAvgFunc highbd_sad64x64_avg_sse2 = vpx_highbd_sad64x64_avg_sse2;
+const SadMxNAvgFunc highbd_sad64x32_avg_sse2 = vpx_highbd_sad64x32_avg_sse2;
+const SadMxNAvgFunc highbd_sad32x64_avg_sse2 = vpx_highbd_sad32x64_avg_sse2;
+const SadMxNAvgFunc highbd_sad32x32_avg_sse2 = vpx_highbd_sad32x32_avg_sse2;
+const SadMxNAvgFunc highbd_sad32x16_avg_sse2 = vpx_highbd_sad32x16_avg_sse2;
+const SadMxNAvgFunc highbd_sad16x32_avg_sse2 = vpx_highbd_sad16x32_avg_sse2;
+const SadMxNAvgFunc highbd_sad16x16_avg_sse2 = vpx_highbd_sad16x16_avg_sse2;
+const SadMxNAvgFunc highbd_sad16x8_avg_sse2 = vpx_highbd_sad16x8_avg_sse2;
+const SadMxNAvgFunc highbd_sad8x16_avg_sse2 = vpx_highbd_sad8x16_avg_sse2;
+const SadMxNAvgFunc highbd_sad8x8_avg_sse2 = vpx_highbd_sad8x8_avg_sse2;
+const SadMxNAvgFunc highbd_sad8x4_avg_sse2 = vpx_highbd_sad8x4_avg_sse2;
 #endif  // CONFIG_VP9_HIGHBITDEPTH
+const SadMxNAvgParam avg_sse2_tests[] = {
+  make_tuple(64, 64, sad64x64_avg_sse2, -1),
+  make_tuple(64, 32, sad64x32_avg_sse2, -1),
+  make_tuple(32, 64, sad32x64_avg_sse2, -1),
+  make_tuple(32, 32, sad32x32_avg_sse2, -1),
+  make_tuple(32, 16, sad32x16_avg_sse2, -1),
+  make_tuple(16, 32, sad16x32_avg_sse2, -1),
+  make_tuple(16, 16, sad16x16_avg_sse2, -1),
+  make_tuple(16, 8, sad16x8_avg_sse2, -1),
+  make_tuple(8, 16, sad8x16_avg_sse2, -1),
+  make_tuple(8, 8, sad8x8_avg_sse2, -1),
+  make_tuple(8, 4, sad8x4_avg_sse2, -1),
+#if CONFIG_VP9_HIGHBITDEPTH
+  make_tuple(64, 64, highbd_sad64x64_avg_sse2, 8),
+  make_tuple(64, 32, highbd_sad64x32_avg_sse2, 8),
+  make_tuple(32, 64, highbd_sad32x64_avg_sse2, 8),
+  make_tuple(32, 32, highbd_sad32x32_avg_sse2, 8),
+  make_tuple(32, 16, highbd_sad32x16_avg_sse2, 8),
+  make_tuple(16, 32, highbd_sad16x32_avg_sse2, 8),
+  make_tuple(16, 16, highbd_sad16x16_avg_sse2, 8),
+  make_tuple(16, 8, highbd_sad16x8_avg_sse2, 8),
+  make_tuple(8, 16, highbd_sad8x16_avg_sse2, 8),
+  make_tuple(8, 8, highbd_sad8x8_avg_sse2, 8),
+  make_tuple(8, 4, highbd_sad8x4_avg_sse2, 8),
+  make_tuple(64, 64, highbd_sad64x64_avg_sse2, 10),
+  make_tuple(64, 32, highbd_sad64x32_avg_sse2, 10),
+  make_tuple(32, 64, highbd_sad32x64_avg_sse2, 10),
+  make_tuple(32, 32, highbd_sad32x32_avg_sse2, 10),
+  make_tuple(32, 16, highbd_sad32x16_avg_sse2, 10),
+  make_tuple(16, 32, highbd_sad16x32_avg_sse2, 10),
+  make_tuple(16, 16, highbd_sad16x16_avg_sse2, 10),
+  make_tuple(16, 8, highbd_sad16x8_avg_sse2, 10),
+  make_tuple(8, 16, highbd_sad8x16_avg_sse2, 10),
+  make_tuple(8, 8, highbd_sad8x8_avg_sse2, 10),
+  make_tuple(8, 4, highbd_sad8x4_avg_sse2, 10),
+  make_tuple(64, 64, highbd_sad64x64_avg_sse2, 12),
+  make_tuple(64, 32, highbd_sad64x32_avg_sse2, 12),
+  make_tuple(32, 64, highbd_sad32x64_avg_sse2, 12),
+  make_tuple(32, 32, highbd_sad32x32_avg_sse2, 12),
+  make_tuple(32, 16, highbd_sad32x16_avg_sse2, 12),
+  make_tuple(16, 32, highbd_sad16x32_avg_sse2, 12),
+  make_tuple(16, 16, highbd_sad16x16_avg_sse2, 12),
+  make_tuple(16, 8, highbd_sad16x8_avg_sse2, 12),
+  make_tuple(8, 16, highbd_sad8x16_avg_sse2, 12),
+  make_tuple(8, 8, highbd_sad8x8_avg_sse2, 12),
+  make_tuple(8, 4, highbd_sad8x4_avg_sse2, 12),
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_CASE_P(SSE2, SADavgTest, ::testing::ValuesIn(avg_sse2_tests));
+
+const SadMxNx4Func sad64x64x4d_sse2 = vpx_sad64x64x4d_sse2;
+const SadMxNx4Func sad64x32x4d_sse2 = vpx_sad64x32x4d_sse2;
+const SadMxNx4Func sad32x64x4d_sse2 = vpx_sad32x64x4d_sse2;
+const SadMxNx4Func sad32x32x4d_sse2 = vpx_sad32x32x4d_sse2;
+const SadMxNx4Func sad32x16x4d_sse2 = vpx_sad32x16x4d_sse2;
+const SadMxNx4Func sad16x32x4d_sse2 = vpx_sad16x32x4d_sse2;
+const SadMxNx4Func sad16x16x4d_sse2 = vpx_sad16x16x4d_sse2;
+const SadMxNx4Func sad16x8x4d_sse2 = vpx_sad16x8x4d_sse2;
+const SadMxNx4Func sad8x16x4d_sse2 = vpx_sad8x16x4d_sse2;
+const SadMxNx4Func sad8x8x4d_sse2 = vpx_sad8x8x4d_sse2;
+const SadMxNx4Func sad8x4x4d_sse2 = vpx_sad8x4x4d_sse2;
+#if CONFIG_VP9_HIGHBITDEPTH
+const SadMxNx4Func highbd_sad64x64x4d_sse2 = vpx_highbd_sad64x64x4d_sse2;
+const SadMxNx4Func highbd_sad64x32x4d_sse2 = vpx_highbd_sad64x32x4d_sse2;
+const SadMxNx4Func highbd_sad32x64x4d_sse2 = vpx_highbd_sad32x64x4d_sse2;
+const SadMxNx4Func highbd_sad32x32x4d_sse2 = vpx_highbd_sad32x32x4d_sse2;
+const SadMxNx4Func highbd_sad32x16x4d_sse2 = vpx_highbd_sad32x16x4d_sse2;
+const SadMxNx4Func highbd_sad16x32x4d_sse2 = vpx_highbd_sad16x32x4d_sse2;
+const SadMxNx4Func highbd_sad16x16x4d_sse2 = vpx_highbd_sad16x16x4d_sse2;
+const SadMxNx4Func highbd_sad16x8x4d_sse2 = vpx_highbd_sad16x8x4d_sse2;
+const SadMxNx4Func highbd_sad8x16x4d_sse2 = vpx_highbd_sad8x16x4d_sse2;
+const SadMxNx4Func highbd_sad8x8x4d_sse2 = vpx_highbd_sad8x8x4d_sse2;
+const SadMxNx4Func highbd_sad8x4x4d_sse2 = vpx_highbd_sad8x4x4d_sse2;
+const SadMxNx4Func highbd_sad4x8x4d_sse2 = vpx_highbd_sad4x8x4d_sse2;
+const SadMxNx4Func highbd_sad4x4x4d_sse2 = vpx_highbd_sad4x4x4d_sse2;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+const SadMxNx4Param x4d_sse2_tests[] = {
+  make_tuple(64, 64, sad64x64x4d_sse2, -1),
+  make_tuple(64, 32, sad64x32x4d_sse2, -1),
+  make_tuple(32, 64, sad32x64x4d_sse2, -1),
+  make_tuple(32, 32, sad32x32x4d_sse2, -1),
+  make_tuple(32, 16, sad32x16x4d_sse2, -1),
+  make_tuple(16, 32, sad16x32x4d_sse2, -1),
+  make_tuple(16, 16, sad16x16x4d_sse2, -1),
+  make_tuple(16, 8, sad16x8x4d_sse2, -1),
+  make_tuple(8, 16, sad8x16x4d_sse2, -1),
+  make_tuple(8, 8, sad8x8x4d_sse2, -1),
+  make_tuple(8, 4, sad8x4x4d_sse2, -1),
+#if CONFIG_VP9_HIGHBITDEPTH
+  make_tuple(64, 64, highbd_sad64x64x4d_sse2, 8),
+  make_tuple(64, 32, highbd_sad64x32x4d_sse2, 8),
+  make_tuple(32, 64, highbd_sad32x64x4d_sse2, 8),
+  make_tuple(32, 32, highbd_sad32x32x4d_sse2, 8),
+  make_tuple(32, 16, highbd_sad32x16x4d_sse2, 8),
+  make_tuple(16, 32, highbd_sad16x32x4d_sse2, 8),
+  make_tuple(16, 16, highbd_sad16x16x4d_sse2, 8),
+  make_tuple(16, 8, highbd_sad16x8x4d_sse2, 8),
+  make_tuple(8, 16, highbd_sad8x16x4d_sse2, 8),
+  make_tuple(8, 8, highbd_sad8x8x4d_sse2, 8),
+  make_tuple(8, 4, highbd_sad8x4x4d_sse2, 8),
+  make_tuple(4, 8, highbd_sad4x8x4d_sse2, 8),
+  make_tuple(4, 4, highbd_sad4x4x4d_sse2, 8),
+  make_tuple(64, 64, highbd_sad64x64x4d_sse2, 10),
+  make_tuple(64, 32, highbd_sad64x32x4d_sse2, 10),
+  make_tuple(32, 64, highbd_sad32x64x4d_sse2, 10),
+  make_tuple(32, 32, highbd_sad32x32x4d_sse2, 10),
+  make_tuple(32, 16, highbd_sad32x16x4d_sse2, 10),
+  make_tuple(16, 32, highbd_sad16x32x4d_sse2, 10),
+  make_tuple(16, 16, highbd_sad16x16x4d_sse2, 10),
+  make_tuple(16, 8, highbd_sad16x8x4d_sse2, 10),
+  make_tuple(8, 16, highbd_sad8x16x4d_sse2, 10),
+  make_tuple(8, 8, highbd_sad8x8x4d_sse2, 10),
+  make_tuple(8, 4, highbd_sad8x4x4d_sse2, 10),
+  make_tuple(4, 8, highbd_sad4x8x4d_sse2, 10),
+  make_tuple(4, 4, highbd_sad4x4x4d_sse2, 10),
+  make_tuple(64, 64, highbd_sad64x64x4d_sse2, 12),
+  make_tuple(64, 32, highbd_sad64x32x4d_sse2, 12),
+  make_tuple(32, 64, highbd_sad32x64x4d_sse2, 12),
+  make_tuple(32, 32, highbd_sad32x32x4d_sse2, 12),
+  make_tuple(32, 16, highbd_sad32x16x4d_sse2, 12),
+  make_tuple(16, 32, highbd_sad16x32x4d_sse2, 12),
+  make_tuple(16, 16, highbd_sad16x16x4d_sse2, 12),
+  make_tuple(16, 8, highbd_sad16x8x4d_sse2, 12),
+  make_tuple(8, 16, highbd_sad8x16x4d_sse2, 12),
+  make_tuple(8, 8, highbd_sad8x8x4d_sse2, 12),
+  make_tuple(8, 4, highbd_sad8x4x4d_sse2, 12),
+  make_tuple(4, 8, highbd_sad4x8x4d_sse2, 12),
+  make_tuple(4, 4, highbd_sad4x4x4d_sse2, 12),
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::ValuesIn(x4d_sse2_tests));
 #endif  // CONFIG_USE_X86INC
-#endif  // CONFIG_VP9_ENCODER
 #endif  // HAVE_SSE2
 
 #if HAVE_SSE3
-#if CONFIG_VP8_ENCODER
-const SadMxNx4Func sad_16x16x4d_sse3 = vp8_sad16x16x4d_sse3;
-const SadMxNx4Func sad_16x8x4d_sse3 = vp8_sad16x8x4d_sse3;
-const SadMxNx4Func sad_8x16x4d_sse3 = vp8_sad8x16x4d_sse3;
-const SadMxNx4Func sad_8x8x4d_sse3 = vp8_sad8x8x4d_sse3;
-const SadMxNx4Func sad_4x4x4d_sse3 = vp8_sad4x4x4d_sse3;
-INSTANTIATE_TEST_CASE_P(SSE3, SADx4Test, ::testing::Values(
-                        make_tuple(16, 16, sad_16x16x4d_sse3, -1),
-                        make_tuple(16, 8, sad_16x8x4d_sse3, -1),
-                        make_tuple(8, 16, sad_8x16x4d_sse3, -1),
-                        make_tuple(8, 8, sad_8x8x4d_sse3, -1),
-                        make_tuple(4, 4, sad_4x4x4d_sse3, -1)));
-#endif  // CONFIG_VP8_ENCODER
+// The only SSE3 functions are the x3 variants, which do not have tests.
 #endif  // HAVE_SSE3
 
 #if HAVE_SSSE3
-#if CONFIG_USE_X86INC
-#if CONFIG_VP8_ENCODER
-const SadMxNFunc sad_16x16_sse3 = vp8_sad16x16_sse3;
-INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values(
-                        make_tuple(16, 16, sad_16x16_sse3, -1)));
-#endif  // CONFIG_VP8_ENCODER
-#endif  // CONFIG_USE_X86INC
+// The only SSSE3 functions are the x3 variants, which do not have tests.
 #endif  // HAVE_SSSE3
 
-#if CONFIG_VP9_ENCODER
-#if HAVE_AVX2
-const SadMxNx4Func sad_64x64x4d_avx2 = vp9_sad64x64x4d_avx2;
-const SadMxNx4Func sad_32x32x4d_avx2 = vp9_sad32x32x4d_avx2;
-INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::Values(
-                        make_tuple(32, 32, sad_32x32x4d_avx2, -1),
-                        make_tuple(64, 64, sad_64x64x4d_avx2, -1)));
-#endif  // HAVE_AVX2
+#if HAVE_SSE4_1
+// The only SSE4.1 functions are the x8 variants, which do not have tests.
+#endif  // HAVE_SSE4_1
 
-#if HAVE_NEON
-const SadMxNx4Func sad_16x16x4d_neon = vp9_sad16x16x4d_neon;
-const SadMxNx4Func sad_32x32x4d_neon = vp9_sad32x32x4d_neon;
-const SadMxNx4Func sad_64x64x4d_neon = vp9_sad64x64x4d_neon;
-INSTANTIATE_TEST_CASE_P(NEON, SADx4Test, ::testing::Values(
-                        make_tuple(16, 16, sad_16x16x4d_neon, -1),
-                        make_tuple(32, 32, sad_32x32x4d_neon, -1),
-                        make_tuple(64, 64, sad_64x64x4d_neon, -1)));
-#endif  // HAVE_NEON
-#endif  // CONFIG_VP9_ENCODER
+#if HAVE_AVX2
+const SadMxNFunc sad64x64_avx2 = vpx_sad64x64_avx2;
+const SadMxNFunc sad64x32_avx2 = vpx_sad64x32_avx2;
+const SadMxNFunc sad32x64_avx2 = vpx_sad32x64_avx2;
+const SadMxNFunc sad32x32_avx2 = vpx_sad32x32_avx2;
+const SadMxNFunc sad32x16_avx2 = vpx_sad32x16_avx2;
+const SadMxNParam avx2_tests[] = {
+  make_tuple(64, 64, sad64x64_avx2, -1),
+  make_tuple(64, 32, sad64x32_avx2, -1),
+  make_tuple(32, 64, sad32x64_avx2, -1),
+  make_tuple(32, 32, sad32x32_avx2, -1),
+  make_tuple(32, 16, sad32x16_avx2, -1),
+};
+INSTANTIATE_TEST_CASE_P(AVX2, SADTest, ::testing::ValuesIn(avx2_tests));
+
+const SadMxNAvgFunc sad64x64_avg_avx2 = vpx_sad64x64_avg_avx2;
+const SadMxNAvgFunc sad64x32_avg_avx2 = vpx_sad64x32_avg_avx2;
+const SadMxNAvgFunc sad32x64_avg_avx2 = vpx_sad32x64_avg_avx2;
+const SadMxNAvgFunc sad32x32_avg_avx2 = vpx_sad32x32_avg_avx2;
+const SadMxNAvgFunc sad32x16_avg_avx2 = vpx_sad32x16_avg_avx2;
+const SadMxNAvgParam avg_avx2_tests[] = {
+  make_tuple(64, 64, sad64x64_avg_avx2, -1),
+  make_tuple(64, 32, sad64x32_avg_avx2, -1),
+  make_tuple(32, 64, sad32x64_avg_avx2, -1),
+  make_tuple(32, 32, sad32x32_avg_avx2, -1),
+  make_tuple(32, 16, sad32x16_avg_avx2, -1),
+};
+INSTANTIATE_TEST_CASE_P(AVX2, SADavgTest, ::testing::ValuesIn(avg_avx2_tests));
+
+const SadMxNx4Func sad64x64x4d_avx2 = vpx_sad64x64x4d_avx2;
+const SadMxNx4Func sad32x32x4d_avx2 = vpx_sad32x32x4d_avx2;
+const SadMxNx4Param x4d_avx2_tests[] = {
+  make_tuple(64, 64, sad64x64x4d_avx2, -1),
+  make_tuple(32, 32, sad32x32x4d_avx2, -1),
+};
+INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::ValuesIn(x4d_avx2_tests));
+#endif  // HAVE_AVX2
 
 }  // namespace