// test/av1_horz_only_frame_superres_test.cc - aom - Git at Google

 /*
  * Copyright (c) 2018, Alliance for Open Media. All rights reserved
  *
  * This source code is subject to the terms of the BSD 2 Clause License and
  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
  * was not distributed with this source code in the LICENSE file, you can
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */

 #include <tuple>
 #include <vector>

 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"

 #include "config/av1_rtcd.h"

 #include "aom_ports/aom_timer.h"
 #include "av1/common/convolve.h"
 #include "av1/common/resize.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"

 namespace {
 // Number of outer iterations of the correctness loop; each iteration covers
 // every superres denominator once.
 const int kTestIters = 10;
 // Number of calls made in each timed loop of the speed test.
 const int kPerfIters = 1000;

 // Padding, in rows, placed above and below each test image.
 const int kVPad = 32;
 // Padding, in columns, placed to the left and right of each test image row.
 const int kHPad = 32;

 using libaom_test::ACMRandom;
 using std::make_tuple;
 using std::tuple;

 // Owns the pixel buffers for one horizontal superres test case.
 //
 // The source and destination vectors each hold two back-to-back copies of a
 // padded image: the copy at offset 0 is selected when `ref` is true, the copy
 // one block further on otherwise. Every copy carries kVPad extra rows above
 // and below the image and kHPad extra columns on either side of each row.
 template <typename Pixel>
 class TestImage {
  public:
   // Records the test parameters, derives the upscaled width and both row
   // strides, and allocates (without filling) the two buffers.
   TestImage(int w_src, int h, int superres_denom, int x0, int bd)
       : w_src_(w_src), h_(h), superres_denom_(superres_denom), x0_(x0),
         bd_(bd) {
     assert(bd < 16);
     assert(bd <= 8 * static_cast<int>(sizeof(Pixel)));
     assert(9 <= superres_denom && superres_denom <= 16);
     assert(SCALE_NUMERATOR == 8);
     assert(0 <= x0_ && x0_ <= RS_SCALE_SUBPEL_MASK);

     // Seed with the source width; the helper rewrites it in place.
     int upscaled_width = w_src_;
     av1_calculate_unscaled_superres_size(&upscaled_width, NULL,
                                          superres_denom);
     w_dst_ = upscaled_width;

     const int padded_src_width = w_src_ + 2 * kHPad;
     const int padded_dst_width = w_dst_ + 2 * kHPad;
     src_stride_ = ALIGN_POWER_OF_TWO(padded_src_width, 4);
     dst_stride_ = ALIGN_POWER_OF_TWO(padded_dst_width, 4);

     // Room for two blocks in each buffer.
     src_data_.resize(2 * src_block_size());
     dst_data_.resize(2 * dst_block_size());
   }

   void Initialize(ACMRandom *rnd);
   void Check() const;

   int src_width() const { return w_src_; }
   int dst_width() const { return w_dst_; }
   int height() const { return h_; }
   int x0() const { return x0_; }

   int src_stride() const { return src_stride_; }
   int dst_stride() const { return dst_stride_; }

   // Element count of one padded block (image rows plus top/bottom padding).
   int src_block_size() const { return src_stride() * (2 * kVPad + h_); }
   int dst_block_size() const { return dst_stride() * (2 * kVPad + h_); }

   // Returns the start of the selected source block, or of the image proper
   // inside it when `borders` is false.
   const Pixel *GetSrcData(bool ref, bool borders) const {
     const int block_offset = ref ? 0 : src_block_size();
     const Pixel *block_start = &src_data_[block_offset];
     if (borders) return block_start;
     return block_start + kHPad + src_stride_ * kVPad;
   }

   // Returns the start of the selected destination block, or of the image
   // proper inside it when `borders` is false.
   Pixel *GetDstData(bool ref, bool borders) {
     const int block_offset = ref ? 0 : dst_block_size();
     Pixel *block_start = &dst_data_[block_offset];
     if (borders) return block_start;
     return block_start + kHPad + dst_stride_ * kVPad;
   }

  private:
   int w_src_;
   int w_dst_;
   int h_;
   int superres_denom_;
   int x0_;
   int bd_;
   int src_stride_;
   int dst_stride_;

   std::vector<Pixel> src_data_;
   std::vector<Pixel> dst_data_;
 };

 // Writes num_pixels entries starting at data. With trash set, every entry
 // is a value drawn from rnd and masked to the low bd bits; otherwise the
 // entries are zeroed.
 template <typename Pixel>
 void FillEdge(ACMRandom *rnd, int num_pixels, int bd, bool trash, Pixel *data) {
   if (trash) {
     const Pixel mask = (1 << bd) - 1;
     int remaining = num_pixels;
     while (remaining-- > 0) *data++ = rnd->Rand16() & mask;
     return;
   }
   memset(data, 0, sizeof(*data) * num_pixels);
 }

 // Fills one padded block at data and then duplicates it into the block that
 // immediately follows.
 //
 // The w x h image area always receives random values masked to bd bits. The
 // kVPad border rows and kHPad border columns are filled through FillEdge, so
 // they are random when trash_edges is set and zero otherwise.
 template <typename Pixel>
 void PrepBuffers(ACMRandom *rnd, int w, int h, int stride, int bd,
                  bool trash_edges, Pixel *data) {
   assert(rnd);
   const Pixel pixel_mask = (1 << bd) - 1;
   const int border_elts = stride * kVPad;

   // Rows above the image.
   FillEdge(rnd, border_elts, bd, trash_edges, data);

   // Image rows: left padding, pixels, right padding, in that order.
   Pixel *row = data + border_elts;
   for (int y = 0; y < h; ++y, row += stride) {
     Pixel *const pixels = row + kHPad;
     FillEdge(rnd, kHPad, bd, trash_edges, row);
     for (int x = 0; x < w; ++x) pixels[x] = rnd->Rand16() & pixel_mask;
     FillEdge(rnd, kHPad, bd, trash_edges, pixels + w);
   }

   // Rows below the image.
   FillEdge(rnd, border_elts, bd, trash_edges, data + stride * (kVPad + h));

   // Mirror the finished block into the second half of the buffer.
   const int block_elts = stride * (h + 2 * kVPad);
   const int bytes_per_pixel = sizeof(*data);
   const int block_bytes = bytes_per_pixel * block_elts;
   memcpy(data + block_elts, data, block_bytes);
 }

 // Populates both buffers from rnd: the source gets zeroed borders, the
 // destination gets random ("trashed") borders.
 template <typename Pixel>
 void TestImage<Pixel>::Initialize(ACMRandom *rnd) {
   Pixel *const src_base = &src_data_[0];
   Pixel *const dst_base = &dst_data_[0];
   PrepBuffers(rnd, w_src_, h_, src_stride_, bd_, /*trash_edges=*/false,
               src_base);
   PrepBuffers(rnd, w_dst_, h_, dst_stride_, bd_, /*trash_edges=*/true,
               dst_base);
 }

 // Compares the two destination blocks (reference output first, tested
 // output second) and reports every mismatching pixel inside the image area.
 //
 // A whole-block memcmp is tried first as a fast path. If it fails, only the
 // image area is examined pixel by pixel; differences confined to the
 // padding are not reported.
 template <typename Pixel>
 void TestImage<Pixel>::Check() const {
   const int num_pixels = dst_block_size();
   const Pixel *ref_dst = &dst_data_[0];
   const Pixel *tst_dst = &dst_data_[num_pixels];

   // If memcmp returns 0, there's nothing to do.
   if (0 == memcmp(ref_dst, tst_dst, sizeof(*ref_dst) * num_pixels)) return;

   // Otherwise, iterate through the buffer looking for differences, *ignoring
   // the edges*
   const int stride = dst_stride_;
   for (int r = kVPad; r < h_ + kVPad; ++r) {
     // Columns are offset by the horizontal padding, not the vertical one.
     for (int c = kHPad; c < w_dst_ + kHPad; ++c) {
       const int32_t ref_value = ref_dst[r * stride + c];
       const int32_t tst_value = tst_dst[r * stride + c];

       EXPECT_EQ(tst_value, ref_value)
           << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad)
           << ", superres_denom: " << superres_denom_ << ", height: " << h_
           << ", src_width: " << w_src_ << ", dst_width: " << w_dst_
           << ", x0: " << x0_;
     }
   }
 }

 // Shared fixture logic for the horizontal superres convolve tests.
 //
 // Subclasses provide SetUp() (to read their test parameters) and RunOne()
 // (to invoke either the reference or the tested function on image_). The
 // fixture owns image_ and frees it on every path.
 template <typename Pixel>
 class ConvolveHorizRSTestBase : public ::testing::Test {
  public:
   ConvolveHorizRSTestBase() : bd_(0), image_(NULL) {}
   // Safety net: frees an image that a test body left behind.
   virtual ~ConvolveHorizRSTestBase() { delete image_; }
   virtual void TearDown() { libaom_test::ClearSystemState(); }

   // Implemented by subclasses (SetUp depends on the parameters passed
   // in and RunOne depends on the function to be tested. These can't
   // be templated for low/high bit depths because they have different
   // numbers of parameters)
   virtual void SetUp() = 0;
   virtual void RunOne(bool ref) = 0;

  protected:
   void SetBitDepth(int bd) { bd_ = bd; }

   // Runs the reference and the tested function on kTestIters random images
   // for every superres denominator from 9 to 16 and compares their outputs.
   void CorrectnessTest() {
     ACMRandom rnd(ACMRandom::DeterministicSeed());
     for (int i = 0; i < kTestIters; ++i) {
       for (int superres_denom = 9; superres_denom <= 16; superres_denom++) {
         // Get a random height between 512 and 767
         int height = rnd.Rand8() + 512;

         // Get a random src width between 128 and 383
         int width_src = rnd.Rand8() + 128;

         // x0 is normally calculated by get_upscale_convolve_x0 in
         // av1/common/resize.c. However, this test should work for
         // any value of x0 between 0 and RS_SCALE_SUBPEL_MASK
         // (inclusive), so we choose one at random.
         int x0 = rnd.Rand16() % (RS_SCALE_SUBPEL_MASK + 1);

         image_ =
             new TestImage<Pixel>(width_src, height, superres_denom, x0, bd_);

         Prep(&rnd);
         RunOne(true);
         RunOne(false);
         image_->Check();

         ReleaseImage();
       }
     }
   }

   // Times kPerfIters calls of the reference and of the tested function on
   // one fixed image and expects the tested function to be faster.
   void SpeedTest() {
     // Pick some specific parameters to test
     int height = 767;
     int width_src = 129;
     int superres_denom = 13;
     int x0 = RS_SCALE_SUBPEL_MASK >> 1;

     image_ = new TestImage<Pixel>(width_src, height, superres_denom, x0, bd_);

     ACMRandom rnd(ACMRandom::DeterministicSeed());
     Prep(&rnd);

     aom_usec_timer ref_timer;
     aom_usec_timer_start(&ref_timer);
     for (int i = 0; i < kPerfIters; ++i) RunOne(true);
     aom_usec_timer_mark(&ref_timer);
     const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);

     aom_usec_timer tst_timer;
     aom_usec_timer_start(&tst_timer);
     for (int i = 0; i < kPerfIters; ++i) RunOne(false);
     aom_usec_timer_mark(&tst_timer);
     const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);

     std::cout << "[          ] C time = " << ref_time / 1000
               << " ms, SIMD time = " << tst_time / 1000 << " ms\n";

     EXPECT_GT(ref_time, tst_time)
         << "Error: ConvolveHorizRSTest (Speed Test), SIMD slower than C.\n"
         << "C time: " << ref_time << " us\n"
         << "SIMD time: " << tst_time << " us\n";

     // The image was previously never freed on this path.
     ReleaseImage();
   }

   // Fills the current image's buffers from rnd.
   void Prep(ACMRandom *rnd) {
     assert(rnd);
     image_->Initialize(rnd);
   }

   // Frees the current image and clears the pointer so it cannot dangle or
   // be deleted twice.
   void ReleaseImage() {
     delete image_;
     image_ = NULL;
   }

   int bd_;
   TestImage<Pixel> *image_;
 };

 // Pointer to an 8-bit horizontal superres convolve function.
 using LowBDConvolveHorizRsFunc = void (*)(const uint8_t *src, int src_stride,
                                           uint8_t *dst, int dst_stride, int w,
                                           int h, const int16_t *x_filters,
                                           const int x0_qn,
                                           const int x_step_qn);

 // Parameters of the 8-bit suite, in tuple order:
 //  <tst_fun_>
 using LowBDParams = tuple<LowBDConvolveHorizRsFunc>;

 // Fixture for the 8-bit path: compares the parameterized function against
 // av1_convolve_horiz_rs_c on uint8_t images.
 class LowBDConvolveHorizRSTest
     : public ConvolveHorizRSTestBase<uint8_t>,
       public ::testing::WithParamInterface<LowBDParams> {
  public:
   virtual ~LowBDConvolveHorizRSTest() {}

   // Reads the function under test from the parameter tuple; the bit depth
   // is fixed at 8.
   void SetUp() {
     SetBitDepth(8);
     tst_fun_ = GET_PARAM(0);
   }

   // Runs the reference (ref == true) or the tested function on the matching
   // copy of the image buffers.
   void RunOne(bool ref) {
     const int16_t *const filters = &av1_resize_filter_normative[0][0];
     const int in_stride = image_->src_stride();
     const int out_stride = image_->dst_stride();
     const int in_width = image_->src_width();
     const int out_width = image_->dst_width();
     const int rows = image_->height();
     const int start_qn = image_->x0();
     const int32_t step_qn = av1_get_upscale_convolve_step(in_width, out_width);

     const uint8_t *const in = image_->GetSrcData(ref, false);
     uint8_t *const out = image_->GetDstData(ref, false);

     if (!ref) {
       tst_fun_(in, in_stride, out, out_stride, out_width, rows, filters,
                start_qn, step_qn);
       return;
     }
     av1_convolve_horiz_rs_c(in, in_stride, out, out_stride, out_width, rows,
                             filters, start_qn, step_qn);
   }

  private:
   LowBDConvolveHorizRsFunc tst_fun_;
 };

 // Compares the tested function's output with the C reference.
 TEST_P(LowBDConvolveHorizRSTest, Correctness) { CorrectnessTest(); }
 // Timing comparison; the DISABLED_ prefix keeps it out of default runs.
 TEST_P(LowBDConvolveHorizRSTest, DISABLED_Speed) { SpeedTest(); }

 // Instantiates the 8-bit suite for the SSE4.1 implementation.
 INSTANTIATE_TEST_SUITE_P(SSE4_1, LowBDConvolveHorizRSTest,
                          ::testing::Values(av1_convolve_horiz_rs_sse4_1));

 #if CONFIG_AV1_HIGHBITDEPTH
 // Pointer to a high-bit-depth horizontal superres convolve function; the
 // trailing argument is the bit depth.
 using HighBDConvolveHorizRsFunc = void (*)(const uint16_t *src, int src_stride,
                                            uint16_t *dst, int dst_stride,
                                            int w, int h,
                                            const int16_t *x_filters,
                                            const int x0_qn,
                                            const int x_step_qn, int bd);

 // Parameters of the high-bit-depth suite, in tuple order:
 //  <tst_fun_, bd_>
 using HighBDParams = tuple<HighBDConvolveHorizRsFunc, int>;

 // Fixture for the high-bit-depth path: compares the parameterized function
 // against av1_highbd_convolve_horiz_rs_c on uint16_t images.
 class HighBDConvolveHorizRSTest
     : public ConvolveHorizRSTestBase<uint16_t>,
       public ::testing::WithParamInterface<HighBDParams> {
  public:
   virtual ~HighBDConvolveHorizRSTest() {}

   // Reads the function under test and the bit depth from the parameter
   // tuple.
   void SetUp() {
     tst_fun_ = GET_PARAM(0);
     SetBitDepth(GET_PARAM(1));
   }

   // Runs the reference (ref == true) or the tested function on the matching
   // copy of the image buffers.
   void RunOne(bool ref) {
     const int16_t *const filters = &av1_resize_filter_normative[0][0];
     const int in_stride = image_->src_stride();
     const int out_stride = image_->dst_stride();
     const int in_width = image_->src_width();
     const int out_width = image_->dst_width();
     const int rows = image_->height();
     const int start_qn = image_->x0();
     const int32_t step_qn = av1_get_upscale_convolve_step(in_width, out_width);

     const uint16_t *const in = image_->GetSrcData(ref, false);
     uint16_t *const out = image_->GetDstData(ref, false);

     if (!ref) {
       tst_fun_(in, in_stride, out, out_stride, out_width, rows, filters,
                start_qn, step_qn, bd_);
       return;
     }
     av1_highbd_convolve_horiz_rs_c(in, in_stride, out, out_stride, out_width,
                                    rows, filters, start_qn, step_qn, bd_);
   }

  private:
   HighBDConvolveHorizRsFunc tst_fun_;
 };

 // Bit depths the high-bit-depth suite is instantiated with.
 const int kBDs[] = { 8, 10, 12 };

 // Compares the tested function's output with the C reference.
 TEST_P(HighBDConvolveHorizRSTest, Correctness) { CorrectnessTest(); }
 // Timing comparison; the DISABLED_ prefix keeps it out of default runs.
 TEST_P(HighBDConvolveHorizRSTest, DISABLED_Speed) { SpeedTest(); }

 // Instantiates the suite for the SSE4.1 implementation at every bit depth
 // listed in kBDs.
 INSTANTIATE_TEST_SUITE_P(
     SSE4_1, HighBDConvolveHorizRSTest,
     ::testing::Combine(::testing::Values(av1_highbd_convolve_horiz_rs_sse4_1),
                        ::testing::ValuesIn(kBDs)));
 #endif  // CONFIG_AV1_HIGHBITDEPTH

 }  // namespace
	/*
	* Copyright (c) 2018, Alliance for Open Media. All rights reserved
	*
	* This source code is subject to the terms of the BSD 2 Clause License and
	* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
	* was not distributed with this source code in the LICENSE file, you can
	* obtain it at www.aomedia.org/license/software. If the Alliance for Open
	* Media Patent License 1.0 was not distributed with this source code in the
	* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
	*/

	#include <tuple>
	#include <vector>

	#include "third_party/googletest/src/googletest/include/gtest/gtest.h"

	#include "config/av1_rtcd.h"

	#include "aom_ports/aom_timer.h"
	#include "av1/common/convolve.h"
	#include "av1/common/resize.h"
	#include "test/acm_random.h"
	#include "test/clear_system_state.h"
	#include "test/register_state_check.h"
	#include "test/util.h"

	namespace {
	// Number of outer iterations of the correctness loop; each iteration covers
	// every superres denominator once.
	const int kTestIters = 10;
	// Number of calls made in each timed loop of the speed test.
	const int kPerfIters = 1000;

	// Padding, in rows, placed above and below each test image.
	const int kVPad = 32;
	// Padding, in columns, placed to the left and right of each test image row.
	const int kHPad = 32;

	using libaom_test::ACMRandom;
	using std::make_tuple;
	using std::tuple;

	// Owns the pixel buffers for one horizontal superres test case.
	//
	// The source and destination vectors each hold two back-to-back copies of a
	// padded image: the copy at offset 0 is selected when `ref` is true, the copy
	// one block further on otherwise. Every copy carries kVPad extra rows above
	// and below the image and kHPad extra columns on either side of each row.
	template <typename Pixel>
	class TestImage {
	 public:
	  // Records the test parameters, derives the upscaled width and both row
	  // strides, and allocates (without filling) the two buffers.
	  TestImage(int w_src, int h, int superres_denom, int x0, int bd)
	      : w_src_(w_src), h_(h), superres_denom_(superres_denom), x0_(x0),
	        bd_(bd) {
	    assert(bd < 16);
	    assert(bd <= 8 * static_cast<int>(sizeof(Pixel)));
	    assert(9 <= superres_denom && superres_denom <= 16);
	    assert(SCALE_NUMERATOR == 8);
	    assert(0 <= x0_ && x0_ <= RS_SCALE_SUBPEL_MASK);

	    // Seed with the source width; the helper rewrites it in place.
	    int upscaled_width = w_src_;
	    av1_calculate_unscaled_superres_size(&upscaled_width, NULL,
	                                         superres_denom);
	    w_dst_ = upscaled_width;

	    const int padded_src_width = w_src_ + 2 * kHPad;
	    const int padded_dst_width = w_dst_ + 2 * kHPad;
	    src_stride_ = ALIGN_POWER_OF_TWO(padded_src_width, 4);
	    dst_stride_ = ALIGN_POWER_OF_TWO(padded_dst_width, 4);

	    // Room for two blocks in each buffer.
	    src_data_.resize(2 * src_block_size());
	    dst_data_.resize(2 * dst_block_size());
	  }

	  void Initialize(ACMRandom *rnd);
	  void Check() const;

	  int src_width() const { return w_src_; }
	  int dst_width() const { return w_dst_; }
	  int height() const { return h_; }
	  int x0() const { return x0_; }

	  int src_stride() const { return src_stride_; }
	  int dst_stride() const { return dst_stride_; }

	  // Element count of one padded block (image rows plus top/bottom padding).
	  int src_block_size() const { return src_stride() * (2 * kVPad + h_); }
	  int dst_block_size() const { return dst_stride() * (2 * kVPad + h_); }

	  // Returns the start of the selected source block, or of the image proper
	  // inside it when `borders` is false.
	  const Pixel *GetSrcData(bool ref, bool borders) const {
	    const int block_offset = ref ? 0 : src_block_size();
	    const Pixel *block_start = &src_data_[block_offset];
	    if (borders) return block_start;
	    return block_start + kHPad + src_stride_ * kVPad;
	  }

	  // Returns the start of the selected destination block, or of the image
	  // proper inside it when `borders` is false.
	  Pixel *GetDstData(bool ref, bool borders) {
	    const int block_offset = ref ? 0 : dst_block_size();
	    Pixel *block_start = &dst_data_[block_offset];
	    if (borders) return block_start;
	    return block_start + kHPad + dst_stride_ * kVPad;
	  }

	 private:
	  int w_src_;
	  int w_dst_;
	  int h_;
	  int superres_denom_;
	  int x0_;
	  int bd_;
	  int src_stride_;
	  int dst_stride_;

	  std::vector<Pixel> src_data_;
	  std::vector<Pixel> dst_data_;
	};

	template <typename Pixel>
	void FillEdge(ACMRandom rnd, int num_pixels, int bd, bool trash, Pixel data) {
	if (!trash) {
	memset(data, 0, sizeof(data) num_pixels);
	return;
	}
	const Pixel mask = (1 << bd) - 1;
	for (int i = 0; i < num_pixels; ++i) data[i] = rnd->Rand16() & mask;
	}

	// Fills one padded block at data and then duplicates it into the block that
	// immediately follows.
	//
	// The w x h image area always receives random values masked to bd bits. The
	// kVPad border rows and kHPad border columns are filled through FillEdge, so
	// they are random when trash_edges is set and zero otherwise.
	template <typename Pixel>
	void PrepBuffers(ACMRandom *rnd, int w, int h, int stride, int bd,
	                 bool trash_edges, Pixel *data) {
	  assert(rnd);
	  const Pixel mask = (1 << bd) - 1;

	  // Fill in the first buffer with random data
	  // Top border
	  FillEdge(rnd, stride * kVPad, bd, trash_edges, data);
	  for (int r = 0; r < h; ++r) {
	    // Start of padded row r: skip the top border rows, then r image rows.
	    Pixel *row_data = data + (kVPad + r) * stride;
	    // Left border, contents, right border
	    FillEdge(rnd, kHPad, bd, trash_edges, row_data);
	    for (int c = 0; c < w; ++c) row_data[kHPad + c] = rnd->Rand16() & mask;
	    FillEdge(rnd, kHPad, bd, trash_edges, row_data + kHPad + w);
	  }
	  // Bottom border
	  FillEdge(rnd, stride * kVPad, bd, trash_edges, data + stride * (kVPad + h));

	  const int bpp = sizeof(*data);
	  const int block_elts = stride * (h + 2 * kVPad);
	  const int block_size = bpp * block_elts;

	  // Now copy that to the second buffer
	  memcpy(data + block_elts, data, block_size);
	}

	// Populates both buffers from rnd: the source gets zeroed borders, the
	// destination gets random ("trashed") borders.
	template <typename Pixel>
	void TestImage<Pixel>::Initialize(ACMRandom *rnd) {
	  Pixel *const src_base = &src_data_[0];
	  Pixel *const dst_base = &dst_data_[0];
	  PrepBuffers(rnd, w_src_, h_, src_stride_, bd_, /*trash_edges=*/false,
	              src_base);
	  PrepBuffers(rnd, w_dst_, h_, dst_stride_, bd_, /*trash_edges=*/true,
	              dst_base);
	}

	// Compares the two destination blocks (reference output first, tested
	// output second) and reports every mismatching pixel inside the image area.
	//
	// A whole-block memcmp is tried first as a fast path. If it fails, only the
	// image area is examined pixel by pixel; differences confined to the
	// padding are not reported.
	template <typename Pixel>
	void TestImage<Pixel>::Check() const {
	  const int num_pixels = dst_block_size();
	  const Pixel *ref_dst = &dst_data_[0];
	  const Pixel *tst_dst = &dst_data_[num_pixels];

	  // If memcmp returns 0, there's nothing to do. The length is the element
	  // size times the element count.
	  if (0 == memcmp(ref_dst, tst_dst, sizeof(*ref_dst) * num_pixels)) return;

	  // Otherwise, iterate through the buffer looking for differences, *ignoring
	  // the edges*
	  const int stride = dst_stride_;
	  for (int r = kVPad; r < h_ + kVPad; ++r) {
	    // Columns are offset by the horizontal padding, not the vertical one.
	    for (int c = kHPad; c < w_dst_ + kHPad; ++c) {
	      const int32_t ref_value = ref_dst[r * stride + c];
	      const int32_t tst_value = tst_dst[r * stride + c];

	      EXPECT_EQ(tst_value, ref_value)
	          << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad)
	          << ", superres_denom: " << superres_denom_ << ", height: " << h_
	          << ", src_width: " << w_src_ << ", dst_width: " << w_dst_
	          << ", x0: " << x0_;
	    }
	  }
	}

	// Shared fixture logic for the horizontal superres convolve tests.
	//
	// Subclasses provide SetUp() (to read their test parameters) and RunOne()
	// (to invoke either the reference or the tested function on image_). The
	// fixture owns image_ and frees it on every path.
	template <typename Pixel>
	class ConvolveHorizRSTestBase : public ::testing::Test {
	 public:
	  ConvolveHorizRSTestBase() : bd_(0), image_(NULL) {}
	  // Safety net: frees an image that a test body left behind.
	  virtual ~ConvolveHorizRSTestBase() { delete image_; }
	  virtual void TearDown() { libaom_test::ClearSystemState(); }

	  // Implemented by subclasses (SetUp depends on the parameters passed
	  // in and RunOne depends on the function to be tested. These can't
	  // be templated for low/high bit depths because they have different
	  // numbers of parameters)
	  virtual void SetUp() = 0;
	  virtual void RunOne(bool ref) = 0;

	 protected:
	  void SetBitDepth(int bd) { bd_ = bd; }

	  // Runs the reference and the tested function on kTestIters random images
	  // for every superres denominator from 9 to 16 and compares their outputs.
	  void CorrectnessTest() {
	    ACMRandom rnd(ACMRandom::DeterministicSeed());
	    for (int i = 0; i < kTestIters; ++i) {
	      for (int superres_denom = 9; superres_denom <= 16; superres_denom++) {
	        // Get a random height between 512 and 767
	        int height = rnd.Rand8() + 512;

	        // Get a random src width between 128 and 383
	        int width_src = rnd.Rand8() + 128;

	        // x0 is normally calculated by get_upscale_convolve_x0 in
	        // av1/common/resize.c. However, this test should work for
	        // any value of x0 between 0 and RS_SCALE_SUBPEL_MASK
	        // (inclusive), so we choose one at random.
	        int x0 = rnd.Rand16() % (RS_SCALE_SUBPEL_MASK + 1);

	        image_ =
	            new TestImage<Pixel>(width_src, height, superres_denom, x0, bd_);

	        Prep(&rnd);
	        RunOne(true);
	        RunOne(false);
	        image_->Check();

	        ReleaseImage();
	      }
	    }
	  }

	  // Times kPerfIters calls of the reference and of the tested function on
	  // one fixed image and expects the tested function to be faster.
	  void SpeedTest() {
	    // Pick some specific parameters to test
	    int height = 767;
	    int width_src = 129;
	    int superres_denom = 13;
	    int x0 = RS_SCALE_SUBPEL_MASK >> 1;

	    image_ = new TestImage<Pixel>(width_src, height, superres_denom, x0, bd_);

	    ACMRandom rnd(ACMRandom::DeterministicSeed());
	    Prep(&rnd);

	    aom_usec_timer ref_timer;
	    aom_usec_timer_start(&ref_timer);
	    for (int i = 0; i < kPerfIters; ++i) RunOne(true);
	    aom_usec_timer_mark(&ref_timer);
	    const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);

	    aom_usec_timer tst_timer;
	    aom_usec_timer_start(&tst_timer);
	    for (int i = 0; i < kPerfIters; ++i) RunOne(false);
	    aom_usec_timer_mark(&tst_timer);
	    const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);

	    // Ten spaces between the brackets line the text up with gtest's own
	    // status tags.
	    std::cout << "[          ] C time = " << ref_time / 1000
	              << " ms, SIMD time = " << tst_time / 1000 << " ms\n";

	    EXPECT_GT(ref_time, tst_time)
	        << "Error: ConvolveHorizRSTest (Speed Test), SIMD slower than C.\n"
	        << "C time: " << ref_time << " us\n"
	        << "SIMD time: " << tst_time << " us\n";

	    // The image was previously never freed on this path.
	    ReleaseImage();
	  }

	  // Fills the current image's buffers from rnd.
	  void Prep(ACMRandom *rnd) {
	    assert(rnd);
	    image_->Initialize(rnd);
	  }

	  // Frees the current image and clears the pointer so it cannot dangle or
	  // be deleted twice.
	  void ReleaseImage() {
	    delete image_;
	    image_ = NULL;
	  }

	  int bd_;
	  TestImage<Pixel> *image_;
	};

	// Pointer to an 8-bit horizontal superres convolve function. This must be a
	// pointer-to-function type taking src by pointer, so that it can be stored
	// in a tuple and in a data member.
	using LowBDConvolveHorizRsFunc = void (*)(const uint8_t *src, int src_stride,
	                                          uint8_t *dst, int dst_stride, int w,
	                                          int h, const int16_t *x_filters,
	                                          const int x0_qn,
	                                          const int x_step_qn);

	// Parameters of the 8-bit suite, in tuple order:
	//  <tst_fun_>
	using LowBDParams = tuple<LowBDConvolveHorizRsFunc>;

	// Fixture for the 8-bit path: compares the parameterized function against
	// av1_convolve_horiz_rs_c on uint8_t images.
	class LowBDConvolveHorizRSTest
	    : public ConvolveHorizRSTestBase<uint8_t>,
	      public ::testing::WithParamInterface<LowBDParams> {
	 public:
	  virtual ~LowBDConvolveHorizRSTest() {}

	  // Reads the function under test from the parameter tuple; the bit depth
	  // is fixed at 8.
	  void SetUp() {
	    SetBitDepth(8);
	    tst_fun_ = GET_PARAM(0);
	  }

	  // Runs the reference (ref == true) or the tested function on the matching
	  // copy of the image buffers.
	  void RunOne(bool ref) {
	    const int16_t *const filters = &av1_resize_filter_normative[0][0];
	    const int in_stride = image_->src_stride();
	    const int out_stride = image_->dst_stride();
	    const int in_width = image_->src_width();
	    const int out_width = image_->dst_width();
	    const int rows = image_->height();
	    const int start_qn = image_->x0();
	    const int32_t step_qn = av1_get_upscale_convolve_step(in_width, out_width);

	    const uint8_t *const in = image_->GetSrcData(ref, false);
	    uint8_t *const out = image_->GetDstData(ref, false);

	    if (!ref) {
	      tst_fun_(in, in_stride, out, out_stride, out_width, rows, filters,
	               start_qn, step_qn);
	      return;
	    }
	    av1_convolve_horiz_rs_c(in, in_stride, out, out_stride, out_width, rows,
	                            filters, start_qn, step_qn);
	  }

	 private:
	  LowBDConvolveHorizRsFunc tst_fun_;
	};

	// Compares the tested function's output with the C reference.
	TEST_P(LowBDConvolveHorizRSTest, Correctness) { CorrectnessTest(); }
	// Timing comparison; the DISABLED_ prefix keeps it out of default runs.
	TEST_P(LowBDConvolveHorizRSTest, DISABLED_Speed) { SpeedTest(); }

	// Instantiates the 8-bit suite for the SSE4.1 implementation.
	INSTANTIATE_TEST_SUITE_P(SSE4_1, LowBDConvolveHorizRSTest,
	::testing::Values(av1_convolve_horiz_rs_sse4_1));

	#if CONFIG_AV1_HIGHBITDEPTH
	// Pointer to a high-bit-depth horizontal superres convolve function; the
	// trailing argument is the bit depth. This must be a pointer-to-function
	// type taking src by pointer, so that it can be stored in a tuple and in a
	// data member.
	using HighBDConvolveHorizRsFunc = void (*)(const uint16_t *src, int src_stride,
	                                           uint16_t *dst, int dst_stride,
	                                           int w, int h,
	                                           const int16_t *x_filters,
	                                           const int x0_qn,
	                                           const int x_step_qn, int bd);

	// Parameters of the high-bit-depth suite, in tuple order:
	//  <tst_fun_, bd_>
	using HighBDParams = tuple<HighBDConvolveHorizRsFunc, int>;

	// Fixture for the high-bit-depth path: compares the parameterized function
	// against av1_highbd_convolve_horiz_rs_c on uint16_t images.
	class HighBDConvolveHorizRSTest
	    : public ConvolveHorizRSTestBase<uint16_t>,
	      public ::testing::WithParamInterface<HighBDParams> {
	 public:
	  virtual ~HighBDConvolveHorizRSTest() {}

	  // Reads the function under test and the bit depth from the parameter
	  // tuple.
	  void SetUp() {
	    tst_fun_ = GET_PARAM(0);
	    SetBitDepth(GET_PARAM(1));
	  }

	  // Runs the reference (ref == true) or the tested function on the matching
	  // copy of the image buffers.
	  void RunOne(bool ref) {
	    const int16_t *const filters = &av1_resize_filter_normative[0][0];
	    const int in_stride = image_->src_stride();
	    const int out_stride = image_->dst_stride();
	    const int in_width = image_->src_width();
	    const int out_width = image_->dst_width();
	    const int rows = image_->height();
	    const int start_qn = image_->x0();
	    const int32_t step_qn = av1_get_upscale_convolve_step(in_width, out_width);

	    const uint16_t *const in = image_->GetSrcData(ref, false);
	    uint16_t *const out = image_->GetDstData(ref, false);

	    if (!ref) {
	      tst_fun_(in, in_stride, out, out_stride, out_width, rows, filters,
	               start_qn, step_qn, bd_);
	      return;
	    }
	    av1_highbd_convolve_horiz_rs_c(in, in_stride, out, out_stride, out_width,
	                                   rows, filters, start_qn, step_qn, bd_);
	  }

	 private:
	  HighBDConvolveHorizRsFunc tst_fun_;
	};

	// Bit depths the high-bit-depth suite is instantiated with.
	const int kBDs[] = { 8, 10, 12 };

	// Compares the tested function's output with the C reference.
	TEST_P(HighBDConvolveHorizRSTest, Correctness) { CorrectnessTest(); }
	// Timing comparison; the DISABLED_ prefix keeps it out of default runs.
	TEST_P(HighBDConvolveHorizRSTest, DISABLED_Speed) { SpeedTest(); }

	// Instantiates the suite for the SSE4.1 implementation at every bit depth
	// listed in kBDs.
	INSTANTIATE_TEST_SUITE_P(
	SSE4_1, HighBDConvolveHorizRSTest,
	::testing::Combine(::testing::Values(av1_highbd_convolve_horiz_rs_sse4_1),
	::testing::ValuesIn(kBDs)));
	#endif // CONFIG_AV1_HIGHBITDEPTH

	} // namespace