Revert "Add SSE4.1 optimization for interp_cubic()"

This reverts commit 145a5f09dcb5e26bc9f40a00f7ef3559bd9c5159.

Reason for revert: test fails in x86 build

Bug: 506138571
Change-Id: I965ecfb7394b4601315abe20c4935f0b3b310739
diff --git a/av1/av1.cmake b/av1/av1.cmake index 4b92eb4..5e730f9 100644 --- a/av1/av1.cmake +++ b/av1/av1.cmake
@@ -363,7 +363,6 @@ "${AOM_ROOT}/av1/encoder/x86/av1_fwd_txfm2d_sse4.c" "${AOM_ROOT}/av1/encoder/x86/encodetxb_sse4.c" "${AOM_ROOT}/av1/encoder/x86/highbd_fwd_txfm_sse4.c" - "${AOM_ROOT}/av1/encoder/x86/model_rd_sse4.c" "${AOM_ROOT}/av1/encoder/x86/rdopt_sse4.c" "${AOM_ROOT}/av1/encoder/x86/pickrst_sse4.c")
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl index 2e7acf3..78e6429 100644 --- a/av1/common/av1_rtcd_defs.pl +++ b/av1/common/av1_rtcd_defs.pl
@@ -460,9 +460,6 @@ add_proto qw/void av1_get_horver_correlation_full/, "const int16_t *diff, int stride, int w, int h, float *hcorr, float *vcorr"; specialize qw/av1_get_horver_correlation_full sse4_1 avx2 neon/; - add_proto qw/void av1_interp_cubic_rate_dist/, "const double *p1, const double *p2, double x, double * const rate_f, double * const distbysse_f"; - specialize qw/av1_interp_cubic_rate_dist sse4_1/; - add_proto qw/void av1_nn_predict/, "const float *input_nodes, const NN_CONFIG *const nn_config, int reduce_prec, float *const output"; add_proto qw/void av1_nn_fast_softmax_16/, "const float *input_nodes, float *output";
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c index b5f6b3a..e9f6e54 100644 --- a/av1/encoder/rd.c +++ b/av1/encoder/rd.c
@@ -949,13 +949,6 @@ x * (3.0 * (p[1] - p[2]) + p[3] - p[0]))); } -void av1_interp_cubic_rate_dist_c(const double *p1, const double *p2, double x, - double *const rate_f, - double *const distbysse_f) { - *rate_f = interp_cubic(p1, x); - *distbysse_f = interp_cubic(p2, x); -} - /* static double interp_bicubic(const double *p, int p_stride, double x, double y) { @@ -1087,8 +1080,9 @@ assert(xi > 0); const double *prate = &interp_rgrid_curv[rcat][(xi - 1)]; + *rate_f = interp_cubic(prate, xo); const double *pdist = &interp_dgrid_curv[dcat][(xi - 1)]; - av1_interp_cubic_rate_dist(prate, pdist, xo, rate_f, distbysse_f); + *distbysse_f = interp_cubic(pdist, xo); } static void get_entropy_contexts_plane(BLOCK_SIZE plane_bsize,
diff --git a/av1/encoder/x86/model_rd_sse4.c b/av1/encoder/x86/model_rd_sse4.c deleted file mode 100644 index 48bc580..0000000 --- a/av1/encoder/x86/model_rd_sse4.c +++ /dev/null
@@ -1,64 +0,0 @@ -/* - * Copyright (c) 2026, Alliance for Open Media. All rights reserved. - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <smmintrin.h> - -#include "config/av1_rtcd.h" - -void av1_interp_cubic_rate_dist_sse4_1(const double *p1, const double *p2, - double x, double *const rate_f, - double *const distbysse_f) { - const __m128d half = _mm_set1_pd(0.5); - const __m128d two = _mm_set1_pd(2.0); - const __m128d three = _mm_set1_pd(3.0); - const __m128d four = _mm_set1_pd(4.0); - const __m128d five = _mm_set1_pd(5.0); - - const __m128d reg_x = _mm_set1_pd(x); - const __m128d reg_p0 = _mm_set_pd(p2[0], p1[0]); - const __m128d reg_p1 = _mm_set_pd(p2[1], p1[1]); - const __m128d reg_p2 = _mm_set_pd(p2[2], p1[2]); - const __m128d reg_p3 = _mm_set_pd(p2[3], p1[3]); - - // To ensure that results are bit-identical to the C code, we need to perform - // exactly the same sequence of operations here as in the C code. 
- // reg_res_0 = x * (3.0 * (p[1] - p[2]) + p[3] - p[0]) - __m128d reg_res_0 = _mm_sub_pd(reg_p1, reg_p2); - reg_res_0 = _mm_mul_pd(three, reg_res_0); - reg_res_0 = _mm_add_pd(reg_res_0, reg_p3); - reg_res_0 = _mm_sub_pd(reg_res_0, reg_p0); - reg_res_0 = _mm_mul_pd(reg_x, reg_res_0); - - // reg_res_1 = 2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2]- p[3] - const __m128d regp0_x_2 = _mm_mul_pd(two, reg_p0); - const __m128d regp1_x_5 = _mm_mul_pd(five, reg_p1); - const __m128d regp2_x_4 = _mm_mul_pd(four, reg_p2); - __m128d reg_res_1 = _mm_sub_pd(regp0_x_2, regp1_x_5); - reg_res_1 = _mm_add_pd(reg_res_1, regp2_x_4); - reg_res_1 = _mm_sub_pd(reg_res_1, reg_p3); - - // reg_res_2 = x * (reg_res_1 + reg_res_0) - __m128d reg_res_2 = _mm_add_pd(reg_res_1, reg_res_0); - reg_res_2 = _mm_mul_pd(reg_x, reg_res_2); - - // reg_res_3 = p[2] - p[0] + reg_res_2 - __m128d reg_res_3 = _mm_sub_pd(reg_p2, reg_p0); - reg_res_3 = _mm_add_pd(reg_res_3, reg_res_2); - - // reg_res_4 = p[1] + 0.5 * x * reg_res_3 - __m128d reg_res_4 = _mm_mul_pd(_mm_mul_pd(half, reg_x), reg_res_3); - reg_res_4 = _mm_add_pd(reg_p1, reg_res_4); - - double result[2]; - _mm_storeu_pd(result, reg_res_4); - *rate_f = result[0]; - *distbysse_f = result[1]; -}
diff --git a/test/model_rd_test.cc b/test/model_rd_test.cc deleted file mode 100644 index 910f20a..0000000 --- a/test/model_rd_test.cc +++ /dev/null
@@ -1,114 +0,0 @@ -/* - * Copyright (c) 2026, Alliance for Open Media. All rights reserved. - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <cstdlib> - -#include "gtest/gtest.h" -#include "config/av1_rtcd.h" - -#include "test/acm_random.h" -#include "test/register_state_check.h" -#include "test/util.h" - -using libaom_test::ACMRandom; - -namespace { - -using InterpCubicRateDistFunc = void (*)(const double *p1, const double *p2, - double x, double *const rate_f, - double *const distbysse_f); - -using InterpCubicTestParam = std::tuple<const InterpCubicRateDistFunc>; - -class InterpCubicTest : public ::testing::TestWithParam<InterpCubicTestParam> { - public: - double generate_random_double(double min, double max) { - return min + (static_cast<double>(rnd_.Rand31()) / ((1U << 31) - 1)) * - (max - min); - } - void SetUp() override { target_func_ = GET_PARAM(0); } - void TearDown() override {} - void CheckOutput(); - void SpeedTest(); - - protected: - InterpCubicRateDistFunc target_func_; - - private: - libaom_test::ACMRandom rnd_; -}; -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(InterpCubicTest); - -void InterpCubicTest::CheckOutput() { - double p1[4], p2[4]; - double rate_f_ref, rate_f_mod, distbysse_f_ref, distbysse_f_mod; - const int knum_iter = 10000; - for (int iter = 0; iter < knum_iter; iter++) { - for (int i = 0; i < 4; i++) { - p1[i] = generate_random_double(0.0000, 4096.000000); - p2[i] = generate_random_double(0.0000, 16.0000); - } - double x = generate_random_double(0.0000, 1.0000); - - av1_interp_cubic_rate_dist_c(p1, p2, x, 
&rate_f_ref, &distbysse_f_ref); - target_func_(p1, p2, x, &rate_f_mod, &distbysse_f_mod); - EXPECT_EQ(rate_f_ref, rate_f_mod) << "Error: rate_f value mismatch"; - EXPECT_EQ(distbysse_f_ref, distbysse_f_mod) - << "Error: distbysse_f value mismatch"; - } -} - -void InterpCubicTest::SpeedTest() { - double p1[4], p2[4]; - double rate_f_ref, rate_f_mod, distbysse_f_ref, distbysse_f_mod; - - for (int i = 0; i < 4; i++) { - p1[i] = generate_random_double(0.0000, 4096.0000); - p2[i] = generate_random_double(0.0000, 16.0000); - } - double x = generate_random_double(0.0000, 1.0000); - - const int num_iter = 100000000; - - aom_usec_timer ref_timer, test_timer; - aom_usec_timer_start(&ref_timer); - for (int iter = 0; iter < num_iter; iter++) { - av1_interp_cubic_rate_dist_c(p1, p2, x, &rate_f_ref, &distbysse_f_ref); - } - aom_usec_timer_mark(&ref_timer); - const int elapsed_time_c = - static_cast<int>(aom_usec_timer_elapsed(&ref_timer)); - - aom_usec_timer_start(&test_timer); - for (int iter = 0; iter < num_iter; iter++) { - target_func_(p1, p2, x, &rate_f_mod, &distbysse_f_mod); - } - aom_usec_timer_mark(&test_timer); - const int elapsed_time_simd = - static_cast<int>(aom_usec_timer_elapsed(&test_timer)); - - printf( - " c_time=%d \t simd_time=%d \t " - "Scaling=%lf \n", - elapsed_time_c, elapsed_time_simd, - (static_cast<double>(elapsed_time_c) / elapsed_time_simd)); -} - -TEST_P(InterpCubicTest, CheckOutput) { CheckOutput(); } - -TEST_P(InterpCubicTest, DISABLED_Speed) { SpeedTest(); } - -#if HAVE_SSE4_1 -INSTANTIATE_TEST_SUITE_P(SSE4_1, InterpCubicTest, - ::testing::Values(av1_interp_cubic_rate_dist_sse4_1)); -#endif // HAVE_SSE4_1 - -} // namespace
diff --git a/test/test.cmake b/test/test.cmake index 84ba918..6224c54 100644 --- a/test/test.cmake +++ b/test/test.cmake
@@ -232,7 +232,6 @@ "${AOM_ROOT}/test/masked_variance_test.cc" "${AOM_ROOT}/test/metadata_test.cc" "${AOM_ROOT}/test/minmax_test.cc" - "${AOM_ROOT}/test/model_rd_test.cc" "${AOM_ROOT}/test/motion_vector_test.cc" "${AOM_ROOT}/test/mv_cost_test.cc" "${AOM_ROOT}/test/obmc_sad_test.cc"